apollo-crawler 0.1.30 → 0.1.31

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4f7597400d1ab12d2ab2d1d52330ca29e4f2192f
4
- data.tar.gz: 297216744ee4ea8a9af6595b1361dd3bfcf8d0b1
3
+ metadata.gz: c51a69932406b288595a4063140629cf633bc893
4
+ data.tar.gz: bb1ad85084a5b0c3188301e2339ae1656c8807c5
5
5
  SHA512:
6
- metadata.gz: 71bf482ad465efa67976547c5e0a99d8a728478c5c346f987e675da8574b45e4dba44375fd501e8699c083e42eaf7e19ac1db6d2d9962f9736d408c52f896844
7
- data.tar.gz: 01651a1aa4d3408ce63142a46c6e3727ceee35995520cfcc2df4b6451495b09cf5cebe1cc1244dca6ebef2138bb06b6ede97ac1720b75b8de866b3f381c2e25f
6
+ metadata.gz: 6bf291b011220dc67e0f5bcaad0f9f3744e0cf607638372a441fb9f63752e6b65b82b384787ea3d506862c40a10ea3799e370e4b93e0282605aa6e2af602cdec
7
+ data.tar.gz: 48ea2fe6c144b771f72bddfa9012d227aa8f349fd04fea3378f2590cdd6ba30ef99ddf186dd199e450d94d6ee0b15dbb0cdff94cdc77c6fe849db3ef7c622f90
@@ -24,9 +24,6 @@
24
24
  # Config First
25
25
  require File.join(File.dirname(__FILE__), 'apollo_crawler/env')
26
26
 
27
- # Adapters
28
- require File.join(File.dirname(__FILE__), 'apollo_crawler/adapter/adapters')
29
-
30
27
  # Agents
31
28
  require File.join(File.dirname(__FILE__), 'apollo_crawler/agent/agents')
32
29
 
@@ -21,9 +21,6 @@
21
21
  # Environment first
22
22
  require File.join(File.dirname(__FILE__), 'env')
23
23
 
24
- # Adapters
25
- require File.join(File.dirname(__FILE__), 'adapter/adapters')
26
-
27
24
  # Agents
28
25
  require File.join(File.dirname(__FILE__), 'agent/agents')
29
26
 
@@ -18,9 +18,22 @@
18
18
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
19
  # THE SOFTWARE.
20
20
 
21
+ require File.join(File.dirname(__FILE__), 'base_model')
22
+
21
23
  module Apollo
22
- module Adapter
23
- class AmqpAdapter
24
- end # class AmqpAdapter
25
- end # module Adapter
24
+ module Model
25
+ class DataChunk < BaseModel
26
+ include Mongoid::Document
27
+ include Mongoid::Timestamps
28
+
29
+ store_in collection: "data_chunks"
30
+
31
+ field :data
32
+ field :crawler
33
+ field :data_hash
34
+
35
+ # Indexes
36
+ index({ created_at: 1, updated_at: 1, crawler: 1, data_hash: 1})
37
+ end # class DataChunk
38
+ end # module Model
26
39
  end # module Apollo
@@ -20,7 +20,9 @@
20
20
 
21
21
  require File.join(File.dirname(__FILE__), 'base_model')
22
22
  require File.join(File.dirname(__FILE__), 'crawler')
23
+ require File.join(File.dirname(__FILE__), 'data_chunk')
23
24
  require File.join(File.dirname(__FILE__), 'data_source')
24
25
  require File.join(File.dirname(__FILE__), 'domain')
25
26
  require File.join(File.dirname(__FILE__), 'queued_url')
26
27
  require File.join(File.dirname(__FILE__), 'raw_document')
28
+ require File.join(File.dirname(__FILE__), 'user')
@@ -18,9 +18,24 @@
18
18
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
19
  # THE SOFTWARE.
20
20
 
21
+ require File.join(File.dirname(__FILE__), 'base_model')
22
+
21
23
  module Apollo
22
- module Adapter
23
- class MongoAdapter
24
- end # class AmqpAdapter
25
- end # module MongoAdapter
26
- end # module Apollo
24
+ module Model
25
+ class User < BaseModel
26
+ include Mongoid::Document
27
+ include Mongoid::Timestamps
28
+
29
+ store_in collection: "users"
30
+
31
+ field :nick
32
+ field :email
33
+
34
+ # Indexes
35
+ index({ created_at: 1, updated_at: 1})
36
+ index({ nick: 1 }, { unique: true, background: true })
37
+ index({ email: 1 }, { unique: true, background: true })
38
+
39
+ end # class User
40
+ end # module Model
41
+ end # module Apollo
@@ -47,6 +47,10 @@ module Apollo
47
47
  # Bindings
48
48
  declarations[:queues]["planner.fetched.queue"].bind(declarations[:exchanges]["planner.fetched"]).subscribe do |delivery_info, metadata, payload|
49
49
  msg = JSON.parse(payload)
50
+ puts "#{msg.inspect}" if opts[:verbose]
51
+
52
+ puts "REQ: #{msg['request']}" if opts[:verbose]
53
+ puts "RESP: #{msg['response']}" if opts[:verbose]
50
54
 
51
55
  request = msg['request']
52
56
  response = msg['response']
@@ -98,6 +102,9 @@ module Apollo
98
102
  links = msg['links']
99
103
  links = [] if links.nil?
100
104
 
105
+ data_hash = Digest::SHA256.new.update(data).hexdigest
106
+ puts "#{data_hash}"
107
+
101
108
  links.each do |url|
102
109
  link = url['link']
103
110
 
@@ -19,5 +19,5 @@
19
19
  # THE SOFTWARE.
20
20
 
21
21
  module Apollo
22
- VERSION = '0.1.30'
22
+ VERSION = '0.1.31'
23
23
  end # Apollo
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apollo-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.30
4
+ version: 0.1.31
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tomas Korcak
@@ -410,9 +410,6 @@ files:
410
410
  - ./config/mongoid.yml
411
411
  - ./config/mongoid.yml.default
412
412
  - ./lib/apollo_crawler.rb
413
- - ./lib/apollo_crawler/adapter/adapters.rb
414
- - ./lib/apollo_crawler/adapter/amqp_adapter.rb
415
- - ./lib/apollo_crawler/adapter/mongo_adapter.rb
416
413
  - ./lib/apollo_crawler/agent/agents.rb
417
414
  - ./lib/apollo_crawler/agent/base_agent.rb
418
415
  - ./lib/apollo_crawler/agent/crawler_agent.rb
@@ -457,11 +454,13 @@ files:
457
454
  - ./lib/apollo_crawler/logger/loggers.rb
458
455
  - ./lib/apollo_crawler/model/base_model.rb
459
456
  - ./lib/apollo_crawler/model/crawler.rb
457
+ - ./lib/apollo_crawler/model/data_chunk.rb
460
458
  - ./lib/apollo_crawler/model/data_source.rb
461
459
  - ./lib/apollo_crawler/model/domain.rb
462
460
  - ./lib/apollo_crawler/model/models.rb
463
461
  - ./lib/apollo_crawler/model/queued_url.rb
464
462
  - ./lib/apollo_crawler/model/raw_document.rb
463
+ - ./lib/apollo_crawler/model/user.rb
465
464
  - ./lib/apollo_crawler/planner/base_planner.rb
466
465
  - ./lib/apollo_crawler/planner/planners.rb
467
466
  - ./lib/apollo_crawler/planner/smart_planner.rb
@@ -1,22 +0,0 @@
1
- # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
2
- #
3
- # Permission is hereby granted, free of charge, to any person obtaining a copy
4
- # of this software and associated documentation files (the "Software"), to deal
5
- # in the Software without restriction, including without limitation the rights
6
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
- # copies of the Software, and to permit persons to whom the Software is
8
- # furnished to do so, subject to the following conditions:
9
- #
10
- # The above copyright notice and this permission notice shall be included in
11
- # all copies or substantial portions of the Software.
12
- #
13
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
- # THE SOFTWARE.
20
-
21
- require File.join(File.dirname(__FILE__), 'amqp_adapter')
22
- require File.join(File.dirname(__FILE__), 'mongo_adapter')