apollo-crawler 0.1.27 → 0.1.28
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 22e840b1ad9ecabcb011c4b02383c060d325a61f
|
4
|
+
data.tar.gz: faf47f1ae1b7451262037d7c3e28a83bb05a2678
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f916c2da81657168389ddf998407d338bbdafd323dfb5b7f0e3b4dc9ef2e9d001d00974fee48d2918a12762a90e5172146820553b09d798720f33c319db052a7
|
7
|
+
data.tar.gz: a820bf980c655156eefcad5487a79805f121f7e59d2e95285415d0229583b3df4440a4c0e8021186fc963837239cd0b1a9c1e8ca18718dc5db67bc54ff6ce9a7
|
@@ -90,6 +90,8 @@ module Apollo
|
|
90
90
|
declarations[:queues]["planner.crawled.queue"].bind(declarations[:exchanges]["planner.crawled"]).subscribe do |delivery_info, metadata, payload|
|
91
91
|
msg = JSON.parse(payload)
|
92
92
|
|
93
|
+
puts "Crawled - msg.inspect"
|
94
|
+
|
93
95
|
request = msg['request']
|
94
96
|
response = msg['response']
|
95
97
|
data = msg['data']
|
@@ -122,7 +124,8 @@ module Apollo
|
|
122
124
|
|
123
125
|
def get_next_url(opts={})
|
124
126
|
tmp = Apollo::Model::QueuedUrl.where({:state => :queued}).order_by(:created_at.asc)
|
125
|
-
tmp.find_and_modify({ "$set" => { state: :fetching }}, new: true)
|
127
|
+
res = tmp.find_and_modify({ "$set" => { state: :fetching }}, new: true)
|
128
|
+
return res
|
126
129
|
end
|
127
130
|
|
128
131
|
def fetch_queued_urls(opts={})
|
@@ -26,7 +26,9 @@ module Apollo
|
|
26
26
|
def self.schedule(url, crawler=nil, opts={})
|
27
27
|
queued_url = Apollo::Model::QueuedUrl.where(:url => url).first
|
28
28
|
|
29
|
-
|
29
|
+
if queued_url.nil? == false
|
30
|
+
return queued_url
|
31
|
+
end
|
30
32
|
|
31
33
|
res = Apollo::Model::QueuedUrl.new(:url => url, :state => :queued, :crawler_name => crawler.to_s)
|
32
34
|
res.save
|