apollo-crawler 0.1.25 → 0.1.26

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 79e9ecdfed577a1ce13b74b24d6d5bc26bf75843
4
- data.tar.gz: 6d93c6da6316d4666ddc5e434bab1caadc213ba3
3
+ metadata.gz: 5e6fa7213e0e7f81364c5dbda6a8f53def1fda6e
4
+ data.tar.gz: ecd79cd04f4a4331124b3910ef5e85da5f590692
5
5
  SHA512:
6
- metadata.gz: 863d10a255722bd53c9ee998e2886fd86d04cf7808284323d33f7da1fe77fc99ac0874f1e2a61f20c8656e039bc3bac2d2a9cef911e9b6e6d12266e91636b3bc
7
- data.tar.gz: db77a2d4606dcecbec1ae2e0d872cc41f6fea64012280e179b1147d8bbabfe7eef5446bf364978ee930f338cb367624e30a86e11fe0613e4499e03dcbc670e4b
6
+ metadata.gz: ae28d3adaca4125abdeee5ffbf73615a1efc6388e97882b67359846e81b6b58acca74fc08f59034c13f132892cca92b324876c20ab6680222aefc08b5e0d9ea1
7
+ data.tar.gz: f452e71e138696125affbaa3ae646775857bf7b99a06a0656a85e08881e66fdbe357cd31eac4bc5f3030aadbf0075b4f602f280c669d65b139b14bc20ab974ec
@@ -52,7 +52,7 @@ module Apollo
52
52
 
53
53
  doc = Apollo::Model::QueuedUrl.find(request["_id"])
54
54
  doc.update_attributes(msg['request'])
55
- doc.state = "fetched"
55
+ doc.state = :fetched
56
56
  doc.save
57
57
 
58
58
  doc = Apollo::Model::RawDocument.where(:url => request['url']).first
@@ -112,11 +112,20 @@ module Apollo
112
112
  declarations[:exchanges]["fetcher"].publish(url.to_json, :reply_to => "planner.fetched")
113
113
  end
114
114
 
115
+ def get_next_url(opts={})
116
+ Apollo::Model::QueuedUrl.where({:state => :queued}).find_and_modify({ "$set" => { state: :fetching }}, new: true)
117
+ end
118
+
115
119
  def fetch_queued_urls(opts={})
116
- while url = Apollo::Model::QueuedUrl.where({:state => :queued}).find_and_modify({ "$set" => { state: :fetching }}, new: true)
120
+ url = get_next_url(opts)
121
+
122
+ while url
123
+ puts url.inspect
117
124
  # puts "Count of URLs in Queue: #{url.count}" if opts[:verbose]
118
125
 
119
126
  fetch_url(url, opts)
127
+
128
+ url = get_next_url()
120
129
  end
121
130
  end
122
131
 
@@ -173,6 +173,16 @@ module Apollo
173
173
  return nil
174
174
  end
175
175
 
176
+ def requeue_fetching_urls(opts={})
177
+ urls = Apollo::Model::QueuedUrl.where(:state => :fetching)
178
+ urls.each do |url|
179
+ puts "Requeing '#{url.inspect}'" if opts[:verbose]
180
+
181
+ url.state = :queued
182
+ url.save
183
+ end
184
+ end
185
+
176
186
  # Run Program
177
187
  def run(args = ARGV)
178
188
  res = super(args)
@@ -180,6 +190,8 @@ module Apollo
180
190
 
181
191
  init_domains()
182
192
 
193
+ requeue_fetching_urls(self.options)
194
+
183
195
  # Here we start
184
196
  # if(ARGV.length < 1)
185
197
  # puts optparser
@@ -19,5 +19,5 @@
19
19
  # THE SOFTWARE.
20
20
 
21
21
  module Apollo
22
- VERSION = '0.1.25'
22
+ VERSION = '0.1.26'
23
23
  end # Apollo
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apollo-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.25
4
+ version: 0.1.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tomas Korcak