polipus 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,15 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 16d138f0f4b5f27b4e34851e577947641b2617eb
4
- data.tar.gz: 4fdf80f57823ac96080641d3546dd67d13f36a64
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NjU0MmViNGYzZWVjNDBkOTI1MDMwNjRlMTE0ZDVmN2RjODc4MGI2Mw==
5
+ data.tar.gz: !binary |-
6
+ YjQ3OTZmMThiNjI3ZTRiNTUxNWFlZjA1MGFiOGRhZTcxYzdiYTY0Ng==
5
7
  SHA512:
6
- metadata.gz: a3f21d158167db5030c3587dcdd5bfb83b2d167bef634af887e2563437988dc6b2908bcac44a0386459c0f02b2597cc515d9a383e0d2cb483e857828bd213349
7
- data.tar.gz: d268b9485a71474da10b91fded3e673d832a27690e16b149abf83c2125a9bbbdf369b4efeb6d3913a83da6a3b9483f239f8dd607748e930c7ef75d4a55d4b74d
8
+ metadata.gz: !binary |-
9
+ Zjc0N2Q2NmM3Yzg0OTg5NTEwYjg2N2Q5ODk5ZjU2OWQzZWQyMzEwNjk4NmJi
10
+ NzAxYzFlNDhhZGE1YzA0MGFiYjJiOWVlZThhMTFmZTM0YzAxZDE0MWE4MzAw
11
+ NjU5NWI5OThjNWE2ZWMzNThmNDU5MmZkY2Q0ODAzNDdkYmNjOTU=
12
+ data.tar.gz: !binary |-
13
+ ZWQ0MzFiMWYxNGRkNzAxM2FmZmFlYmVmNDljYTJjOGQxZGU4NjZjYTQ1ZjZi
14
+ ZDU3ZGNjMDc4Njg1MzJkMjFjYjQwYjZjNGNlNTk2ODBjN2MxMTI1ZGMwY2Ix
15
+ NjM4ZWFlY2U1NWYzZGE0NjFjNWZlYWE5Y2Y2NTA4ODk3NTIwMzU=
@@ -7,6 +7,7 @@ module Polipus
7
7
  @main_q = main_q
8
8
  @adapter = @polipus.queue_overflow_adapter
9
9
  @item_limit = item_limit
10
+ @redis = @polipus.redis
10
11
  end
11
12
 
12
13
  def url_filter &block
@@ -41,6 +42,7 @@ module Polipus
41
42
  end
42
43
  end
43
44
  source.commit if source.respond_to? :commit
45
+ @redis.expire "polipus_queue_overflow-#{@polipus.job_name}.lock", 180
44
46
  break if !message || source.empty?
45
47
  }
46
48
  performed
@@ -1,4 +1,4 @@
1
1
  module Polipus
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  HOMEPAGE = "https://github.com/taganaka/polipus"
4
4
  end
data/polipus.gemspec CHANGED
@@ -12,6 +12,7 @@ Gem::Specification.new do |s|
12
12
  s.description = %q{
13
13
  An easy to use distributed web-crawler framework based on Redis
14
14
  }
15
+ s.licenses = ["MIT"]
15
16
 
16
17
  s.rubyforge_project = "polipus"
17
18
 
@@ -20,23 +21,25 @@ Gem::Specification.new do |s|
20
21
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
21
22
  s.require_paths = ["lib"]
22
23
 
23
- s.add_dependency "redis-bloomfilter", "~> 0.0.1"
24
- s.add_dependency "redis-queue", "~> 0.0.3"
25
- s.add_dependency "nokogiri", "~> 1.6.0"
26
- s.add_dependency "hiredis", "~> 0.4.5"
27
- s.add_dependency "redis", "~> 3.0.4"
28
- s.add_dependency "mongo", "~> 1.9.2"
24
+ s.add_runtime_dependency 'redis-bloomfilter', '~> 0.0', '>= 0.0.1'
25
+ s.add_runtime_dependency 'redis-queue', '~> 0.0', '>= 0.0.3'
26
+ s.add_runtime_dependency 'nokogiri', '~> 1.6', '>= 1.6.0'
27
+ s.add_runtime_dependency 'hiredis', '~> 0.4', '>= 0.4.5'
28
+ s.add_runtime_dependency 'redis', '~> 3.0', '>= 3.0.4'
29
+ s.add_runtime_dependency 'mongo', '~> 1.9', '>= 1.9.2'
30
+
29
31
  if defined?(JRUBY_VERSION)
30
- s.add_dependency "bson", "~> 1.9.2"
32
+ s.add_runtime_dependency 'bson', '~> 1.9', '>= 1.9.2'
31
33
  else
32
- s.add_dependency "bson_ext", "~> 1.9.2"
34
+ s.add_runtime_dependency 'bson_ext', '~> 1.9', '>= 1.9.2'
33
35
  end
34
- s.add_dependency "aws-s3", "~> 0.6.3"
35
- s.add_dependency "http-cookie", "~> 1.0.1"
36
-
37
- s.add_development_dependency "rspec"
38
- s.add_development_dependency "vcr", "~> 2.5.0"
39
- s.add_development_dependency "webmock"
40
- s.add_development_dependency "flexmock", "~> 1.3.2"
36
+ s.add_runtime_dependency 'aws-s3', '~> 0.6', '>= 0.6.3'
37
+ s.add_runtime_dependency 'http-cookie', '~> 1.0', '>= 1.0.1'
38
+
39
+ s.add_development_dependency 'rspec', '~> 2.14', '>= 2.14.1'
40
+ s.add_development_dependency 'vcr', '~> 2.5', '>= 2.5.0'
41
+ s.add_development_dependency 'webmock', '>= 1.8.0', '< 1.12'
42
+ s.add_development_dependency 'flexmock', '~> 1.3', '>= 1.3.2'
43
+
41
44
 
42
45
  end
data/spec/http_spec.rb CHANGED
@@ -11,7 +11,7 @@ describe Polipus::HTTP do
11
11
  http = Polipus::HTTP.new
12
12
  page = http.fetch_page("http://sfbay.craigslist.org/apa/")
13
13
  page.should be_an_instance_of(Polipus::Page)
14
- page.doc.search("title").text.strip.should be == "SF bay area apts/housing for rent classifieds - craigslist"
14
+ page.doc.search("title").text.strip.should eq "SF bay area apts/housing for rent classifieds - craigslist"
15
15
  end
16
16
  end
17
17
 
@@ -22,9 +22,9 @@ describe Polipus::HTTP do
22
22
  page = http.fetch_page("http://greenbytes.de/tech/tc/httpredirects/t300bodyandloc.asis")
23
23
 
24
24
  page.should be_an_instance_of(Polipus::Page)
25
- page.code.should be == 200
26
- page.url.to_s.should be == "http://greenbytes.de/tech/tc/httpredirects/300.txt"
27
- page.body.strip.should be == "You have reached the target\r\nof a 300 redirect."
25
+ page.code.should be 200
26
+ page.url.to_s.should eq "http://greenbytes.de/tech/tc/httpredirects/300.txt"
27
+ page.body.strip.should eq "You have reached the target\r\nof a 300 redirect."
28
28
  end
29
29
  end
30
30
 
data/spec/page_spec.rb CHANGED
@@ -2,21 +2,30 @@ require "spec_helper"
2
2
  require "polipus/page"
3
3
 
4
4
  describe Polipus::Page do
5
- it 'should honor domain_aliases attribute' do
6
- body = <<EOF
7
- <html>
8
- <body>
9
- <a href="/page/1">1</a>
10
- <a href="/page/2">2</a>
11
- <a href="http://www.google.com/page/3">3</a>
12
- <a href="http://google.com/page/3">4</a>
13
- <a href="http://not.google.com/page/3">4</a>
14
- </body>
15
- </html>
5
+ let(:page) do
6
+ body = <<EOF
7
+ <html>
8
+ <body>
9
+ <a href="/page/1">1</a>
10
+ <a href="/page/2">2</a>
11
+ <a href="http://www.google.com/page/3">3</a>
12
+ <a href="http://google.com/page/3">4</a>
13
+ <a href="http://not.google.com/page/3">4</a>
14
+ </body>
15
+ </html>
16
16
  EOF
17
- h = {'content-type' => ['text/html']}
18
- domain_aliases = %w(www.google.com google.com)
19
- p = Polipus::Page.new 'http://www.google.com/', :code => 200, :body => body, :headers => h, :domain_aliases => domain_aliases
20
- p.links.count.should be == 4
17
+ Polipus::Page.new 'http://www.google.com/',
18
+ code: 200,
19
+ body: body,
20
+ headers: {'content-type' => ['text/html']},
21
+ domain_aliases: %w(www.google.com google.com)
22
+ end
23
+
24
+ it 'should be fetched' do
25
+ page.fetched?.should be_true
26
+ end
27
+
28
+ it 'should honor domain_aliases attribute' do
29
+ page.links.count.should be 4
21
30
  end
22
31
  end
@@ -9,9 +9,12 @@ describe Polipus::QueueOverflow::Manager do
9
9
  @storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
10
10
  @redis_q = Redis::Queue.new("queue_test","bp_queue_test", :redis => Redis.new())
11
11
  @queue_overflow = Polipus::QueueOverflow.mongo_queue(nil, "queue_test")
12
+ @redis = Redis.new
12
13
  @polipus = flexmock("polipus")
13
14
  @polipus.should_receive(:queue_overflow_adapter).and_return(@queue_overflow)
14
15
  @polipus.should_receive(:storage).and_return(@storage)
16
+ @polipus.should_receive(:redis).and_return(@redis)
17
+ @polipus.should_receive(:job_name).and_return("___test")
15
18
  @manager = Polipus::QueueOverflow::Manager.new(@polipus, @redis_q, 10)
16
19
  end
17
20
 
metadata CHANGED
@@ -1,198 +1,276 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polipus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francesco Laurita
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-01 00:00:00.000000000 Z
11
+ date: 2014-03-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis-bloomfilter
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '0.0'
20
+ - - ! '>='
18
21
  - !ruby/object:Gem::Version
19
22
  version: 0.0.1
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
- - - "~>"
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '0.0'
30
+ - - ! '>='
25
31
  - !ruby/object:Gem::Version
26
32
  version: 0.0.1
27
33
  - !ruby/object:Gem::Dependency
28
34
  name: redis-queue
29
35
  requirement: !ruby/object:Gem::Requirement
30
36
  requirements:
31
- - - "~>"
37
+ - - ~>
38
+ - !ruby/object:Gem::Version
39
+ version: '0.0'
40
+ - - ! '>='
32
41
  - !ruby/object:Gem::Version
33
42
  version: 0.0.3
34
43
  type: :runtime
35
44
  prerelease: false
36
45
  version_requirements: !ruby/object:Gem::Requirement
37
46
  requirements:
38
- - - "~>"
47
+ - - ~>
48
+ - !ruby/object:Gem::Version
49
+ version: '0.0'
50
+ - - ! '>='
39
51
  - !ruby/object:Gem::Version
40
52
  version: 0.0.3
41
53
  - !ruby/object:Gem::Dependency
42
54
  name: nokogiri
43
55
  requirement: !ruby/object:Gem::Requirement
44
56
  requirements:
45
- - - "~>"
57
+ - - ~>
58
+ - !ruby/object:Gem::Version
59
+ version: '1.6'
60
+ - - ! '>='
46
61
  - !ruby/object:Gem::Version
47
62
  version: 1.6.0
48
63
  type: :runtime
49
64
  prerelease: false
50
65
  version_requirements: !ruby/object:Gem::Requirement
51
66
  requirements:
52
- - - "~>"
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '1.6'
70
+ - - ! '>='
53
71
  - !ruby/object:Gem::Version
54
72
  version: 1.6.0
55
73
  - !ruby/object:Gem::Dependency
56
74
  name: hiredis
57
75
  requirement: !ruby/object:Gem::Requirement
58
76
  requirements:
59
- - - "~>"
77
+ - - ~>
78
+ - !ruby/object:Gem::Version
79
+ version: '0.4'
80
+ - - ! '>='
60
81
  - !ruby/object:Gem::Version
61
82
  version: 0.4.5
62
83
  type: :runtime
63
84
  prerelease: false
64
85
  version_requirements: !ruby/object:Gem::Requirement
65
86
  requirements:
66
- - - "~>"
87
+ - - ~>
88
+ - !ruby/object:Gem::Version
89
+ version: '0.4'
90
+ - - ! '>='
67
91
  - !ruby/object:Gem::Version
68
92
  version: 0.4.5
69
93
  - !ruby/object:Gem::Dependency
70
94
  name: redis
71
95
  requirement: !ruby/object:Gem::Requirement
72
96
  requirements:
73
- - - "~>"
97
+ - - ~>
98
+ - !ruby/object:Gem::Version
99
+ version: '3.0'
100
+ - - ! '>='
74
101
  - !ruby/object:Gem::Version
75
102
  version: 3.0.4
76
103
  type: :runtime
77
104
  prerelease: false
78
105
  version_requirements: !ruby/object:Gem::Requirement
79
106
  requirements:
80
- - - "~>"
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: '3.0'
110
+ - - ! '>='
81
111
  - !ruby/object:Gem::Version
82
112
  version: 3.0.4
83
113
  - !ruby/object:Gem::Dependency
84
114
  name: mongo
85
115
  requirement: !ruby/object:Gem::Requirement
86
116
  requirements:
87
- - - "~>"
117
+ - - ~>
118
+ - !ruby/object:Gem::Version
119
+ version: '1.9'
120
+ - - ! '>='
88
121
  - !ruby/object:Gem::Version
89
122
  version: 1.9.2
90
123
  type: :runtime
91
124
  prerelease: false
92
125
  version_requirements: !ruby/object:Gem::Requirement
93
126
  requirements:
94
- - - "~>"
127
+ - - ~>
128
+ - !ruby/object:Gem::Version
129
+ version: '1.9'
130
+ - - ! '>='
95
131
  - !ruby/object:Gem::Version
96
132
  version: 1.9.2
97
133
  - !ruby/object:Gem::Dependency
98
134
  name: bson_ext
99
135
  requirement: !ruby/object:Gem::Requirement
100
136
  requirements:
101
- - - "~>"
137
+ - - ~>
138
+ - !ruby/object:Gem::Version
139
+ version: '1.9'
140
+ - - ! '>='
102
141
  - !ruby/object:Gem::Version
103
142
  version: 1.9.2
104
143
  type: :runtime
105
144
  prerelease: false
106
145
  version_requirements: !ruby/object:Gem::Requirement
107
146
  requirements:
108
- - - "~>"
147
+ - - ~>
148
+ - !ruby/object:Gem::Version
149
+ version: '1.9'
150
+ - - ! '>='
109
151
  - !ruby/object:Gem::Version
110
152
  version: 1.9.2
111
153
  - !ruby/object:Gem::Dependency
112
154
  name: aws-s3
113
155
  requirement: !ruby/object:Gem::Requirement
114
156
  requirements:
115
- - - "~>"
157
+ - - ~>
158
+ - !ruby/object:Gem::Version
159
+ version: '0.6'
160
+ - - ! '>='
116
161
  - !ruby/object:Gem::Version
117
162
  version: 0.6.3
118
163
  type: :runtime
119
164
  prerelease: false
120
165
  version_requirements: !ruby/object:Gem::Requirement
121
166
  requirements:
122
- - - "~>"
167
+ - - ~>
168
+ - !ruby/object:Gem::Version
169
+ version: '0.6'
170
+ - - ! '>='
123
171
  - !ruby/object:Gem::Version
124
172
  version: 0.6.3
125
173
  - !ruby/object:Gem::Dependency
126
174
  name: http-cookie
127
175
  requirement: !ruby/object:Gem::Requirement
128
176
  requirements:
129
- - - "~>"
177
+ - - ~>
178
+ - !ruby/object:Gem::Version
179
+ version: '1.0'
180
+ - - ! '>='
130
181
  - !ruby/object:Gem::Version
131
182
  version: 1.0.1
132
183
  type: :runtime
133
184
  prerelease: false
134
185
  version_requirements: !ruby/object:Gem::Requirement
135
186
  requirements:
136
- - - "~>"
187
+ - - ~>
188
+ - !ruby/object:Gem::Version
189
+ version: '1.0'
190
+ - - ! '>='
137
191
  - !ruby/object:Gem::Version
138
192
  version: 1.0.1
139
193
  - !ruby/object:Gem::Dependency
140
194
  name: rspec
141
195
  requirement: !ruby/object:Gem::Requirement
142
196
  requirements:
143
- - - ">="
197
+ - - ~>
198
+ - !ruby/object:Gem::Version
199
+ version: '2.14'
200
+ - - ! '>='
144
201
  - !ruby/object:Gem::Version
145
- version: '0'
202
+ version: 2.14.1
146
203
  type: :development
147
204
  prerelease: false
148
205
  version_requirements: !ruby/object:Gem::Requirement
149
206
  requirements:
150
- - - ">="
207
+ - - ~>
208
+ - !ruby/object:Gem::Version
209
+ version: '2.14'
210
+ - - ! '>='
151
211
  - !ruby/object:Gem::Version
152
- version: '0'
212
+ version: 2.14.1
153
213
  - !ruby/object:Gem::Dependency
154
214
  name: vcr
155
215
  requirement: !ruby/object:Gem::Requirement
156
216
  requirements:
157
- - - "~>"
217
+ - - ~>
218
+ - !ruby/object:Gem::Version
219
+ version: '2.5'
220
+ - - ! '>='
158
221
  - !ruby/object:Gem::Version
159
222
  version: 2.5.0
160
223
  type: :development
161
224
  prerelease: false
162
225
  version_requirements: !ruby/object:Gem::Requirement
163
226
  requirements:
164
- - - "~>"
227
+ - - ~>
228
+ - !ruby/object:Gem::Version
229
+ version: '2.5'
230
+ - - ! '>='
165
231
  - !ruby/object:Gem::Version
166
232
  version: 2.5.0
167
233
  - !ruby/object:Gem::Dependency
168
234
  name: webmock
169
235
  requirement: !ruby/object:Gem::Requirement
170
236
  requirements:
171
- - - ">="
237
+ - - ! '>='
238
+ - !ruby/object:Gem::Version
239
+ version: 1.8.0
240
+ - - <
172
241
  - !ruby/object:Gem::Version
173
- version: '0'
242
+ version: '1.12'
174
243
  type: :development
175
244
  prerelease: false
176
245
  version_requirements: !ruby/object:Gem::Requirement
177
246
  requirements:
178
- - - ">="
247
+ - - ! '>='
179
248
  - !ruby/object:Gem::Version
180
- version: '0'
249
+ version: 1.8.0
250
+ - - <
251
+ - !ruby/object:Gem::Version
252
+ version: '1.12'
181
253
  - !ruby/object:Gem::Dependency
182
254
  name: flexmock
183
255
  requirement: !ruby/object:Gem::Requirement
184
256
  requirements:
185
- - - "~>"
257
+ - - ~>
258
+ - !ruby/object:Gem::Version
259
+ version: '1.3'
260
+ - - ! '>='
186
261
  - !ruby/object:Gem::Version
187
262
  version: 1.3.2
188
263
  type: :development
189
264
  prerelease: false
190
265
  version_requirements: !ruby/object:Gem::Requirement
191
266
  requirements:
192
- - - "~>"
267
+ - - ~>
268
+ - !ruby/object:Gem::Version
269
+ version: '1.3'
270
+ - - ! '>='
193
271
  - !ruby/object:Gem::Version
194
272
  version: 1.3.2
195
- description: "\n An easy to use distributed web-crawler framework based on Redis\n
273
+ description: ! "\n An easy to use distributed web-crawler framework based on Redis\n
196
274
  \ "
197
275
  email:
198
276
  - francesco.laurita@gmail.com
@@ -200,9 +278,9 @@ executables: []
200
278
  extensions: []
201
279
  extra_rdoc_files: []
202
280
  files:
203
- - ".document"
204
- - ".gitignore"
205
- - ".rspec"
281
+ - .document
282
+ - .gitignore
283
+ - .rspec
206
284
  - Gemfile
207
285
  - LICENSE.txt
208
286
  - README.md
@@ -254,7 +332,8 @@ files:
254
332
  - spec/storage_s3_spec.rb
255
333
  - spec/url_tracker_spec.rb
256
334
  homepage: https://github.com/taganaka/polipus
257
- licenses: []
335
+ licenses:
336
+ - MIT
258
337
  metadata: {}
259
338
  post_install_message:
260
339
  rdoc_options: []
@@ -262,17 +341,17 @@ require_paths:
262
341
  - lib
263
342
  required_ruby_version: !ruby/object:Gem::Requirement
264
343
  requirements:
265
- - - ">="
344
+ - - ! '>='
266
345
  - !ruby/object:Gem::Version
267
346
  version: '0'
268
347
  required_rubygems_version: !ruby/object:Gem::Requirement
269
348
  requirements:
270
- - - ">="
349
+ - - ! '>='
271
350
  - !ruby/object:Gem::Version
272
351
  version: '0'
273
352
  requirements: []
274
353
  rubyforge_project: polipus
275
- rubygems_version: 2.2.1
354
+ rubygems_version: 2.2.2
276
355
  signing_key:
277
356
  specification_version: 4
278
357
  summary: Polipus distributed web-crawler framework