polipus 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,15 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 16d138f0f4b5f27b4e34851e577947641b2617eb
4
- data.tar.gz: 4fdf80f57823ac96080641d3546dd67d13f36a64
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NjU0MmViNGYzZWVjNDBkOTI1MDMwNjRlMTE0ZDVmN2RjODc4MGI2Mw==
5
+ data.tar.gz: !binary |-
6
+ YjQ3OTZmMThiNjI3ZTRiNTUxNWFlZjA1MGFiOGRhZTcxYzdiYTY0Ng==
5
7
  SHA512:
6
- metadata.gz: a3f21d158167db5030c3587dcdd5bfb83b2d167bef634af887e2563437988dc6b2908bcac44a0386459c0f02b2597cc515d9a383e0d2cb483e857828bd213349
7
- data.tar.gz: d268b9485a71474da10b91fded3e673d832a27690e16b149abf83c2125a9bbbdf369b4efeb6d3913a83da6a3b9483f239f8dd607748e930c7ef75d4a55d4b74d
8
+ metadata.gz: !binary |-
9
+ Zjc0N2Q2NmM3Yzg0OTg5NTEwYjg2N2Q5ODk5ZjU2OWQzZWQyMzEwNjk4NmJi
10
+ NzAxYzFlNDhhZGE1YzA0MGFiYjJiOWVlZThhMTFmZTM0YzAxZDE0MWE4MzAw
11
+ NjU5NWI5OThjNWE2ZWMzNThmNDU5MmZkY2Q0ODAzNDdkYmNjOTU=
12
+ data.tar.gz: !binary |-
13
+ ZWQ0MzFiMWYxNGRkNzAxM2FmZmFlYmVmNDljYTJjOGQxZGU4NjZjYTQ1ZjZi
14
+ ZDU3ZGNjMDc4Njg1MzJkMjFjYjQwYjZjNGNlNTk2ODBjN2MxMTI1ZGMwY2Ix
15
+ NjM4ZWFlY2U1NWYzZGE0NjFjNWZlYWE5Y2Y2NTA4ODk3NTIwMzU=
@@ -7,6 +7,7 @@ module Polipus
7
7
  @main_q = main_q
8
8
  @adapter = @polipus.queue_overflow_adapter
9
9
  @item_limit = item_limit
10
+ @redis = @polipus.redis
10
11
  end
11
12
 
12
13
  def url_filter &block
@@ -41,6 +42,7 @@ module Polipus
41
42
  end
42
43
  end
43
44
  source.commit if source.respond_to? :commit
45
+ @redis.expire "polipus_queue_overflow-#{@polipus.job_name}.lock", 180
44
46
  break if !message || source.empty?
45
47
  }
46
48
  performed
@@ -1,4 +1,4 @@
1
1
  module Polipus
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  HOMEPAGE = "https://github.com/taganaka/polipus"
4
4
  end
data/polipus.gemspec CHANGED
@@ -12,6 +12,7 @@ Gem::Specification.new do |s|
12
12
  s.description = %q{
13
13
  An easy to use distributed web-crawler framework based on Redis
14
14
  }
15
+ s.licenses = ["MIT"]
15
16
 
16
17
  s.rubyforge_project = "polipus"
17
18
 
@@ -20,23 +21,25 @@ Gem::Specification.new do |s|
20
21
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
21
22
  s.require_paths = ["lib"]
22
23
 
23
- s.add_dependency "redis-bloomfilter", "~> 0.0.1"
24
- s.add_dependency "redis-queue", "~> 0.0.3"
25
- s.add_dependency "nokogiri", "~> 1.6.0"
26
- s.add_dependency "hiredis", "~> 0.4.5"
27
- s.add_dependency "redis", "~> 3.0.4"
28
- s.add_dependency "mongo", "~> 1.9.2"
24
+ s.add_runtime_dependency 'redis-bloomfilter', '~> 0.0', '>= 0.0.1'
25
+ s.add_runtime_dependency 'redis-queue', '~> 0.0', '>= 0.0.3'
26
+ s.add_runtime_dependency 'nokogiri', '~> 1.6', '>= 1.6.0'
27
+ s.add_runtime_dependency 'hiredis', '~> 0.4', '>= 0.4.5'
28
+ s.add_runtime_dependency 'redis', '~> 3.0', '>= 3.0.4'
29
+ s.add_runtime_dependency 'mongo', '~> 1.9', '>= 1.9.2'
30
+
29
31
  if defined?(JRUBY_VERSION)
30
- s.add_dependency "bson", "~> 1.9.2"
32
+ s.add_runtime_dependency 'bson', '~> 1.9', '>= 1.9.2'
31
33
  else
32
- s.add_dependency "bson_ext", "~> 1.9.2"
34
+ s.add_runtime_dependency 'bson_ext', '~> 1.9', '>= 1.9.2'
33
35
  end
34
- s.add_dependency "aws-s3", "~> 0.6.3"
35
- s.add_dependency "http-cookie", "~> 1.0.1"
36
-
37
- s.add_development_dependency "rspec"
38
- s.add_development_dependency "vcr", "~> 2.5.0"
39
- s.add_development_dependency "webmock"
40
- s.add_development_dependency "flexmock", "~> 1.3.2"
36
+ s.add_runtime_dependency 'aws-s3', '~> 0.6', '>= 0.6.3'
37
+ s.add_runtime_dependency 'http-cookie', '~> 1.0', '>= 1.0.1'
38
+
39
+ s.add_development_dependency 'rspec', '~> 2.14', '>= 2.14.1'
40
+ s.add_development_dependency 'vcr', '~> 2.5', '>= 2.5.0'
41
+ s.add_development_dependency 'webmock', '>= 1.8.0', '< 1.12'
42
+ s.add_development_dependency 'flexmock', '~> 1.3', '>= 1.3.2'
43
+
41
44
 
42
45
  end
data/spec/http_spec.rb CHANGED
@@ -11,7 +11,7 @@ describe Polipus::HTTP do
11
11
  http = Polipus::HTTP.new
12
12
  page = http.fetch_page("http://sfbay.craigslist.org/apa/")
13
13
  page.should be_an_instance_of(Polipus::Page)
14
- page.doc.search("title").text.strip.should be == "SF bay area apts/housing for rent classifieds - craigslist"
14
+ page.doc.search("title").text.strip.should eq "SF bay area apts/housing for rent classifieds - craigslist"
15
15
  end
16
16
  end
17
17
 
@@ -22,9 +22,9 @@ describe Polipus::HTTP do
22
22
  page = http.fetch_page("http://greenbytes.de/tech/tc/httpredirects/t300bodyandloc.asis")
23
23
 
24
24
  page.should be_an_instance_of(Polipus::Page)
25
- page.code.should be == 200
26
- page.url.to_s.should be == "http://greenbytes.de/tech/tc/httpredirects/300.txt"
27
- page.body.strip.should be == "You have reached the target\r\nof a 300 redirect."
25
+ page.code.should be 200
26
+ page.url.to_s.should eq "http://greenbytes.de/tech/tc/httpredirects/300.txt"
27
+ page.body.strip.should eq "You have reached the target\r\nof a 300 redirect."
28
28
  end
29
29
  end
30
30
 
data/spec/page_spec.rb CHANGED
@@ -2,21 +2,30 @@ require "spec_helper"
2
2
  require "polipus/page"
3
3
 
4
4
  describe Polipus::Page do
5
- it 'should honor domain_aliases attribute' do
6
- body = <<EOF
7
- <html>
8
- <body>
9
- <a href="/page/1">1</a>
10
- <a href="/page/2">2</a>
11
- <a href="http://www.google.com/page/3">3</a>
12
- <a href="http://google.com/page/3">4</a>
13
- <a href="http://not.google.com/page/3">4</a>
14
- </body>
15
- </html>
5
+ let(:page) do
6
+ body = <<EOF
7
+ <html>
8
+ <body>
9
+ <a href="/page/1">1</a>
10
+ <a href="/page/2">2</a>
11
+ <a href="http://www.google.com/page/3">3</a>
12
+ <a href="http://google.com/page/3">4</a>
13
+ <a href="http://not.google.com/page/3">4</a>
14
+ </body>
15
+ </html>
16
16
  EOF
17
- h = {'content-type' => ['text/html']}
18
- domain_aliases = %w(www.google.com google.com)
19
- p = Polipus::Page.new 'http://www.google.com/', :code => 200, :body => body, :headers => h, :domain_aliases => domain_aliases
20
- p.links.count.should be == 4
17
+ Polipus::Page.new 'http://www.google.com/',
18
+ code: 200,
19
+ body: body,
20
+ headers: {'content-type' => ['text/html']},
21
+ domain_aliases: %w(www.google.com google.com)
22
+ end
23
+
24
+ it 'should be fetched' do
25
+ page.fetched?.should be_true
26
+ end
27
+
28
+ it 'should honor domain_aliases attribute' do
29
+ page.links.count.should be 4
21
30
  end
22
31
  end
@@ -9,9 +9,12 @@ describe Polipus::QueueOverflow::Manager do
9
9
  @storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
10
10
  @redis_q = Redis::Queue.new("queue_test","bp_queue_test", :redis => Redis.new())
11
11
  @queue_overflow = Polipus::QueueOverflow.mongo_queue(nil, "queue_test")
12
+ @redis = Redis.new
12
13
  @polipus = flexmock("polipus")
13
14
  @polipus.should_receive(:queue_overflow_adapter).and_return(@queue_overflow)
14
15
  @polipus.should_receive(:storage).and_return(@storage)
16
+ @polipus.should_receive(:redis).and_return(@redis)
17
+ @polipus.should_receive(:job_name).and_return("___test")
15
18
  @manager = Polipus::QueueOverflow::Manager.new(@polipus, @redis_q, 10)
16
19
  end
17
20
 
metadata CHANGED
@@ -1,198 +1,276 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polipus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francesco Laurita
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-01 00:00:00.000000000 Z
11
+ date: 2014-03-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis-bloomfilter
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '0.0'
20
+ - - ! '>='
18
21
  - !ruby/object:Gem::Version
19
22
  version: 0.0.1
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
- - - "~>"
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '0.0'
30
+ - - ! '>='
25
31
  - !ruby/object:Gem::Version
26
32
  version: 0.0.1
27
33
  - !ruby/object:Gem::Dependency
28
34
  name: redis-queue
29
35
  requirement: !ruby/object:Gem::Requirement
30
36
  requirements:
31
- - - "~>"
37
+ - - ~>
38
+ - !ruby/object:Gem::Version
39
+ version: '0.0'
40
+ - - ! '>='
32
41
  - !ruby/object:Gem::Version
33
42
  version: 0.0.3
34
43
  type: :runtime
35
44
  prerelease: false
36
45
  version_requirements: !ruby/object:Gem::Requirement
37
46
  requirements:
38
- - - "~>"
47
+ - - ~>
48
+ - !ruby/object:Gem::Version
49
+ version: '0.0'
50
+ - - ! '>='
39
51
  - !ruby/object:Gem::Version
40
52
  version: 0.0.3
41
53
  - !ruby/object:Gem::Dependency
42
54
  name: nokogiri
43
55
  requirement: !ruby/object:Gem::Requirement
44
56
  requirements:
45
- - - "~>"
57
+ - - ~>
58
+ - !ruby/object:Gem::Version
59
+ version: '1.6'
60
+ - - ! '>='
46
61
  - !ruby/object:Gem::Version
47
62
  version: 1.6.0
48
63
  type: :runtime
49
64
  prerelease: false
50
65
  version_requirements: !ruby/object:Gem::Requirement
51
66
  requirements:
52
- - - "~>"
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '1.6'
70
+ - - ! '>='
53
71
  - !ruby/object:Gem::Version
54
72
  version: 1.6.0
55
73
  - !ruby/object:Gem::Dependency
56
74
  name: hiredis
57
75
  requirement: !ruby/object:Gem::Requirement
58
76
  requirements:
59
- - - "~>"
77
+ - - ~>
78
+ - !ruby/object:Gem::Version
79
+ version: '0.4'
80
+ - - ! '>='
60
81
  - !ruby/object:Gem::Version
61
82
  version: 0.4.5
62
83
  type: :runtime
63
84
  prerelease: false
64
85
  version_requirements: !ruby/object:Gem::Requirement
65
86
  requirements:
66
- - - "~>"
87
+ - - ~>
88
+ - !ruby/object:Gem::Version
89
+ version: '0.4'
90
+ - - ! '>='
67
91
  - !ruby/object:Gem::Version
68
92
  version: 0.4.5
69
93
  - !ruby/object:Gem::Dependency
70
94
  name: redis
71
95
  requirement: !ruby/object:Gem::Requirement
72
96
  requirements:
73
- - - "~>"
97
+ - - ~>
98
+ - !ruby/object:Gem::Version
99
+ version: '3.0'
100
+ - - ! '>='
74
101
  - !ruby/object:Gem::Version
75
102
  version: 3.0.4
76
103
  type: :runtime
77
104
  prerelease: false
78
105
  version_requirements: !ruby/object:Gem::Requirement
79
106
  requirements:
80
- - - "~>"
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: '3.0'
110
+ - - ! '>='
81
111
  - !ruby/object:Gem::Version
82
112
  version: 3.0.4
83
113
  - !ruby/object:Gem::Dependency
84
114
  name: mongo
85
115
  requirement: !ruby/object:Gem::Requirement
86
116
  requirements:
87
- - - "~>"
117
+ - - ~>
118
+ - !ruby/object:Gem::Version
119
+ version: '1.9'
120
+ - - ! '>='
88
121
  - !ruby/object:Gem::Version
89
122
  version: 1.9.2
90
123
  type: :runtime
91
124
  prerelease: false
92
125
  version_requirements: !ruby/object:Gem::Requirement
93
126
  requirements:
94
- - - "~>"
127
+ - - ~>
128
+ - !ruby/object:Gem::Version
129
+ version: '1.9'
130
+ - - ! '>='
95
131
  - !ruby/object:Gem::Version
96
132
  version: 1.9.2
97
133
  - !ruby/object:Gem::Dependency
98
134
  name: bson_ext
99
135
  requirement: !ruby/object:Gem::Requirement
100
136
  requirements:
101
- - - "~>"
137
+ - - ~>
138
+ - !ruby/object:Gem::Version
139
+ version: '1.9'
140
+ - - ! '>='
102
141
  - !ruby/object:Gem::Version
103
142
  version: 1.9.2
104
143
  type: :runtime
105
144
  prerelease: false
106
145
  version_requirements: !ruby/object:Gem::Requirement
107
146
  requirements:
108
- - - "~>"
147
+ - - ~>
148
+ - !ruby/object:Gem::Version
149
+ version: '1.9'
150
+ - - ! '>='
109
151
  - !ruby/object:Gem::Version
110
152
  version: 1.9.2
111
153
  - !ruby/object:Gem::Dependency
112
154
  name: aws-s3
113
155
  requirement: !ruby/object:Gem::Requirement
114
156
  requirements:
115
- - - "~>"
157
+ - - ~>
158
+ - !ruby/object:Gem::Version
159
+ version: '0.6'
160
+ - - ! '>='
116
161
  - !ruby/object:Gem::Version
117
162
  version: 0.6.3
118
163
  type: :runtime
119
164
  prerelease: false
120
165
  version_requirements: !ruby/object:Gem::Requirement
121
166
  requirements:
122
- - - "~>"
167
+ - - ~>
168
+ - !ruby/object:Gem::Version
169
+ version: '0.6'
170
+ - - ! '>='
123
171
  - !ruby/object:Gem::Version
124
172
  version: 0.6.3
125
173
  - !ruby/object:Gem::Dependency
126
174
  name: http-cookie
127
175
  requirement: !ruby/object:Gem::Requirement
128
176
  requirements:
129
- - - "~>"
177
+ - - ~>
178
+ - !ruby/object:Gem::Version
179
+ version: '1.0'
180
+ - - ! '>='
130
181
  - !ruby/object:Gem::Version
131
182
  version: 1.0.1
132
183
  type: :runtime
133
184
  prerelease: false
134
185
  version_requirements: !ruby/object:Gem::Requirement
135
186
  requirements:
136
- - - "~>"
187
+ - - ~>
188
+ - !ruby/object:Gem::Version
189
+ version: '1.0'
190
+ - - ! '>='
137
191
  - !ruby/object:Gem::Version
138
192
  version: 1.0.1
139
193
  - !ruby/object:Gem::Dependency
140
194
  name: rspec
141
195
  requirement: !ruby/object:Gem::Requirement
142
196
  requirements:
143
- - - ">="
197
+ - - ~>
198
+ - !ruby/object:Gem::Version
199
+ version: '2.14'
200
+ - - ! '>='
144
201
  - !ruby/object:Gem::Version
145
- version: '0'
202
+ version: 2.14.1
146
203
  type: :development
147
204
  prerelease: false
148
205
  version_requirements: !ruby/object:Gem::Requirement
149
206
  requirements:
150
- - - ">="
207
+ - - ~>
208
+ - !ruby/object:Gem::Version
209
+ version: '2.14'
210
+ - - ! '>='
151
211
  - !ruby/object:Gem::Version
152
- version: '0'
212
+ version: 2.14.1
153
213
  - !ruby/object:Gem::Dependency
154
214
  name: vcr
155
215
  requirement: !ruby/object:Gem::Requirement
156
216
  requirements:
157
- - - "~>"
217
+ - - ~>
218
+ - !ruby/object:Gem::Version
219
+ version: '2.5'
220
+ - - ! '>='
158
221
  - !ruby/object:Gem::Version
159
222
  version: 2.5.0
160
223
  type: :development
161
224
  prerelease: false
162
225
  version_requirements: !ruby/object:Gem::Requirement
163
226
  requirements:
164
- - - "~>"
227
+ - - ~>
228
+ - !ruby/object:Gem::Version
229
+ version: '2.5'
230
+ - - ! '>='
165
231
  - !ruby/object:Gem::Version
166
232
  version: 2.5.0
167
233
  - !ruby/object:Gem::Dependency
168
234
  name: webmock
169
235
  requirement: !ruby/object:Gem::Requirement
170
236
  requirements:
171
- - - ">="
237
+ - - ! '>='
238
+ - !ruby/object:Gem::Version
239
+ version: 1.8.0
240
+ - - <
172
241
  - !ruby/object:Gem::Version
173
- version: '0'
242
+ version: '1.12'
174
243
  type: :development
175
244
  prerelease: false
176
245
  version_requirements: !ruby/object:Gem::Requirement
177
246
  requirements:
178
- - - ">="
247
+ - - ! '>='
179
248
  - !ruby/object:Gem::Version
180
- version: '0'
249
+ version: 1.8.0
250
+ - - <
251
+ - !ruby/object:Gem::Version
252
+ version: '1.12'
181
253
  - !ruby/object:Gem::Dependency
182
254
  name: flexmock
183
255
  requirement: !ruby/object:Gem::Requirement
184
256
  requirements:
185
- - - "~>"
257
+ - - ~>
258
+ - !ruby/object:Gem::Version
259
+ version: '1.3'
260
+ - - ! '>='
186
261
  - !ruby/object:Gem::Version
187
262
  version: 1.3.2
188
263
  type: :development
189
264
  prerelease: false
190
265
  version_requirements: !ruby/object:Gem::Requirement
191
266
  requirements:
192
- - - "~>"
267
+ - - ~>
268
+ - !ruby/object:Gem::Version
269
+ version: '1.3'
270
+ - - ! '>='
193
271
  - !ruby/object:Gem::Version
194
272
  version: 1.3.2
195
- description: "\n An easy to use distributed web-crawler framework based on Redis\n
273
+ description: ! "\n An easy to use distributed web-crawler framework based on Redis\n
196
274
  \ "
197
275
  email:
198
276
  - francesco.laurita@gmail.com
@@ -200,9 +278,9 @@ executables: []
200
278
  extensions: []
201
279
  extra_rdoc_files: []
202
280
  files:
203
- - ".document"
204
- - ".gitignore"
205
- - ".rspec"
281
+ - .document
282
+ - .gitignore
283
+ - .rspec
206
284
  - Gemfile
207
285
  - LICENSE.txt
208
286
  - README.md
@@ -254,7 +332,8 @@ files:
254
332
  - spec/storage_s3_spec.rb
255
333
  - spec/url_tracker_spec.rb
256
334
  homepage: https://github.com/taganaka/polipus
257
- licenses: []
335
+ licenses:
336
+ - MIT
258
337
  metadata: {}
259
338
  post_install_message:
260
339
  rdoc_options: []
@@ -262,17 +341,17 @@ require_paths:
262
341
  - lib
263
342
  required_ruby_version: !ruby/object:Gem::Requirement
264
343
  requirements:
265
- - - ">="
344
+ - - ! '>='
266
345
  - !ruby/object:Gem::Version
267
346
  version: '0'
268
347
  required_rubygems_version: !ruby/object:Gem::Requirement
269
348
  requirements:
270
- - - ">="
349
+ - - ! '>='
271
350
  - !ruby/object:Gem::Version
272
351
  version: '0'
273
352
  requirements: []
274
353
  rubyforge_project: polipus
275
- rubygems_version: 2.2.1
354
+ rubygems_version: 2.2.2
276
355
  signing_key:
277
356
  specification_version: 4
278
357
  summary: Polipus distributed web-crawler framework