polipus 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/polipus.rb +7 -8
- data/lib/polipus/http.rb +0 -2
- data/lib/polipus/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MjkyMTAxZWU0ODJmMzI5OTcwZjI0ZTFlNzZjOTYxNzY1MGUxOTJjZQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
OTg4M2UzY2I2ODNkMWMxZDYwMzkxOGI5MmRhMWJkN2I0N2ViYTMyMg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YWE4ZjM0NDA1NWJiMGVkYzUyMTM5ZWJjN2I3YTU5YTI2NDIwNmI0ZTNkZTRl
|
10
|
+
NTg0MzQyOTgxMGYzZjBlZGVkMmE2ODJkZTA0ZTg3NzY0MTJjZTljYWEwN2Fm
|
11
|
+
YTc4MjI3NGViMmNlMjQzNWIxODVlMmNlNWJjNDFhMzE1MzQxMjk=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YjRlMmVjYzA4MDVlYzAxMmM0NGYwMWFiYWMwNmYxNDg3MmMyOTY4YzQzZDQ2
|
14
|
+
OWZjNWUzYjJlMzk3MDg5NjE4ZmQxYTk5MWQ1Y2M0ZmQ5MjhkYjQxZjdkNzRk
|
15
|
+
ZjZjNzkyMjJmM2M0MzNiNDNkOTQxOWYyMDc2MDdhZTJlNWE1Y2M=
|
data/lib/polipus.rb
CHANGED
@@ -106,9 +106,7 @@ module Polipus
|
|
106
106
|
|
107
107
|
@storage = @options[:storage] ||= Storage.dev_null
|
108
108
|
|
109
|
-
@http_pool = []
|
110
109
|
@workers_pool = []
|
111
|
-
@queues_pool = []
|
112
110
|
|
113
111
|
@follow_links_like = []
|
114
112
|
@skip_links_like = []
|
@@ -150,8 +148,8 @@ module Polipus
|
|
150
148
|
@options[:workers].times do |worker_number|
|
151
149
|
@workers_pool << Thread.new do
|
152
150
|
@logger.debug { "Start worker #{worker_number}" }
|
153
|
-
http =
|
154
|
-
queue =
|
151
|
+
http = HTTP.new(@options)
|
152
|
+
queue = queue_factory
|
155
153
|
queue.process(false, @options[:queue_timeout]) do |message|
|
156
154
|
|
157
155
|
next if message.nil?
|
@@ -216,7 +214,7 @@ module Polipus
|
|
216
214
|
if @options[:depth_limit] == false || @options[:depth_limit] > page.depth
|
217
215
|
links_for(page).each do |url_to_visit|
|
218
216
|
next unless should_be_visited?(url_to_visit)
|
219
|
-
enqueue url_to_visit, page
|
217
|
+
enqueue url_to_visit, page
|
220
218
|
end
|
221
219
|
else
|
222
220
|
@logger.info { "[worker ##{worker_number}] Depth limit reached #{page.depth}" }
|
@@ -396,12 +394,12 @@ module Polipus
|
|
396
394
|
end
|
397
395
|
|
398
396
|
# The url is enqueued for a later visit
|
399
|
-
def enqueue(url_to_visit, current_page
|
397
|
+
def enqueue(url_to_visit, current_page)
|
400
398
|
page_to_visit = Page.new(url_to_visit.to_s, referer: current_page.url.to_s, depth: current_page.depth + 1)
|
401
|
-
|
399
|
+
internal_queue << page_to_visit.to_json
|
402
400
|
to_track = @options[:include_query_string_in_saved_page] ? url_to_visit.to_s : url_to_visit.to_s.gsub(/\?.*$/, '')
|
403
401
|
url_tracker.visit to_track
|
404
|
-
@logger.debug { "Added
|
402
|
+
@logger.debug { "Added (#{url_to_visit}) to the queue" }
|
405
403
|
end
|
406
404
|
|
407
405
|
# It creates a redis client
|
@@ -444,6 +442,7 @@ module Polipus
|
|
444
442
|
removed, restored = @overflow_manager.perform
|
445
443
|
@logger.info { "Overflow Manager: items removed=#{removed}, items restored=#{restored}, items stored=#{queue_overflow_adapter.size}" }
|
446
444
|
sleep @options[:queue_overflow_manager_check_time]
|
445
|
+
break if SignalHandler.terminated?
|
447
446
|
end
|
448
447
|
|
449
448
|
end
|
data/lib/polipus/http.rb
CHANGED
@@ -209,8 +209,6 @@ module Polipus
|
|
209
209
|
end
|
210
210
|
|
211
211
|
def refresh_connection(url)
|
212
|
-
proxy_host, proxy_port = proxy_host_port unless @opts[:proxy_host_port].nil?
|
213
|
-
|
214
212
|
if @opts[:logger] && proxy_host && proxy_port
|
215
213
|
@opts[:logger].debug { "Request #{url} using proxy: #{proxy_host}:#{proxy_port}" }
|
216
214
|
end
|
data/lib/polipus/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polipus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.3
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francesco Laurita
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis-bloomfilter
|