polipus 0.3.3 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MGZhM2Q1OWE5ZTkxZWE3Y2JhMDJkNzZkMWRjMWZjYjI4YmRhODJmYw==
4
+ MjkyMTAxZWU0ODJmMzI5OTcwZjI0ZTFlNzZjOTYxNzY1MGUxOTJjZQ==
5
5
  data.tar.gz: !binary |-
6
- ODg4NTk5ZjhjNDM0ZjBhN2M2OWJlOWQyOTE4OWFiZmY1ZmQxMTZiYQ==
6
+ OTg4M2UzY2I2ODNkMWMxZDYwMzkxOGI5MmRhMWJkN2I0N2ViYTMyMg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- NTdmYWZhMmMzYTQxOWY5OTBlNGE1ZWQzNWYwMThjMjAwNzQ4NmU4MDY4OWRi
10
- OWVkNzEwNmYwNTY3OGI4NmFkZGJiNzNhN2I3M2ZjMDUxNjAzMDAyZjEyYmZj
11
- ZGFkODliNDc5ZTYzYWE2ODNiMzdkMWFjZGExYjgzNTQyNmM4MDc=
9
+ YWE4ZjM0NDA1NWJiMGVkYzUyMTM5ZWJjN2I3YTU5YTI2NDIwNmI0ZTNkZTRl
10
+ NTg0MzQyOTgxMGYzZjBlZGVkMmE2ODJkZTA0ZTg3NzY0MTJjZTljYWEwN2Fm
11
+ YTc4MjI3NGViMmNlMjQzNWIxODVlMmNlNWJjNDFhMzE1MzQxMjk=
12
12
  data.tar.gz: !binary |-
13
- NGYzNDc2ODQ3OTBiMzE5N2M5YmM3ZDliN2IyYWRmNjBiZDI3ZTYyYjkxNjBk
14
- MDFhZjA0NDE5YjFkYzdiZTQxMjg0ZGEwODA3Nzk0YWZiNDRmY2I1MmE2ZWI4
15
- MjQxNTU4NjgwOTM1ZDdlYjM4Mjg0NjhjY2M2OGU5MDA1YjNjMjY=
13
+ YjRlMmVjYzA4MDVlYzAxMmM0NGYwMWFiYWMwNmYxNDg3MmMyOTY4YzQzZDQ2
14
+ OWZjNWUzYjJlMzk3MDg5NjE4ZmQxYTk5MWQ1Y2M0ZmQ5MjhkYjQxZjdkNzRk
15
+ ZjZjNzkyMjJmM2M0MzNiNDNkOTQxOWYyMDc2MDdhZTJlNWE1Y2M=
data/lib/polipus.rb CHANGED
@@ -106,9 +106,7 @@ module Polipus
106
106
 
107
107
  @storage = @options[:storage] ||= Storage.dev_null
108
108
 
109
- @http_pool = []
110
109
  @workers_pool = []
111
- @queues_pool = []
112
110
 
113
111
  @follow_links_like = []
114
112
  @skip_links_like = []
@@ -150,8 +148,8 @@ module Polipus
150
148
  @options[:workers].times do |worker_number|
151
149
  @workers_pool << Thread.new do
152
150
  @logger.debug { "Start worker #{worker_number}" }
153
- http = @http_pool[worker_number] ||= HTTP.new(@options)
154
- queue = @queues_pool[worker_number] ||= queue_factory
151
+ http = HTTP.new(@options)
152
+ queue = queue_factory
155
153
  queue.process(false, @options[:queue_timeout]) do |message|
156
154
 
157
155
  next if message.nil?
@@ -216,7 +214,7 @@ module Polipus
216
214
  if @options[:depth_limit] == false || @options[:depth_limit] > page.depth
217
215
  links_for(page).each do |url_to_visit|
218
216
  next unless should_be_visited?(url_to_visit)
219
- enqueue url_to_visit, page, queue
217
+ enqueue url_to_visit, page
220
218
  end
221
219
  else
222
220
  @logger.info { "[worker ##{worker_number}] Depth limit reached #{page.depth}" }
@@ -396,12 +394,12 @@ module Polipus
396
394
  end
397
395
 
398
396
  # The url is enqueued for a later visit
399
- def enqueue(url_to_visit, current_page, queue)
397
+ def enqueue(url_to_visit, current_page)
400
398
  page_to_visit = Page.new(url_to_visit.to_s, referer: current_page.url.to_s, depth: current_page.depth + 1)
401
- queue << page_to_visit.to_json
399
+ internal_queue << page_to_visit.to_json
402
400
  to_track = @options[:include_query_string_in_saved_page] ? url_to_visit.to_s : url_to_visit.to_s.gsub(/\?.*$/, '')
403
401
  url_tracker.visit to_track
404
- @logger.debug { "Added [#{url_to_visit}] to the queue" }
402
+ @logger.debug { "Added (#{url_to_visit}) to the queue" }
405
403
  end
406
404
 
407
405
  # It creates a redis client
@@ -444,6 +442,7 @@ module Polipus
444
442
  removed, restored = @overflow_manager.perform
445
443
  @logger.info { "Overflow Manager: items removed=#{removed}, items restored=#{restored}, items stored=#{queue_overflow_adapter.size}" }
446
444
  sleep @options[:queue_overflow_manager_check_time]
445
+ break if SignalHandler.terminated?
447
446
  end
448
447
 
449
448
  end
data/lib/polipus/http.rb CHANGED
@@ -209,8 +209,6 @@ module Polipus
209
209
  end
210
210
 
211
211
  def refresh_connection(url)
212
- proxy_host, proxy_port = proxy_host_port unless @opts[:proxy_host_port].nil?
213
-
214
212
  if @opts[:logger] && proxy_host && proxy_port
215
213
  @opts[:logger].debug { "Request #{url} using proxy: #{proxy_host}:#{proxy_port}" }
216
214
  end
data/lib/polipus/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
2
  module Polipus
3
- VERSION = '0.3.3'
3
+ VERSION = '0.3.4'
4
4
  HOMEPAGE = 'https://github.com/taganaka/polipus'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polipus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francesco Laurita
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-27 00:00:00.000000000 Z
11
+ date: 2014-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis-bloomfilter