polipus 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MGZhM2Q1OWE5ZTkxZWE3Y2JhMDJkNzZkMWRjMWZjYjI4YmRhODJmYw==
4
+ MjkyMTAxZWU0ODJmMzI5OTcwZjI0ZTFlNzZjOTYxNzY1MGUxOTJjZQ==
5
5
  data.tar.gz: !binary |-
6
- ODg4NTk5ZjhjNDM0ZjBhN2M2OWJlOWQyOTE4OWFiZmY1ZmQxMTZiYQ==
6
+ OTg4M2UzY2I2ODNkMWMxZDYwMzkxOGI5MmRhMWJkN2I0N2ViYTMyMg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- NTdmYWZhMmMzYTQxOWY5OTBlNGE1ZWQzNWYwMThjMjAwNzQ4NmU4MDY4OWRi
10
- OWVkNzEwNmYwNTY3OGI4NmFkZGJiNzNhN2I3M2ZjMDUxNjAzMDAyZjEyYmZj
11
- ZGFkODliNDc5ZTYzYWE2ODNiMzdkMWFjZGExYjgzNTQyNmM4MDc=
9
+ YWE4ZjM0NDA1NWJiMGVkYzUyMTM5ZWJjN2I3YTU5YTI2NDIwNmI0ZTNkZTRl
10
+ NTg0MzQyOTgxMGYzZjBlZGVkMmE2ODJkZTA0ZTg3NzY0MTJjZTljYWEwN2Fm
11
+ YTc4MjI3NGViMmNlMjQzNWIxODVlMmNlNWJjNDFhMzE1MzQxMjk=
12
12
  data.tar.gz: !binary |-
13
- NGYzNDc2ODQ3OTBiMzE5N2M5YmM3ZDliN2IyYWRmNjBiZDI3ZTYyYjkxNjBk
14
- MDFhZjA0NDE5YjFkYzdiZTQxMjg0ZGEwODA3Nzk0YWZiNDRmY2I1MmE2ZWI4
15
- MjQxNTU4NjgwOTM1ZDdlYjM4Mjg0NjhjY2M2OGU5MDA1YjNjMjY=
13
+ YjRlMmVjYzA4MDVlYzAxMmM0NGYwMWFiYWMwNmYxNDg3MmMyOTY4YzQzZDQ2
14
+ OWZjNWUzYjJlMzk3MDg5NjE4ZmQxYTk5MWQ1Y2M0ZmQ5MjhkYjQxZjdkNzRk
15
+ ZjZjNzkyMjJmM2M0MzNiNDNkOTQxOWYyMDc2MDdhZTJlNWE1Y2M=
data/lib/polipus.rb CHANGED
@@ -106,9 +106,7 @@ module Polipus
106
106
 
107
107
  @storage = @options[:storage] ||= Storage.dev_null
108
108
 
109
- @http_pool = []
110
109
  @workers_pool = []
111
- @queues_pool = []
112
110
 
113
111
  @follow_links_like = []
114
112
  @skip_links_like = []
@@ -150,8 +148,8 @@ module Polipus
150
148
  @options[:workers].times do |worker_number|
151
149
  @workers_pool << Thread.new do
152
150
  @logger.debug { "Start worker #{worker_number}" }
153
- http = @http_pool[worker_number] ||= HTTP.new(@options)
154
- queue = @queues_pool[worker_number] ||= queue_factory
151
+ http = HTTP.new(@options)
152
+ queue = queue_factory
155
153
  queue.process(false, @options[:queue_timeout]) do |message|
156
154
 
157
155
  next if message.nil?
@@ -216,7 +214,7 @@ module Polipus
216
214
  if @options[:depth_limit] == false || @options[:depth_limit] > page.depth
217
215
  links_for(page).each do |url_to_visit|
218
216
  next unless should_be_visited?(url_to_visit)
219
- enqueue url_to_visit, page, queue
217
+ enqueue url_to_visit, page
220
218
  end
221
219
  else
222
220
  @logger.info { "[worker ##{worker_number}] Depth limit reached #{page.depth}" }
@@ -396,12 +394,12 @@ module Polipus
396
394
  end
397
395
 
398
396
  # The url is enqueued for a later visit
399
- def enqueue(url_to_visit, current_page, queue)
397
+ def enqueue(url_to_visit, current_page)
400
398
  page_to_visit = Page.new(url_to_visit.to_s, referer: current_page.url.to_s, depth: current_page.depth + 1)
401
- queue << page_to_visit.to_json
399
+ internal_queue << page_to_visit.to_json
402
400
  to_track = @options[:include_query_string_in_saved_page] ? url_to_visit.to_s : url_to_visit.to_s.gsub(/\?.*$/, '')
403
401
  url_tracker.visit to_track
404
- @logger.debug { "Added [#{url_to_visit}] to the queue" }
402
+ @logger.debug { "Added (#{url_to_visit}) to the queue" }
405
403
  end
406
404
 
407
405
  # It creates a redis client
@@ -444,6 +442,7 @@ module Polipus
444
442
  removed, restored = @overflow_manager.perform
445
443
  @logger.info { "Overflow Manager: items removed=#{removed}, items restored=#{restored}, items stored=#{queue_overflow_adapter.size}" }
446
444
  sleep @options[:queue_overflow_manager_check_time]
445
+ break if SignalHandler.terminated?
447
446
  end
448
447
 
449
448
  end
data/lib/polipus/http.rb CHANGED
@@ -209,8 +209,6 @@ module Polipus
209
209
  end
210
210
 
211
211
  def refresh_connection(url)
212
- proxy_host, proxy_port = proxy_host_port unless @opts[:proxy_host_port].nil?
213
-
214
212
  if @opts[:logger] && proxy_host && proxy_port
215
213
  @opts[:logger].debug { "Request #{url} using proxy: #{proxy_host}:#{proxy_port}" }
216
214
  end
data/lib/polipus/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
2
  module Polipus
3
- VERSION = '0.3.3'
3
+ VERSION = '0.3.4'
4
4
  HOMEPAGE = 'https://github.com/taganaka/polipus'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polipus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francesco Laurita
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-27 00:00:00.000000000 Z
11
+ date: 2014-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis-bloomfilter