pba_crawler 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,12 +12,16 @@ module PbaCrawler
12
12
  end
13
13
 
14
14
  def crawl_url(url)
15
+ @@proxy = @@proxies.fetch(@@cursor)
16
+
17
+ p "#{url}|#{@@proxy.address}|#{@@proxy.port}"
18
+
15
19
  agent = Mechanize.new { |a|
16
20
  a.html_parser = Nokogiri::HTML
17
21
  a.open_timeout = 20
18
22
  a.read_timeout = 20
19
23
  }
20
- @@proxy = @@proxies.fetch(@@cursor)
24
+
21
25
  agent.set_proxy(@@proxy.address,@@proxy.port)
22
26
  agent.user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; nl; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13'
23
27
 
@@ -40,9 +44,9 @@ module PbaCrawler
40
44
 
41
45
  def rescue_get_url(agent,url)
42
46
  if @@cursor + 1 < @@proxies.size
43
- p "#{agent.proxy_addr} #{agent.proxy_port}"
44
47
  @@cursor = @@cursor + 1
45
48
  @@proxy = @@proxies.fetch(@@cursor)
49
+ p "#{url}|#{@@proxy.address}|#{@@proxy.port}"
46
50
  agent.set_proxy(@@proxy.address,@@proxy.port)
47
51
  response = get_url(agent,url)
48
52
  end
@@ -1,3 +1,3 @@
1
1
  module PbaCrawler
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.8"
3
3
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: pba_crawler
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.7
5
+ version: 0.0.8
6
6
  platform: ruby
7
7
  authors:
8
8
  - Pierre BASILE
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-03-22 00:00:00 +01:00
13
+ date: 2011-04-19 00:00:00 +02:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency