scrubber-scrubyt 0.4.11 → 0.4.12

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -17,7 +17,7 @@ task "cleanup_readme" => ["rdoc"]
17
17
 
18
18
  gem_spec = Gem::Specification.new do |s|
19
19
  s.name = 'scrubyt'
20
- s.version = '0.4.1'
20
+ s.version = '0.4.12'
21
21
  s.summary = 'A powerful Web-scraping framework built on Mechanize and Hpricot (and FireWatir)'
22
22
  s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
23
23
  # Files containing Test::Unit test cases.
@@ -29,13 +29,14 @@ module Scrubyt
29
29
  #_doc_url_ - the url or file name to fetch
30
30
  def self.fetch(doc_url, *args)
31
31
  #Refactor this crap!!! with option_accessor stuff
32
-
33
32
  if args.size > 0
34
33
  mechanize_doc = args[0][:mechanize_doc]
35
34
  html = args[0][:html]
36
35
  resolve = args[0][:resolve]
37
36
  basic_auth = args[0][:basic_auth]
38
37
  parse_and_set_basic_auth(basic_auth) if basic_auth
38
+ proxy = args[0][:proxy]
39
+ parse_and_set_proxy(proxy) if proxy
39
40
  if html
40
41
  @@current_doc_protocol = 'string'
41
42
  mechanize_doc = page = WWW::Mechanize::Page.new(nil, {'content-type' => 'text/html'}, html)
@@ -122,6 +123,38 @@ module Scrubyt
122
123
  @@original_host_name ||= @@host_name
123
124
  end #end of method store_host_name
124
125
 
126
# Parses a proxy specification and points the agent at that proxy.
#
# _proxy_ - the specification string, or a hash carrying it under :proxy.
# String forms handled here:
#   'host:port'
#   'user:host:port'
#   'user@pass:host:port'
#
# Stores the parsed pieces in @@host, @@port, @@proxy_user and @@proxy_pass,
# logs the result and calls @@agent.set_proxy(@@host, @@port).
# Prints an error and exits when host or port is missing.
def self.parse_and_set_proxy(proxy)
  # The visible caller hands over args[0][:proxy] directly. Indexing a String
  # with a symbol raises TypeError, so only unwrap when a hash was passed.
  proxy = proxy[:proxy] if proxy.is_a?(Hash)
  parts = proxy.to_s.split(':')

  if parts.size > 2
    # Credentials come first, written as 'user' or 'user@pass'.
    user_pass = parts[0].split('@')
    @@proxy_user = user_pass[0]
    @@proxy_pass = user_pass[1] if user_pass.size > 1
    @@host = parts[1]
    @@port = parts[2]
  else
    @@host = parts[0]
    @@port = parts[1]
  end

  # Keep the canonical lower-case spelling for localhost. A bare 'localhost'
  # (no port) is no longer given the bogus port 'localhost'; it falls through
  # to the validation below instead.
  @@host = 'localhost' if @@host && @@host.casecmp('localhost').zero?

  if @@host.to_s.empty? || @@port.to_s.empty?
    puts "Invalid proxy specification..."
    puts "neither host nor port can be nil!"
    exit
  end

  # The credential variables are only assigned above for three-part specs;
  # guard the reads so a plain 'host:port' cannot trip over an uninitialized
  # class variable (cannot tell from here whether they are set elsewhere).
  user = defined?(@@proxy_user) ? @@proxy_user : nil
  pass = defined?(@@proxy_pass) ? @@proxy_pass : nil
  # NOTE(review): the password is written to the log in clear text, as before.
  Scrubyt.log :ACTION, "[ACTION] Setting proxy: host=<#{@@host}>, port=<#{@@port}>, username=<#{user}>, password=<#{pass}>"
  # NOTE(review): the parsed credentials are not passed on to the agent here;
  # confirm whether set_proxy should receive them.
  @@agent.set_proxy(@@host, @@port)
end
157
+
125
158
  def self.determine_protocol
126
159
  old_protocol = @@current_doc_protocol
127
160
  new_protocol = case @@current_doc_url
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrubber-scrubyt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.11
4
+ version: 0.4.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Szinek