scrubber-scrubyt 0.4.11 → 0.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -17,7 +17,7 @@ task "cleanup_readme" => ["rdoc"]
17
17
 
18
18
  gem_spec = Gem::Specification.new do |s|
19
19
  s.name = 'scrubyt'
20
- s.version = '0.4.1'
20
+ s.version = '0.4.12'
21
21
  s.summary = 'A powerful Web-scraping framework built on Mechanize and Hpricot (and FireWatir)'
22
22
  s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
23
23
  # Files containing Test::Unit test cases.
@@ -29,13 +29,14 @@ module Scrubyt
29
29
  #_doc_url_ - the url or file name to fetch
30
30
  def self.fetch(doc_url, *args)
31
31
  #Refactor this crap!!! with option_accessor stuff
32
-
33
32
  if args.size > 0
34
33
  mechanize_doc = args[0][:mechanize_doc]
35
34
  html = args[0][:html]
36
35
  resolve = args[0][:resolve]
37
36
  basic_auth = args[0][:basic_auth]
38
37
  parse_and_set_basic_auth(basic_auth) if basic_auth
38
+ proxy = args[0][:proxy]
39
+ parse_and_set_proxy(proxy) if proxy
39
40
  if html
40
41
  @@current_doc_protocol = 'string'
41
42
  mechanize_doc = page = WWW::Mechanize::Page.new(nil, {'content-type' => 'text/html'}, html)
@@ -122,6 +123,38 @@ module Scrubyt
122
123
  @@original_host_name ||= @@host_name
123
124
  end #end of method store_host_name
124
125
 
126
# Internal helper: splits a proxy specification string into its components.
#
# The grammar is the one the original parser implied (colon-separated):
#   "host:port"
#   "user:host:port"
#   "user@pass:host:port"
#
# _spec_ - the proxy specification (anything responding to to_s; nil allowed)
#
# Returns a four element array [user, pass, host, port]. Every element is a
# String, or nil when that component is absent from the specification.
def self.split_proxy_spec(spec)
  parts = spec.to_s.split(':')
  user = pass = nil
  if parts.size > 2
    # The first segment carries the credentials; the limit of 2 keeps any
    # further '@' characters inside the password.
    user, pass = parts[0].split('@', 2)
    host = parts[1]
    port = parts[2]
  else
    host = parts[0]
    port = parts[1]
  end
  # Normalise the spelling of localhost (the original special-cased it).
  host = 'localhost' if host && host.downcase == 'localhost'
  [user, pass, host, port]
end

# Parses a proxy specification and configures the agent to use it.
#
# _proxy_ - either the specification string itself (see split_proxy_spec for
#           the accepted forms) or, for backward compatibility, a Hash that
#           holds the string under the :proxy key.
#
# Stores the result in @@host, @@port, @@proxy_user and @@proxy_pass. All four
# are assigned on every call, so credentials from an earlier call never leak
# into a later one and the log line below never reads an unset variable.
#
# Prints an error and exits the process when host or port is missing.
def self.parse_and_set_proxy(proxy)
  spec = proxy.is_a?(Hash) ? proxy[:proxy] : proxy
  @@proxy_user, @@proxy_pass, @@host, @@port = split_proxy_spec(spec)

  if @@host.nil? || @@host.empty? || @@port.nil? || @@port.empty?
    puts "Invalid proxy specification..."
    puts "neither host nor port can be nil!"
    exit
  end

  # NOTE(review): the password is written to the log in clear text, as before.
  Scrubyt.log :ACTION, "[ACTION] Setting proxy: host=<#{@@host}>, port=<#{@@port}>, username=<#{@@proxy_user}>, password=<#{@@proxy_pass}>"
  if @@proxy_user
    # Credentials were parsed, so hand them to the agent as well.
    @@agent.set_proxy(@@host, @@port, @@proxy_user, @@proxy_pass)
  else
    @@agent.set_proxy(@@host, @@port)
  end
end
157
+
125
158
  def self.determine_protocol
126
159
  old_protocol = @@current_doc_protocol
127
160
  new_protocol = case @@current_doc_url
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrubber-scrubyt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.11
4
+ version: 0.4.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Szinek