scrubber-scrubyt 0.4.28 → 0.4.30

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -17,7 +17,7 @@ task "cleanup_readme" => ["rdoc"]
17
17
 
18
18
  gem_spec = Gem::Specification.new do |s|
19
19
  s.name = 'scrubyt'
20
- s.version = '0.4.26'
20
+ s.version = '0.4.30'
21
21
  s.summary = 'A powerful Web-scraping framework built on Mechanize and Hpricot (and FireWatir)'
22
22
  s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
23
23
  # Files containing Test::Unit test cases.
@@ -13,8 +13,8 @@ module Scrubyt
13
13
  module Firewatir
14
14
 
15
15
  def self.included(base)
16
- base.module_eval do
17
- @@agent = FireWatir::Firefox.new
16
+ base.module_eval do
17
+ @@agent = FireWatir::Firefox.new unless defined? @@agent
18
18
  @@current_doc_url = nil
19
19
  @@current_doc_protocol = nil
20
20
  @@base_dir = nil
@@ -60,6 +60,11 @@ module Scrubyt
60
60
  store_host_name(@@agent.url) # in case we're on a new host
61
61
  end
62
62
 
63
+ def self.use_current_page
64
+ @@mechanize_doc = "<html>#{@@agent.html}</html>"
65
+ @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
66
+ end
67
+
63
68
  def self.frame(attribute, value)
64
69
  if @@current_frame
65
70
  @@current_frame.frame(attribute, value)
@@ -111,7 +116,24 @@ module Scrubyt
111
116
  @@mechanize_doc = "<html>#{@@agent.html}</html>"
112
117
  @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
113
118
  Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
114
- end
119
+ end
120
+
121
+ def self.click_by_xpath_if_exists(xpath, wait_secs=0)
122
+ begin
123
+ result_page = @@agent.element_by_xpath(xpath).click
124
+ sleep(wait_secs) if wait_secs > 0
125
+ @@agent.wait
126
+
127
+ extractor.evaluate_extractor
128
+
129
+ @@current_doc_url = @@agent.url
130
+ @@mechanize_doc = "<html>#{@@agent.html}</html>"
131
+ @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
132
+ Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
133
+ rescue Watir::Exception::UnknownObjectException
134
+ Scrubyt.log :INFO, "XPath #{xpath} doesn't exist in this document"
135
+ end
136
+ end
115
137
 
116
138
  def self.click_by_xpath(xpath, wait_secs=0)
117
139
  Scrubyt.log :ACTION, "Clicking by XPath : %p" % xpath
@@ -56,6 +56,10 @@ module Scrubyt
56
56
  def fetch(*args)
57
57
  FetchAction.fetch(*args)
58
58
  end
59
+
60
+ def use_current_page
61
+ FetchAction.use_current_page
62
+ end
59
63
  ##
60
64
  #Submit the current form
61
65
  def submit(index=nil, type=nil)
@@ -76,6 +80,10 @@ module Scrubyt
76
80
  FetchAction.click_link(link_spec, 0, sleep_secs)
77
81
  end
78
82
 
83
+ def click_by_xpath_if_exists(xpath, sleep_secs=0)
84
+ FetchAction.click_by_xpath_if_exists(xpath, sleep_secs)
85
+ end
86
+
79
87
  def click_by_xpath(xpath)
80
88
  FetchAction.click_by_xpath(xpath)
81
89
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrubber-scrubyt
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.28
4
+ version: 0.4.30
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Szinek