RubyGems - scrubber-scrubyt - Versions diffs - 0.4.28 → 0.4.30 - Mend

scrubber-scrubyt 0.4.28 → 0.4.30

Files changed (4)

data/Rakefile +1 -1
data/lib/scrubyt/core/navigation/agents/firewatir.rb +25 -3
data/lib/scrubyt/core/navigation/navigation_actions.rb +8 -0
metadata +1 -1

data/Rakefile CHANGED Viewed

@@ -17,7 +17,7 @@ task "cleanup_readme" => ["rdoc"]
 gem_spec = Gem::Specification.new do |s|
   s.name = 'scrubyt'
-  s.version = '0.4.26'
+  s.version = '0.4.30'
   s.summary = 'A powerful Web-scraping framework built on Mechanize and Hpricot (and FireWatir)'
   s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
   # Files containing Test::Unit test cases.

data/lib/scrubyt/core/navigation/agents/firewatir.rb CHANGED Viewed

@@ -13,8 +13,8 @@ module Scrubyt
     module Firewatir
       def self.included(base)
-        base.module_eval do
-          @@agent = FireWatir::Firefox.new
+        base.module_eval do
+          @@agent = FireWatir::Firefox.new unless defined? @@agent
           @@current_doc_url = nil
           @@current_doc_protocol = nil
           @@base_dir = nil
@@ -60,6 +60,11 @@ module Scrubyt
             store_host_name(@@agent.url)   # in case we're on a new host
           end
+          def self.use_current_page
+            @@mechanize_doc = "<html>#{@@agent.html}</html>"
+            @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
+          end
           def self.frame(attribute, value)
             if @@current_frame
               @@current_frame.frame(attribute, value)
@@ -111,7 +116,24 @@ module Scrubyt
             @@mechanize_doc = "<html>#{@@agent.html}</html>"
             @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
             Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
-          end
+          end
+          def self.click_by_xpath_if_exists(xpath, wait_secs=0)
+            begin
+              result_page = @@agent.element_by_xpath(xpath).click
+              sleep(wait_secs) if wait_secs > 0
+              @@agent.wait
+              extractor.evaluate_extractor
+              @@current_doc_url = @@agent.url
+              @@mechanize_doc = "<html>#{@@agent.html}</html>"
+              @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))
+              Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
+            rescue Watir::Exception::UnknownObjectException
+              Scrubyt.log :INFO, "XPath #{xpath} doesn't exist in this document"
+            end
+          end
           def self.click_by_xpath(xpath, wait_secs=0)
             Scrubyt.log :ACTION, "Clicking by XPath : %p" % xpath

data/lib/scrubyt/core/navigation/navigation_actions.rb CHANGED Viewed

@@ -56,6 +56,10 @@ module Scrubyt
     def fetch(*args)
       FetchAction.fetch(*args)
     end
+    def use_current_page
+      FetchAction.use_current_page
+    end
     ##
     #Submit the current form
     def submit(index=nil, type=nil)
@@ -76,6 +80,10 @@ module Scrubyt
       FetchAction.click_link(link_spec, 0, sleep_secs)
     end
+    def click_by_xpath_if_exists(xpath, sleep_secs=0)
+      FetchAction.click_by_xpath_if_exists(xpath, sleep_secs)
+    end
     def click_by_xpath(xpath)
       FetchAction.click_by_xpath(xpath)
     end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: scrubber-scrubyt
 version: !ruby/object:Gem::Version
-  version: 0.4.28
+  version: 0.4.30
 platform: ruby
 authors:
 - Peter Szinek