RubyGems - scrubber-scrubyt - Versions diffs - 0.4.25 → 0.4.28 - Mend

scrubber-scrubyt 0.4.25 → 0.4.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

data/Rakefile +4 -4
data/lib/scrubyt.rb +5 -5
data/lib/scrubyt/core/navigation/agents/firewatir.rb +8 -2
data/lib/scrubyt/core/navigation/agents/mechanize.rb +32 -9
data/lib/scrubyt/core/navigation/navigation_actions.rb +4 -0
metadata +1 -1

data/Rakefile CHANGED

@@ -17,13 +17,13 @@ task "cleanup_readme" => ["rdoc"]
 gem_spec = Gem::Specification.new do |s|
   s.name = 'scrubyt'
-  s.version = '0.4.20'
+  s.version = '0.4.26'
   s.summary = 'A powerful Web-scraping framework built on Mechanize and Hpricot (and FireWatir)'
   s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
   # Files containing Test::Unit test cases.
   s.test_files = FileList['test/unittests/**/*']
   # List of other files to be included.
-  s.files = FileList['COPYING', 'README', 'CHANGELOG', 'Rakefile', 'lib/**/*.rb']
+  s.files = FileList['COPYING', 'README.rdoc', 'CHANGELOG', 'Rakefile', 'lib/**/*.rb']
   s.author = 'Peter Szinek'
   s.email = 'peter@rubyrailways.com'
   s.homepage = 'http://www.scrubyt.org'
@@ -37,9 +37,9 @@ end
 ###################################################
 Rake::RDocTask.new do |generate_rdoc|
-     files = ['lib/**/*.rb', 'README', 'CHANGELOG']
+     files = ['lib/**/*.rb', 'README.rdoc', 'CHANGELOG']
      generate_rdoc.rdoc_files.add(files)
-     generate_rdoc.main = "README" # page to start on
+     generate_rdoc.main = "README.rdoc" # page to start on
      generate_rdoc.title = "Scrubyt Documentation"
      generate_rdoc.template = "resources/allison/allison.rb"
      generate_rdoc.rdoc_dir = 'doc' # rdoc output folder

data/lib/scrubyt.rb CHANGED

@@ -30,11 +30,11 @@ require "#{File.dirname(__FILE__)}/scrubyt/output/scrubyt_result.rb"
 require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/agents/mechanize.rb"
 # -- Making Firewatir optional --
-  if defined? Firewatir::Firefox
-    require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/agents/firewatir.rb"
-  else
-    puts "The gem firewatir is not installed"
-  end
+begin
+  require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/agents/firewatir.rb"
+rescue LoadError
+  puts "The gem firewatir is not installed, you'll be able to use Mechanize as the agent only"
+end
 # --
 require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/navigation_actions.rb"

data/lib/scrubyt/core/navigation/agents/firewatir.rb CHANGED

@@ -1,5 +1,5 @@
-require 'rubygems'
 require 'firewatir'
 module Scrubyt
   ##
   #=<tt>Fetching pages (and related functionality)</tt>
@@ -113,10 +113,16 @@ module Scrubyt
             Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
           end
-          def self.click_by_xpath(xpath)
+          def self.click_by_xpath(xpath, wait_secs=0)
             Scrubyt.log :ACTION, "Clicking by XPath : %p" % xpath
             @@agent.element_by_xpath(xpath).click
+            Scrubyt.log :INFO, "sleeping #{wait_secs}..."
+            sleep(wait_secs) if wait_secs > 0
             @@agent.wait
+            # evaluate the results
+            extractor.evaluate_extractor
             @@current_doc_url = @@agent.url
             @@mechanize_doc = "<html>#{@@agent.html}</html>"
             @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))

data/lib/scrubyt/core/navigation/agents/mechanize.rb CHANGED

@@ -74,13 +74,13 @@ module Scrubyt
           def self.submit(index=nil, sleep_time=nil, type=nil)
             Scrubyt.log :ACTION, 'Submitting form...'
             if index == nil
-              result_page = @@agent.submit(@@current_form)
-              process_submit(@@current_form)
+              #result_page = @@agent.submit(@@current_form)
+              result_page = process_submit(@@current_form)
               #----- added by nickmerwin@gmail.com -----
             elsif index.class == String && !type.nil?
               button = @@current_form.buttons.detect{|b| b.name == index}
-              result_page = @@current_form.submit(button)
-              process_submit(@@current_form, button,type)
+              #result_page = @@current_form.submit(button)
+              result_page = process_submit(@@current_form, button,type)
               #-----------------------------------------
             else
               result_page = @@agent.submit(@@current_form, @@current_form.buttons[index])
@@ -215,7 +215,11 @@ module Scrubyt
           def self.fill_textfield(textfield_name, query_string, *unused)
             lookup_form_for_tag('input','textfield',textfield_name,query_string)
-            eval("@@current_form['#{textfield_name}'] = '#{query_string}'")
+            if(@@current_form)
+              eval("@@current_form['#{textfield_name}'] = '#{query_string}'")
+            else
+              Scrubyt.log :ERROR, "Couldn't find the form that contains this textfield. Please report a bug!"
+            end
           end
           ##
@@ -253,16 +257,29 @@ module Scrubyt
             else
               result_page = @@agent.submit(current_form, button)
             end
-            @@current_doc_url = result_page.uri.to_s
-            Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
-            fetch(@@current_doc_url, :mechanize_doc => result_page)
+            #@@current_doc_url = result_page.uri.to_s
+            #Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
+            #fetch(@@current_doc_url, :mechanize_doc => result_page)
+            result_page
           end
           def self.lookup_form_for_tag(tag, widget_name, name_attribute, query_string, index=0)
             Scrubyt.log :ACTION, "typing #{query_string} into the #{widget_name} named '#{name_attribute}'"
             widget = (FetchAction.get_hpricot_doc/"#{tag}[@name=#{name_attribute}]").map()[index]
             form_tag = Scrubyt::XPathUtils.traverse_up_until_name(widget, 'form')
-            find_form_based_on_tag(form_tag, ['name', 'id', 'action'])
+            puts "=" * 100
+            puts ">>#{Scrubyt::XPathUtils.generate_XPath(form_tag, nil, true)}<<"
+            puts "=" * 100
+            xp = Scrubyt::XPathUtils.generate_XPath(form_tag, nil, true)
+            form_element =  FetchAction.get_mechanize_doc/xp
+            FetchAction.get_mechanize_doc.forms.each do |f|
+              @@current_form = f
+              break if f.form_node == form_element
+            end
+            #find_form_based_on_tag(form_tag, ['name', 'id', 'action'])
           end
           def self.find_form_based_on_tag(tag, possible_attrs)
@@ -274,10 +291,16 @@ module Scrubyt
               lookup_attribute_value = tag.attributes[a]
               break if lookup_attribute_value != nil
             }
+            #puts lookup_attribute_name
+            #puts lookup_attribute_value
             i = 0
             loop do
               @@current_form = FetchAction.get_mechanize_doc.forms[i]
+              #p @@current_form.form_node
               return nil if @@current_form == nil
+              #puts  ">>#{@@current_form.form_node.attributes[lookup_attribute_name].to_s}<< :: >>#{lookup_attribute_value}<<"
               break if @@current_form.form_node.attributes[lookup_attribute_name].to_s == lookup_attribute_value
               i+= 1
             end

data/lib/scrubyt/core/navigation/navigation_actions.rb CHANGED

@@ -80,6 +80,10 @@ module Scrubyt
       FetchAction.click_by_xpath(xpath)
     end
+    def click_by_xpath_and_wait(xpath, secs)
+      FetchAction.click_by_xpath(xpath, secs)
+    end
     def click_image_map(index=0)
       FetchAction.click_image_map(index)
     end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: scrubber-scrubyt
 version: !ruby/object:Gem::Version
-  version: 0.4.25
+  version: 0.4.28
 platform: ruby
 authors:
 - Peter Szinek