RubyGems - scrubber-scrubyt - Versions diffs - 0.4.25 → 0.4.28 - Mend

scrubber-scrubyt 0.4.25 → 0.4.28

Files changed (6)

data/Rakefile +4 -4
data/lib/scrubyt.rb +5 -5
data/lib/scrubyt/core/navigation/agents/firewatir.rb +8 -2
data/lib/scrubyt/core/navigation/agents/mechanize.rb +32 -9
data/lib/scrubyt/core/navigation/navigation_actions.rb +4 -0
metadata +1 -1

data/Rakefile CHANGED

@@ -17,13 +17,13 @@ task "cleanup_readme" => ["rdoc"]
 gem_spec = Gem::Specification.new do |s|
   s.name = 'scrubyt'
-  s.version = '0.4.20'
+  s.version = '0.4.26'
   s.summary = 'A powerful Web-scraping framework built on Mechanize and Hpricot (and FireWatir)'
   s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
   # Files containing Test::Unit test cases.
   s.test_files = FileList['test/unittests/**/*']
   # List of other files to be included.
-  s.files = FileList['COPYING', 'README', 'CHANGELOG', 'Rakefile', 'lib/**/*.rb']
+  s.files = FileList['COPYING', 'README.rdoc', 'CHANGELOG', 'Rakefile', 'lib/**/*.rb']
   s.author = 'Peter Szinek'
   s.email = 'peter@rubyrailways.com'
   s.homepage = 'http://www.scrubyt.org'
@@ -37,9 +37,9 @@ end
 ###################################################
 Rake::RDocTask.new do |generate_rdoc|
-     files = ['lib/**/*.rb', 'README', 'CHANGELOG']
+     files = ['lib/**/*.rb', 'README.rdoc', 'CHANGELOG']
      generate_rdoc.rdoc_files.add(files)
-     generate_rdoc.main = "README" # page to start on
+     generate_rdoc.main = "README.rdoc" # page to start on
      generate_rdoc.title = "Scrubyt Documentation"
      generate_rdoc.template = "resources/allison/allison.rb"
      generate_rdoc.rdoc_dir = 'doc' # rdoc output folder

data/lib/scrubyt.rb CHANGED

@@ -30,11 +30,11 @@ require "#{File.dirname(__FILE__)}/scrubyt/output/scrubyt_result.rb"
 require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/agents/mechanize.rb"
 # -- Making Firewatir optional --
-  if defined? Firewatir::Firefox
-    require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/agents/firewatir.rb"
-  else
-    puts "The gem firewatir is not installed"
-  end
+begin
+  require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/agents/firewatir.rb"
+rescue LoadError
+  puts "The gem firewatir is not installed, you'll be able to use Mechanize as the agent only"
+end
 # --
 require "#{File.dirname(__FILE__)}/scrubyt/core/navigation/navigation_actions.rb"

data/lib/scrubyt/core/navigation/agents/firewatir.rb CHANGED

@@ -1,5 +1,5 @@
-require 'rubygems'
 require 'firewatir'
 module Scrubyt
   ##
   #=<tt>Fetching pages (and related functionality)</tt>
@@ -113,10 +113,16 @@ module Scrubyt
             Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
           end
-          def self.click_by_xpath(xpath)
+          def self.click_by_xpath(xpath, wait_secs=0)
             Scrubyt.log :ACTION, "Clicking by XPath : %p" % xpath
             @@agent.element_by_xpath(xpath).click
+            Scrubyt.log :INFO, "sleeping #{wait_secs}..."
+            sleep(wait_secs) if wait_secs > 0
             @@agent.wait
+            # evaluate the results
+            extractor.evaluate_extractor
             @@current_doc_url = @@agent.url
             @@mechanize_doc = "<html>#{@@agent.html}</html>"
             @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(@@mechanize_doc))

data/lib/scrubyt/core/navigation/agents/mechanize.rb CHANGED

@@ -74,13 +74,13 @@ module Scrubyt
           def self.submit(index=nil, sleep_time=nil, type=nil)
             Scrubyt.log :ACTION, 'Submitting form...'
             if index == nil
-              result_page = @@agent.submit(@@current_form)
-              process_submit(@@current_form)
+              #result_page = @@agent.submit(@@current_form)
+              result_page = process_submit(@@current_form)
               #----- added by nickmerwin@gmail.com -----
             elsif index.class == String && !type.nil?
               button = @@current_form.buttons.detect{|b| b.name == index}
-              result_page = @@current_form.submit(button)
-              process_submit(@@current_form, button,type)
+              #result_page = @@current_form.submit(button)
+              result_page = process_submit(@@current_form, button,type)
               #-----------------------------------------
             else
               result_page = @@agent.submit(@@current_form, @@current_form.buttons[index])
@@ -215,7 +215,11 @@ module Scrubyt
           def self.fill_textfield(textfield_name, query_string, *unused)
             lookup_form_for_tag('input','textfield',textfield_name,query_string)
-            eval("@@current_form['#{textfield_name}'] = '#{query_string}'")
+            if(@@current_form)
+              eval("@@current_form['#{textfield_name}'] = '#{query_string}'")
+            else
+              Scrubyt.log :ERROR, "Couldn't find the form that contains this textfield. Please report a bug!"
+            end
           end
           ##
@@ -253,16 +257,29 @@ module Scrubyt
             else
               result_page = @@agent.submit(current_form, button)
             end
-            @@current_doc_url = result_page.uri.to_s
-            Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
-            fetch(@@current_doc_url, :mechanize_doc => result_page)
+            #@@current_doc_url = result_page.uri.to_s
+            #Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
+            #fetch(@@current_doc_url, :mechanize_doc => result_page)
+            result_page
           end
           def self.lookup_form_for_tag(tag, widget_name, name_attribute, query_string, index=0)
             Scrubyt.log :ACTION, "typing #{query_string} into the #{widget_name} named '#{name_attribute}'"
             widget = (FetchAction.get_hpricot_doc/"#{tag}[@name=#{name_attribute}]").map()[index]
             form_tag = Scrubyt::XPathUtils.traverse_up_until_name(widget, 'form')
-            find_form_based_on_tag(form_tag, ['name', 'id', 'action'])
+            puts "=" * 100
+            puts ">>#{Scrubyt::XPathUtils.generate_XPath(form_tag, nil, true)}<<"
+            puts "=" * 100
+            xp = Scrubyt::XPathUtils.generate_XPath(form_tag, nil, true)
+            form_element =  FetchAction.get_mechanize_doc/xp
+            FetchAction.get_mechanize_doc.forms.each do |f|
+              @@current_form = f
+              break if f.form_node == form_element
+            end
+            #find_form_based_on_tag(form_tag, ['name', 'id', 'action'])
           end
           def self.find_form_based_on_tag(tag, possible_attrs)
@@ -274,10 +291,16 @@ module Scrubyt
               lookup_attribute_value = tag.attributes[a]
               break if lookup_attribute_value != nil
             }
+            #puts lookup_attribute_name
+            #puts lookup_attribute_value
             i = 0
             loop do
               @@current_form = FetchAction.get_mechanize_doc.forms[i]
+              #p @@current_form.form_node
               return nil if @@current_form == nil
+              #puts  ">>#{@@current_form.form_node.attributes[lookup_attribute_name].to_s}<< :: >>#{lookup_attribute_value}<<"
               break if @@current_form.form_node.attributes[lookup_attribute_name].to_s == lookup_attribute_value
               i+= 1
             end

data/lib/scrubyt/core/navigation/navigation_actions.rb CHANGED

@@ -80,6 +80,10 @@ module Scrubyt
       FetchAction.click_by_xpath(xpath)
     end
+    def click_by_xpath_and_wait(xpath, secs)
+      FetchAction.click_by_xpath(xpath, secs)
+    end
     def click_image_map(index=0)
       FetchAction.click_image_map(index)
     end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: scrubber-scrubyt
 version: !ruby/object:Gem::Version
-  version: 0.4.25
+  version: 0.4.28
 platform: ruby
 authors:
 - Peter Szinek