scrubyt 0.2.8 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. data/CHANGELOG +32 -2
  2. data/Rakefile +25 -20
  3. data/lib/scrubyt.rb +24 -5
  4. data/lib/scrubyt/core/navigation/fetch_action.rb +76 -42
  5. data/lib/scrubyt/core/navigation/navigation_actions.rb +24 -6
  6. data/lib/scrubyt/core/scraping/filters/base_filter.rb +5 -5
  7. data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +2 -2
  8. data/lib/scrubyt/core/scraping/filters/download_filter.rb +2 -1
  9. data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +7 -2
  10. data/lib/scrubyt/core/scraping/filters/tree_filter.rb +37 -12
  11. data/lib/scrubyt/core/scraping/pattern.rb +82 -90
  12. data/lib/scrubyt/core/scraping/pre_filter_document.rb +2 -1
  13. data/lib/scrubyt/core/shared/evaluation_context.rb +14 -37
  14. data/lib/scrubyt/core/shared/extractor.rb +55 -54
  15. data/lib/scrubyt/logging.rb +16 -0
  16. data/lib/scrubyt/output/export.rb +1 -1
  17. data/lib/scrubyt/output/post_processor.rb +6 -5
  18. data/lib/scrubyt/output/result.rb +1 -0
  19. data/lib/scrubyt/output/result_dumper.rb +4 -3
  20. data/lib/scrubyt/output/result_node.rb +73 -0
  21. data/lib/scrubyt/output/scrubyt_result.rb +28 -0
  22. data/lib/scrubyt/utils/ruby_extensions.rb +8 -0
  23. data/lib/scrubyt/utils/simple_example_lookup.rb +14 -1
  24. data/lib/scrubyt/utils/xpathutils.rb +11 -0
  25. metadata +7 -12
  26. data/test/unittests/constraint_test.rb +0 -107
  27. data/test/unittests/extractor_test.rb +0 -91
  28. data/test/unittests/filter_test.rb +0 -79
  29. data/test/unittests/input/constraint_test.html +0 -55
  30. data/test/unittests/input/test.html +0 -39
  31. data/test/unittests/pattern_test.rb +0 -27
  32. data/test/unittests/simple_example_lookup_test.rb +0 -68
  33. data/test/unittests/xpathutils_test.rb +0 -152
data/CHANGELOG CHANGED
@@ -1,7 +1,38 @@
1
1
  = scRUBYt! Changelog
2
2
 
3
+ == 0.3.0
4
+ === 21st May, 2007
5
+
6
+ =<tt>changes:</tt>
7
+
8
+ [NEW] complete rewrite of the output system, creating
9
+ a solid foundation for more robust output functions
10
+ (credit: Neelance)
11
+ [NEW] logging - no annoying puts messages anymore! (credit: Tim Fletcher)
12
+ [NEW] can index an example - e.g.
13
+ link 'more[5]'
14
+ semantics: give me the 6th element with the text 'link'
15
+ [NEW] can use XPath checking an attribute value, like "//div[@id='content']"
16
+ [NEW] default values for missing elements (first version was done in 0.2.8
17
+ but it did not work for all cases)
18
+ [NEW] possibility to click button with its text (instead of its index)
19
+ (credit: Nick Merwin)
20
+ [NEW] can click on image buttons (by specifying the name of the button)
21
+ [NEW] possibility to extract a URL with one step, like so:
22
+ link 'The Difference/@href'
23
+ i.e. give me the href attribute of the element matched by the example 'The Difference'
24
+ [NEW] new way to match an element of the page:
25
+ div 'div[The Difference]'
26
+ means 'return the div which contains the string "The Difference"'. This is
27
+ useful if the XPath of the element is non-constant across the same site (e.g.
28
+ sometimes a banner or ad is added, sometimes not etc.)
29
+ [FIX] Replacing \240 (&nbsp;) with space in the preprocessing phase automatically
30
+ [FIX] Fixed: correctly downloading image if the src
31
+ attribute had a leading space, as in
32
+ <img src=' /files/downloads/images/image.jpg'/>
33
+
3
34
  == 0.2.7
4
- === 15th April, 2007
35
+ === 12th April, 2007
5
36
 
6
37
  =<tt>changes:</tt>
7
38
 
@@ -9,7 +40,6 @@
9
40
  parent pattern
10
41
  [NEW] checking checkboxes
11
42
  [NEW] basic authentication support
12
- [NEW] default values for missing elements
13
43
  [NEW] possibility to resolve relative paths against a custom url
14
44
  [NEW] first simple version of to_csv and to_hash
15
45
  [NEW] complete rewrite of the exporting system (Credit: Neelance)
data/Rakefile CHANGED
@@ -7,8 +7,7 @@ require 'rake/packagetask'
7
7
  # Dependencies
8
8
  ###################################################
9
9
 
10
- task "default" => ["test"]
11
- task "fulltest" => ["test", "blackbox"]
10
+ task "default" => ["test_all"]
12
11
  task "generate_rdoc" => ["cleanup_readme"]
13
12
  task "cleanup_readme" => ["rdoc"]
14
13
 
@@ -16,22 +15,24 @@ task "cleanup_readme" => ["rdoc"]
16
15
  # Gem specification
17
16
  ###################################################
18
17
 
19
- gem_spec = Gem::Specification.new do |s|
20
- s.name = 'scrubyt'
21
- s.version = '0.2.8'
18
+ gem_spec = Gem::Specification.new do |s|
19
+ s.name = 'scrubyt'
20
+ s.version = '0.3.0'
22
21
  s.summary = 'A powerful Web-scraping framework'
23
- s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
24
- # Files containing Test::Unit test cases.
25
- s.test_files = FileList['test/unittests/**/*']
26
- # List of other files to be included.
22
+ s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
23
+ # Files containing Test::Unit test cases.
24
+ s.test_files = FileList['test/unittests/**/*']
25
+ # List of other files to be included.
27
26
  s.files = FileList['COPYING', 'README', 'CHANGELOG', 'Rakefile', 'lib/**/*.rb']
28
27
  s.author = 'Peter Szinek'
29
- s.email = 'peter@rubyrailways.com'
28
+ s.email = 'peter@rubyrailways.com'
30
29
  s.homepage = 'http://www.scrubyt.org'
31
30
  s.add_dependency('hpricot', '>= 0.5')
32
- s.add_dependency('mechanize', '>= 0.6.3')
31
+ s.add_dependency('mechanize', '>= 0.6.3')
32
+ #s.add_dependency('parsetree', '>= 1.7.0')
33
+ #s.add_dependency('ruby2ruby', '>= 1.1.5')
33
34
  s.has_rdoc = 'true'
34
- end
35
+ end
35
36
 
36
37
  ###################################################
37
38
  # Tasks
@@ -47,12 +48,16 @@ Rake::RDocTask.new do |generate_rdoc|
47
48
  generate_rdoc.options << '--line-numbers' << '--inline-source'
48
49
  end
49
50
 
50
- Rake::TestTask.new do |test|
51
- test.pattern = 'test/unittests/*_test.rb'
52
- end
51
+ Rake::TestTask.new(:test_all) do |task|
52
+ task.pattern = 'test/*_test.rb'
53
+ end
54
+
55
+ Rake::TestTask.new(:test_blackbox) do |task|
56
+ task.test_files = ['test/blackbox_test.rb']
57
+ end
53
58
 
54
- task "blackbox" do
55
- ruby "test/blackbox/run_blackbox_tests.rb"
59
+ Rake::TestTask.new(:test_non_blackbox) do |task|
60
+ task.test_files = FileList['test/*_test.rb'] - ['test/blackbox_test.rb']
56
61
  end
57
62
 
58
63
  task "cleanup_readme" do
@@ -77,12 +82,12 @@ end
77
82
  task "generate_rdoc" do
78
83
  end
79
84
 
80
- Rake::GemPackageTask.new(gem_spec) do |pkg|
85
+ Rake::GemPackageTask.new(gem_spec) do |pkg|
81
86
  pkg.need_zip = false
82
- pkg.need_tar = false
87
+ pkg.need_tar = false
83
88
  end
84
89
 
85
- Rake::PackageTask.new('scrubyt-examples', '0.2.8') do |pkg|
90
+ Rake::PackageTask.new('scrubyt-examples', '0.3.0') do |pkg|
86
91
  pkg.need_zip = true
87
92
  pkg.need_tar = true
88
93
  pkg.package_files.include("examples/**/*")
@@ -1,14 +1,34 @@
1
1
  #ruby core
2
2
  require 'open-uri'
3
+ require 'erb'
3
4
 
4
5
  #gems
5
6
  require 'rubygems'
6
7
  require 'mechanize'
7
8
  require 'hpricot'
8
- require 'parse_tree'
9
+ require 'parse_tree_reloaded'
10
+
11
+ #little hack to avoid that ruby2ruby tries to load the original parse_tree
12
+ if Gem
13
+ module Gem
14
+ class << self
15
+ alias_method :activate_orig, :activate
16
+ def activate(gem, autorequire, *version_requirements)
17
+ activate_orig(gem, autorequire, *version_requirements) unless gem.is_a?(Gem::Dependency) && gem.name == 'ParseTree'
18
+ end
19
+ end
20
+ end
21
+ end
22
+ module Kernel
23
+ alias_method :require_orig, :require
24
+ def require(path)
25
+ require_orig(path) unless path == 'parse_tree'
26
+ end
27
+ end
9
28
  require 'ruby2ruby'
10
29
 
11
30
  #scrubyt
31
+ require 'scrubyt/logging'
12
32
  require 'scrubyt/utils/ruby_extensions.rb'
13
33
  require 'scrubyt/utils/xpathutils.rb'
14
34
  require 'scrubyt/utils/shared_utils.rb'
@@ -19,6 +39,8 @@ require 'scrubyt/core/scraping/constraint.rb'
19
39
  require 'scrubyt/core/scraping/result_indexer.rb'
20
40
  require 'scrubyt/core/scraping/pre_filter_document.rb'
21
41
  require 'scrubyt/core/scraping/compound_example.rb'
42
+ require 'scrubyt/output/result_node.rb'
43
+ require 'scrubyt/output/scrubyt_result.rb'
22
44
  require 'scrubyt/output/export.rb'
23
45
  require 'scrubyt/core/shared/extractor.rb'
24
46
  require 'scrubyt/core/scraping/filters/base_filter.rb'
@@ -29,10 +51,7 @@ require 'scrubyt/core/scraping/filters/html_subtree_filter.rb'
29
51
  require 'scrubyt/core/scraping/filters/regexp_filter.rb'
30
52
  require 'scrubyt/core/scraping/filters/tree_filter.rb'
31
53
  require 'scrubyt/core/scraping/pattern.rb'
32
- require 'scrubyt/output/result_dumper.rb'
33
- require 'scrubyt/output/result.rb'
34
- require 'scrubyt/output/post_processor.rb'
35
54
  require 'scrubyt/core/navigation/navigation_actions.rb'
36
55
  require 'scrubyt/core/navigation/fetch_action.rb'
37
56
  require 'scrubyt/core/shared/evaluation_context.rb'
38
- require 'scrubyt/core/shared/u_r_i_builder.rb'
57
+ require 'scrubyt/core/shared/u_r_i_builder.rb'
@@ -8,14 +8,13 @@ module Scrubyt
8
8
  #which is loading a document (even by submitting a form or clicking a link)
9
9
  #and related things like setting a proxy etc. you should find it here.
10
10
  class FetchAction
11
- def initialize
12
- @@current_doc_url = nil
13
- @@current_doc_protocol = nil
14
- @@base_dir = nil
15
- @@host_name = nil
16
- @@agent = WWW::Mechanize.new
17
- @@history = []
18
- end
11
+
12
+ @@current_doc_url = nil
13
+ @@current_doc_protocol = nil
14
+ @@base_dir = nil
15
+ @@host_name = nil
16
+ @@agent = WWW::Mechanize.new
17
+ @@history = []
19
18
 
20
19
  ##
21
20
  #Action to fetch a document (either a file or a http address)
@@ -25,29 +24,38 @@ module Scrubyt
25
24
  #_doc_url_ - the url or file name to fetch
26
25
  def self.fetch(doc_url, *args)
27
26
  #Refactor this crap!!! with option_accessor stuff
28
- proxy = args[0][:proxy]
29
- mechanize_doc = args[0][:mechanize_doc]
30
- resolve = args[0][:resolve] || :full
31
- basic_auth = args[0][:basic_auth]
32
- user_agent = args[0][:user_agent] || "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
33
- #Refactor this whole stuff as well!!! It looks awful...
34
- parse_and_set_proxy(proxy) if proxy
35
- set_user_agent(user_agent)
36
- parse_and_set_basic_auth(basic_auth) if basic_auth
37
- if !mechanize_doc
38
- @@current_doc_url = doc_url
39
- @@current_doc_protocol = determine_protocol
27
+
28
+ if args.size > 0
29
+ proxy = args[0][:proxy]
30
+ mechanize_doc = args[0][:mechanize_doc]
31
+ resolve = args[0][:resolve]
32
+ basic_auth = args[0][:basic_auth]
33
+ user_agent = args[0][:user_agent] || "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)"
34
+ #Refactor this whole stuff as well!!! It looks awful...
35
+ parse_and_set_proxy(proxy) if proxy
36
+ set_user_agent(user_agent)
37
+ parse_and_set_basic_auth(basic_auth) if basic_auth
38
+ else
39
+ mechanize_doc = nil
40
+ resolve = :full
41
+ end
42
+
43
+ @@current_doc_url = doc_url
44
+ @@current_doc_protocol = determine_protocol
45
+
46
+ if mechanize_doc.nil? && @@current_doc_protocol != 'file'
40
47
  handle_relative_path(doc_url)
41
- handle_relative_url(doc_url,resolve)
42
- puts "[ACTION] fetching document: #{@@current_doc_url}"
43
- if @@current_doc_protocol != 'file'
48
+ handle_relative_url(doc_url, resolve)
49
+
50
+ Scrubyt.log :ACTION, "fetching document: #{@@current_doc_url}"
51
+
52
+ unless 'file' == @@current_doc_protocol
44
53
  @@mechanize_doc = @@agent.get(@@current_doc_url)
45
54
  end
46
55
  else
47
- @@current_doc_url = doc_url
48
56
  @@mechanize_doc = mechanize_doc
49
- @@current_doc_protocol = determine_protocol
50
57
  end
58
+
51
59
  if @@current_doc_protocol == 'file'
52
60
  @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(open(@@current_doc_url).read))
53
61
  else
@@ -58,22 +66,24 @@ module Scrubyt
58
66
 
59
67
  ##
60
68
  #Submit the last form;
61
- def self.submit(current_form, button=nil)
62
- puts '[ACTION] submitting form...'
69
+ def self.submit(current_form, button=nil, type=nil)
70
+ Scrubyt.log :ACTION, 'Submitting form...'
63
71
  if button == nil
64
72
  result_page = @@agent.submit(current_form)
73
+ elsif type
74
+ result_page = current_form.submit(button)
65
75
  else
66
76
  result_page = @@agent.submit(current_form, button)
67
77
  end
68
78
  @@current_doc_url = result_page.uri.to_s
69
- puts "[ACTION] fetched #{@@current_doc_url}"
79
+ Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
70
80
  fetch(@@current_doc_url, :mechanize_doc => result_page)
71
81
  end
72
82
 
73
83
  ##
74
84
  #Click the link specified by the text
75
85
  def self.click_link(link_spec,index = 0)
76
- print "[ACTION] clicking link specified by: "; p link_spec
86
+ Scrubyt.log :ACTION, "Clicking link specified by: %p" % link_spec
77
87
  if link_spec.is_a? Hash
78
88
  clicked_elem = CompoundExampleLookup.find_node_from_compund_example(@@hpricot_doc, link_spec, false, index)
79
89
  else
@@ -82,7 +92,16 @@ module Scrubyt
82
92
  clicked_elem = XPathUtils.find_nearest_node_with_attribute(clicked_elem, 'href')
83
93
  result_page = @@agent.click(clicked_elem)
84
94
  @@current_doc_url = result_page.uri.to_s
85
- puts "[ACTION] fetched #{@@current_doc_url}"
95
+ Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
96
+ fetch(@@current_doc_url, :mechanize_doc => result_page)
97
+ end
98
+
99
+ def self.click_image_map(index = 0)
100
+ Scrubyt.log :ACTION, "Clicking image map at index: %p" % index
101
+ uri = @@mechanize_doc.search("//area")[index]['href']
102
+ result_page = @@agent.get(uri)
103
+ @@current_doc_url = result_page.uri.to_s
104
+ Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
86
105
  fetch(@@current_doc_url, :mechanize_doc => result_page)
87
106
  end
88
107
 
@@ -118,6 +137,14 @@ module Scrubyt
118
137
  @@hpricot_doc = @@history.pop
119
138
  end
120
139
 
140
+ def self.store_host_name(doc_url)
141
+ @@host_name = 'http://' + @@mechanize_doc.uri.to_s.scan(/http:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'http'
142
+ @@host_name = 'https://' + @@mechanize_doc.uri.to_s.scan(/https:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'https'
143
+ @@host_name = doc_url if @@host_name == nil
144
+ @@host_name = @@host_name[0..-2] if @@host_name[-1].chr == '/'
145
+ @@original_host_name ||= @@host_name
146
+ end #end of method store_host_name
147
+
121
148
  def self.determine_protocol
122
149
  old_protocol = @@current_doc_protocol
123
150
  new_protocol = case @@current_doc_url
@@ -149,18 +176,18 @@ module Scrubyt
149
176
  exit
150
177
  end
151
178
  end
152
- puts "[ACTION] Setting proxy: host=<#{@@host}>, port=<#{@@port}>"
179
+ Scrubyt.log :ACTION, "Setting proxy: host=<#{@@host}>, port=<#{@@port}>"
153
180
  @@agent.set_proxy(@@host, @@port)
154
181
  end
155
182
 
156
183
  def self.parse_and_set_basic_auth(basic_auth)
157
184
  login, pass = basic_auth.split('@')
158
- puts "[ACTION] Basic authentication: login=<#{login}>, pass=<#{pass}>"
185
+ Scrubyt.log :ACTION, "Basic authentication: login=<#{login}>, pass=<#{pass}>"
159
186
  @@agent.basic_auth(login, pass)
160
187
  end
161
188
 
162
189
  def self.set_user_agent(user_agent)
163
- #puts "[ACTION] Setting user-agent to #{user_agent}"
190
+ Scrubyt.log :ACTION, "Setting user-agent to #{user_agent}"
164
191
  @@agent.user_agent = user_agent
165
192
  end
166
193
 
@@ -172,22 +199,29 @@ module Scrubyt
172
199
  end
173
200
  end
174
201
 
175
- def self.store_host_name(doc_url)
176
- @@host_name = 'http://' + @@mechanize_doc.uri.to_s.scan(/http:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'http'
177
- @@host_name = 'https://' + @@mechanize_doc.uri.to_s.scan(/https:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'https'
178
- @@host_name = doc_url if @@host_name == nil
179
- @@host_name = @@host_name[0..-2] if @@host_name[-1].chr == '/'
180
- @@original_host_name ||= @@host_name
181
- end #end of method store_host_name
182
-
183
202
  def self.handle_relative_url(doc_url, resolve)
184
203
  return if doc_url =~ /^http/
204
+ if doc_url !~ /^\//
205
+ first_char = doc_url[0..0]
206
+ doc_url = ( first_char == '?' ? '' : '/' ) + doc_url
207
+ if first_char == '?' #This is an ugly hack... really have to throw this shit out and go with mechanize's
208
+ current_uri = @@mechanize_doc.uri.to_s
209
+ current_uri = @@agent.history.first.uri.to_s if current_uri =~ /\/popup\//
210
+ if (current_uri.include? '?')
211
+ current_uri = current_uri.scan(/.+\//)[0]
212
+ else
213
+ current_uri += '/' unless current_uri[-1..-1] == '/'
214
+ end
215
+ @@current_doc_url = current_uri + doc_url
216
+ return
217
+ end
218
+ end
185
219
  case resolve
186
220
  when :full
187
221
  @@current_doc_url = (@@host_name + doc_url) if ( @@host_name != nil && (doc_url !~ /#{@@host_name}/))
188
222
  @@current_doc_url = @@current_doc_url.split('/').uniq.join('/')
189
223
  when :host
190
- base_host_name = @@host_name.scan(/(http.+?\/\/.+?)\//)[0][0]
224
+ base_host_name = (@@host_name.count("/") == 2 ? @@host_name : @@host_name.scan(/(http.+?\/\/.+?)\//)[0][0])
191
225
  @@current_doc_url = base_host_name + doc_url
192
226
  else
193
227
  #custom resilving
@@ -13,8 +13,10 @@ module Scrubyt
13
13
  'fill_textarea',
14
14
  'submit',
15
15
  'click_link',
16
+ 'click_image_map',
16
17
  'select_option',
17
18
  'check_checkbox',
19
+ 'check_radiobutton',
18
20
  'end']
19
21
 
20
22
  def initialize
@@ -48,16 +50,20 @@ module Scrubyt
48
50
  def self.select_option(selectlist_name, option)
49
51
  lookup_form_for_tag('select','select list',selectlist_name,option)
50
52
  select_list = @@current_form.fields.find {|f| f.name == selectlist_name}
51
- searched_option = select_list.options.find{|f| f.text == option}
53
+ searched_option = select_list.options.find{|f| f.text.strip == option}
52
54
  searched_option.click
53
55
  end
54
56
 
55
57
  def self.check_checkbox(checkbox_name)
56
- puts checkbox_name
57
58
  lookup_form_for_tag('input','checkbox',checkbox_name, '')
58
59
  @@current_form.checkboxes.name(checkbox_name).check
59
60
  end
60
61
 
62
+ def self.check_radiobutton(checkbox_name, index=0)
63
+ lookup_form_for_tag('input','radiobutton',checkbox_name, '',index)
64
+ @@current_form.radiobuttons.name(checkbox_name)[index].check
65
+ end
66
+
61
67
  ##
62
68
  #Fetch the document
63
69
  def self.fetch(*args)
@@ -65,9 +71,14 @@ module Scrubyt
65
71
  end
66
72
  ##
67
73
  #Submit the current form (delegate it to NavigationActions)
68
- def self.submit(index=nil)
74
+ def self.submit(index=nil, type=nil)
69
75
  if index == nil
70
76
  FetchAction.submit(@@current_form)
77
+ #----- added by nickmerwin@gmail.com -----
78
+ elsif index.class == String
79
+ button = @@current_form.buttons.detect{|b| b.name == index}
80
+ FetchAction.submit(@@current_form, button,type)
81
+ #-----------------------------------------
71
82
  else
72
83
  FetchAction.submit(@@current_form, @@current_form.buttons[index])
73
84
  end
@@ -79,6 +90,10 @@ module Scrubyt
79
90
  FetchAction.click_link(link_spec,index)
80
91
  end
81
92
 
93
+ def self.click_image_map(index=0)
94
+ FetchAction.click_image_map(index)
95
+ end
96
+
82
97
  def self.get_hpricot_doc
83
98
  FetchAction.get_hpricot_doc
84
99
  end
@@ -92,10 +107,12 @@ module Scrubyt
92
107
  end
93
108
 
94
109
  private
95
- def self.lookup_form_for_tag(tag,widget_name,name_attribute,query_string)
96
- puts "[ACTION] typing #{query_string} into the #{widget_name} named '#{name_attribute}'"
97
- widget = (FetchAction.get_hpricot_doc/"#{tag}[@name=#{name_attribute}]").map()[0]
110
+ def self.lookup_form_for_tag(tag, widget_name, name_attribute, query_string, index=0)
111
+ Scrubyt.log :ACTION, "typing #{query_string} into the #{widget_name} named '#{name_attribute}'"
112
+ widget = (FetchAction.get_hpricot_doc/"#{tag}[@name=#{name_attribute}]").map()[index]
113
+ p widget
98
114
  form_tag = Scrubyt::XPathUtils.traverse_up_until_name(widget, 'form')
115
+ p form_tag
99
116
  find_form_based_on_tag(form_tag, ['name', 'id', 'action'])
100
117
  end
101
118
 
@@ -112,6 +129,7 @@ private
112
129
  loop do
113
130
  @@current_form = FetchAction.get_mechanize_doc.forms[i]
114
131
  return nil if @@current_form == nil
132
+ puts i
115
133
  break if @@current_form.form_node.attributes[lookup_attribute_name] == lookup_attribute_value
116
134
  i+= 1
117
135
  end