scrubyt 0.2.8 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. data/CHANGELOG +32 -2
  2. data/Rakefile +25 -20
  3. data/lib/scrubyt.rb +24 -5
  4. data/lib/scrubyt/core/navigation/fetch_action.rb +76 -42
  5. data/lib/scrubyt/core/navigation/navigation_actions.rb +24 -6
  6. data/lib/scrubyt/core/scraping/filters/base_filter.rb +5 -5
  7. data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +2 -2
  8. data/lib/scrubyt/core/scraping/filters/download_filter.rb +2 -1
  9. data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +7 -2
  10. data/lib/scrubyt/core/scraping/filters/tree_filter.rb +37 -12
  11. data/lib/scrubyt/core/scraping/pattern.rb +82 -90
  12. data/lib/scrubyt/core/scraping/pre_filter_document.rb +2 -1
  13. data/lib/scrubyt/core/shared/evaluation_context.rb +14 -37
  14. data/lib/scrubyt/core/shared/extractor.rb +55 -54
  15. data/lib/scrubyt/logging.rb +16 -0
  16. data/lib/scrubyt/output/export.rb +1 -1
  17. data/lib/scrubyt/output/post_processor.rb +6 -5
  18. data/lib/scrubyt/output/result.rb +1 -0
  19. data/lib/scrubyt/output/result_dumper.rb +4 -3
  20. data/lib/scrubyt/output/result_node.rb +73 -0
  21. data/lib/scrubyt/output/scrubyt_result.rb +28 -0
  22. data/lib/scrubyt/utils/ruby_extensions.rb +8 -0
  23. data/lib/scrubyt/utils/simple_example_lookup.rb +14 -1
  24. data/lib/scrubyt/utils/xpathutils.rb +11 -0
  25. metadata +7 -12
  26. data/test/unittests/constraint_test.rb +0 -107
  27. data/test/unittests/extractor_test.rb +0 -91
  28. data/test/unittests/filter_test.rb +0 -79
  29. data/test/unittests/input/constraint_test.html +0 -55
  30. data/test/unittests/input/test.html +0 -39
  31. data/test/unittests/pattern_test.rb +0 -27
  32. data/test/unittests/simple_example_lookup_test.rb +0 -68
  33. data/test/unittests/xpathutils_test.rb +0 -152
data/CHANGELOG CHANGED
@@ -1,7 +1,38 @@
1
1
  = scRUBYt! Changelog
2
2
 
3
+ == 0.3.0
4
+ === 21st May, 2007
5
+
6
+ =<tt>changes:</tt>
7
+
8
+ [NEW] complete rewrite of the output system, creating
9
+ a solid foundation for more robust output functions
10
+ (credit: Neelance)
11
+ [NEW] logging - no annoying puts messages anymore! (credit: Tim Fletcher)
12
+ [NEW] can index an example - e.g.
13
+ link 'more[5]'
14
+ semantics: give me the 6th element with the text 'link'
15
+ [NEW] can use XPath checking an attribute value, like "//div[@id='content']"
16
+ [NEW] default values for missing elements (first version was done in 0.2.8
17
+ but it did not work for all cases)
18
+ [NEW] possibility to click button with its text (instead of its index)
19
+ (credit: Nick Merwin)
20
+ [NEW] can click on image buttons (by specifying the name of the button)
21
+ [NEW] possibility to extract a URL with one step, like so:
22
+ link 'The Difference/@href'
23
+ i.e. give me the href attribute of the element matched by the example 'The Difference'
24
+ [NEW] new way to match an element of the page:
25
+ div 'div[The Difference]'
26
+ means 'return the div which contains the string "The Difference"'. This is
27
+ useful if the XPath of the element is non-constant across the same site (e.g.
28
+ sometimes a banner or ad is added, sometimes not etc.)
29
+ [FIX] Replacing \240 (&nbsp;) with space in the preprocessing phase automatically
30
+ [FIX] Fixed: correctly downloading image if the src
31
+ attribute had a leading space, as in
32
+ <img src=' /files/downloads/images/image.jpg'/>
33
+
3
34
  == 0.2.7
4
- === 15th April, 2007
35
+ === 12th April, 2007
5
36
 
6
37
  =<tt>changes:</tt>
7
38
 
@@ -9,7 +40,6 @@
9
40
  parent pattern
10
41
  [NEW] checking checkboxes
11
42
  [NEW] basic authentication support
12
- [NEW] default values for missing elements
13
43
  [NEW] possibility to resolve relative paths against a custom url
14
44
  [NEW] first simple version of to_csv and to_hash
15
45
  [NEW] complete rewrite of the exporting system (Credit: Neelance)
data/Rakefile CHANGED
@@ -7,8 +7,7 @@ require 'rake/packagetask'
7
7
  # Dependencies
8
8
  ###################################################
9
9
 
10
- task "default" => ["test"]
11
- task "fulltest" => ["test", "blackbox"]
10
+ task "default" => ["test_all"]
12
11
  task "generate_rdoc" => ["cleanup_readme"]
13
12
  task "cleanup_readme" => ["rdoc"]
14
13
 
@@ -16,22 +15,24 @@ task "cleanup_readme" => ["rdoc"]
16
15
  # Gem specification
17
16
  ###################################################
18
17
 
19
- gem_spec = Gem::Specification.new do |s|
20
- s.name = 'scrubyt'
21
- s.version = '0.2.8'
18
+ gem_spec = Gem::Specification.new do |s|
19
+ s.name = 'scrubyt'
20
+ s.version = '0.3.0'
22
21
  s.summary = 'A powerful Web-scraping framework'
23
- s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
24
- # Files containing Test::Unit test cases.
25
- s.test_files = FileList['test/unittests/**/*']
26
- # List of other files to be included.
22
+ s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
23
+ # Files containing Test::Unit test cases.
24
+ s.test_files = FileList['test/unittests/**/*']
25
+ # List of other files to be included.
27
26
  s.files = FileList['COPYING', 'README', 'CHANGELOG', 'Rakefile', 'lib/**/*.rb']
28
27
  s.author = 'Peter Szinek'
29
- s.email = 'peter@rubyrailways.com'
28
+ s.email = 'peter@rubyrailways.com'
30
29
  s.homepage = 'http://www.scrubyt.org'
31
30
  s.add_dependency('hpricot', '>= 0.5')
32
- s.add_dependency('mechanize', '>= 0.6.3')
31
+ s.add_dependency('mechanize', '>= 0.6.3')
32
+ #s.add_dependency('parsetree', '>= 1.7.0')
33
+ #s.add_dependency('ruby2ruby', '>= 1.1.5')
33
34
  s.has_rdoc = 'true'
34
- end
35
+ end
35
36
 
36
37
  ###################################################
37
38
  # Tasks
@@ -47,12 +48,16 @@ Rake::RDocTask.new do |generate_rdoc|
47
48
  generate_rdoc.options << '--line-numbers' << '--inline-source'
48
49
  end
49
50
 
50
- Rake::TestTask.new do |test|
51
- test.pattern = 'test/unittests/*_test.rb'
52
- end
51
+ Rake::TestTask.new(:test_all) do |task|
52
+ task.pattern = 'test/*_test.rb'
53
+ end
54
+
55
+ Rake::TestTask.new(:test_blackbox) do |task|
56
+ task.test_files = ['test/blackbox_test.rb']
57
+ end
53
58
 
54
- task "blackbox" do
55
- ruby "test/blackbox/run_blackbox_tests.rb"
59
+ Rake::TestTask.new(:test_non_blackbox) do |task|
60
+ task.test_files = FileList['test/*_test.rb'] - ['test/blackbox_test.rb']
56
61
  end
57
62
 
58
63
  task "cleanup_readme" do
@@ -77,12 +82,12 @@ end
77
82
  task "generate_rdoc" do
78
83
  end
79
84
 
80
- Rake::GemPackageTask.new(gem_spec) do |pkg|
85
+ Rake::GemPackageTask.new(gem_spec) do |pkg|
81
86
  pkg.need_zip = false
82
- pkg.need_tar = false
87
+ pkg.need_tar = false
83
88
  end
84
89
 
85
- Rake::PackageTask.new('scrubyt-examples', '0.2.8') do |pkg|
90
+ Rake::PackageTask.new('scrubyt-examples', '0.3.0') do |pkg|
86
91
  pkg.need_zip = true
87
92
  pkg.need_tar = true
88
93
  pkg.package_files.include("examples/**/*")
@@ -1,14 +1,34 @@
1
1
  #ruby core
2
2
  require 'open-uri'
3
+ require 'erb'
3
4
 
4
5
  #gems
5
6
  require 'rubygems'
6
7
  require 'mechanize'
7
8
  require 'hpricot'
8
- require 'parse_tree'
9
+ require 'parse_tree_reloaded'
10
+
11
+ #little hack to avoid that ruby2ruby tries to load the original parse_tree
12
+ if Gem
13
+ module Gem
14
+ class << self
15
+ alias_method :activate_orig, :activate
16
+ def activate(gem, autorequire, *version_requirements)
17
+ activate_orig(gem, autorequire, *version_requirements) unless gem.is_a?(Gem::Dependency) && gem.name == 'ParseTree'
18
+ end
19
+ end
20
+ end
21
+ end
22
+ module Kernel
23
+ alias_method :require_orig, :require
24
+ def require(path)
25
+ require_orig(path) unless path == 'parse_tree'
26
+ end
27
+ end
9
28
  require 'ruby2ruby'
10
29
 
11
30
  #scrubyt
31
+ require 'scrubyt/logging'
12
32
  require 'scrubyt/utils/ruby_extensions.rb'
13
33
  require 'scrubyt/utils/xpathutils.rb'
14
34
  require 'scrubyt/utils/shared_utils.rb'
@@ -19,6 +39,8 @@ require 'scrubyt/core/scraping/constraint.rb'
19
39
  require 'scrubyt/core/scraping/result_indexer.rb'
20
40
  require 'scrubyt/core/scraping/pre_filter_document.rb'
21
41
  require 'scrubyt/core/scraping/compound_example.rb'
42
+ require 'scrubyt/output/result_node.rb'
43
+ require 'scrubyt/output/scrubyt_result.rb'
22
44
  require 'scrubyt/output/export.rb'
23
45
  require 'scrubyt/core/shared/extractor.rb'
24
46
  require 'scrubyt/core/scraping/filters/base_filter.rb'
@@ -29,10 +51,7 @@ require 'scrubyt/core/scraping/filters/html_subtree_filter.rb'
29
51
  require 'scrubyt/core/scraping/filters/regexp_filter.rb'
30
52
  require 'scrubyt/core/scraping/filters/tree_filter.rb'
31
53
  require 'scrubyt/core/scraping/pattern.rb'
32
- require 'scrubyt/output/result_dumper.rb'
33
- require 'scrubyt/output/result.rb'
34
- require 'scrubyt/output/post_processor.rb'
35
54
  require 'scrubyt/core/navigation/navigation_actions.rb'
36
55
  require 'scrubyt/core/navigation/fetch_action.rb'
37
56
  require 'scrubyt/core/shared/evaluation_context.rb'
38
- require 'scrubyt/core/shared/u_r_i_builder.rb'
57
+ require 'scrubyt/core/shared/u_r_i_builder.rb'
@@ -8,14 +8,13 @@ module Scrubyt
8
8
  #which is loading a document (even by submitting a form or clicking a link)
9
9
  #and related things like setting a proxy etc. you should find it here.
10
10
  class FetchAction
11
- def initialize
12
- @@current_doc_url = nil
13
- @@current_doc_protocol = nil
14
- @@base_dir = nil
15
- @@host_name = nil
16
- @@agent = WWW::Mechanize.new
17
- @@history = []
18
- end
11
+
12
+ @@current_doc_url = nil
13
+ @@current_doc_protocol = nil
14
+ @@base_dir = nil
15
+ @@host_name = nil
16
+ @@agent = WWW::Mechanize.new
17
+ @@history = []
19
18
 
20
19
  ##
21
20
  #Action to fetch a document (either a file or a http address)
@@ -25,29 +24,38 @@ module Scrubyt
25
24
  #_doc_url_ - the url or file name to fetch
26
25
  def self.fetch(doc_url, *args)
27
26
  #Refactor this crap!!! with option_accessor stuff
28
- proxy = args[0][:proxy]
29
- mechanize_doc = args[0][:mechanize_doc]
30
- resolve = args[0][:resolve] || :full
31
- basic_auth = args[0][:basic_auth]
32
- user_agent = args[0][:user_agent] || "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
33
- #Refactor this whole stuff as well!!! It looks awful...
34
- parse_and_set_proxy(proxy) if proxy
35
- set_user_agent(user_agent)
36
- parse_and_set_basic_auth(basic_auth) if basic_auth
37
- if !mechanize_doc
38
- @@current_doc_url = doc_url
39
- @@current_doc_protocol = determine_protocol
27
+
28
+ if args.size > 0
29
+ proxy = args[0][:proxy]
30
+ mechanize_doc = args[0][:mechanize_doc]
31
+ resolve = args[0][:resolve]
32
+ basic_auth = args[0][:basic_auth]
33
+ user_agent = args[0][:user_agent] || "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)"
34
+ #Refactor this whole stuff as well!!! It looks awful...
35
+ parse_and_set_proxy(proxy) if proxy
36
+ set_user_agent(user_agent)
37
+ parse_and_set_basic_auth(basic_auth) if basic_auth
38
+ else
39
+ mechanize_doc = nil
40
+ resolve = :full
41
+ end
42
+
43
+ @@current_doc_url = doc_url
44
+ @@current_doc_protocol = determine_protocol
45
+
46
+ if mechanize_doc.nil? && @@current_doc_protocol != 'file'
40
47
  handle_relative_path(doc_url)
41
- handle_relative_url(doc_url,resolve)
42
- puts "[ACTION] fetching document: #{@@current_doc_url}"
43
- if @@current_doc_protocol != 'file'
48
+ handle_relative_url(doc_url, resolve)
49
+
50
+ Scrubyt.log :ACTION, "fetching document: #{@@current_doc_url}"
51
+
52
+ unless 'file' == @@current_doc_protocol
44
53
  @@mechanize_doc = @@agent.get(@@current_doc_url)
45
54
  end
46
55
  else
47
- @@current_doc_url = doc_url
48
56
  @@mechanize_doc = mechanize_doc
49
- @@current_doc_protocol = determine_protocol
50
57
  end
58
+
51
59
  if @@current_doc_protocol == 'file'
52
60
  @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(open(@@current_doc_url).read))
53
61
  else
@@ -58,22 +66,24 @@ module Scrubyt
58
66
 
59
67
  ##
60
68
  #Submit the last form;
61
- def self.submit(current_form, button=nil)
62
- puts '[ACTION] submitting form...'
69
+ def self.submit(current_form, button=nil, type=nil)
70
+ Scrubyt.log :ACTION, 'Submitting form...'
63
71
  if button == nil
64
72
  result_page = @@agent.submit(current_form)
73
+ elsif type
74
+ result_page = current_form.submit(button)
65
75
  else
66
76
  result_page = @@agent.submit(current_form, button)
67
77
  end
68
78
  @@current_doc_url = result_page.uri.to_s
69
- puts "[ACTION] fetched #{@@current_doc_url}"
79
+ Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
70
80
  fetch(@@current_doc_url, :mechanize_doc => result_page)
71
81
  end
72
82
 
73
83
  ##
74
84
  #Click the link specified by the text
75
85
  def self.click_link(link_spec,index = 0)
76
- print "[ACTION] clicking link specified by: "; p link_spec
86
+ Scrubyt.log :ACTION, "Clicking link specified by: %p" % link_spec
77
87
  if link_spec.is_a? Hash
78
88
  clicked_elem = CompoundExampleLookup.find_node_from_compund_example(@@hpricot_doc, link_spec, false, index)
79
89
  else
@@ -82,7 +92,16 @@ module Scrubyt
82
92
  clicked_elem = XPathUtils.find_nearest_node_with_attribute(clicked_elem, 'href')
83
93
  result_page = @@agent.click(clicked_elem)
84
94
  @@current_doc_url = result_page.uri.to_s
85
- puts "[ACTION] fetched #{@@current_doc_url}"
95
+ Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
96
+ fetch(@@current_doc_url, :mechanize_doc => result_page)
97
+ end
98
+
99
+ def self.click_image_map(index = 0)
100
+ Scrubyt.log :ACTION, "Clicking image map at index: %p" % index
101
+ uri = @@mechanize_doc.search("//area")[index]['href']
102
+ result_page = @@agent.get(uri)
103
+ @@current_doc_url = result_page.uri.to_s
104
+ Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
86
105
  fetch(@@current_doc_url, :mechanize_doc => result_page)
87
106
  end
88
107
 
@@ -118,6 +137,14 @@ module Scrubyt
118
137
  @@hpricot_doc = @@history.pop
119
138
  end
120
139
 
140
+ def self.store_host_name(doc_url)
141
+ @@host_name = 'http://' + @@mechanize_doc.uri.to_s.scan(/http:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'http'
142
+ @@host_name = 'https://' + @@mechanize_doc.uri.to_s.scan(/https:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'https'
143
+ @@host_name = doc_url if @@host_name == nil
144
+ @@host_name = @@host_name[0..-2] if @@host_name[-1].chr == '/'
145
+ @@original_host_name ||= @@host_name
146
+ end #end of method store_host_name
147
+
121
148
  def self.determine_protocol
122
149
  old_protocol = @@current_doc_protocol
123
150
  new_protocol = case @@current_doc_url
@@ -149,18 +176,18 @@ module Scrubyt
149
176
  exit
150
177
  end
151
178
  end
152
- puts "[ACTION] Setting proxy: host=<#{@@host}>, port=<#{@@port}>"
179
+ Scrubyt.log :ACTION, "Setting proxy: host=<#{@@host}>, port=<#{@@port}>"
153
180
  @@agent.set_proxy(@@host, @@port)
154
181
  end
155
182
 
156
183
  def self.parse_and_set_basic_auth(basic_auth)
157
184
  login, pass = basic_auth.split('@')
158
- puts "[ACTION] Basic authentication: login=<#{login}>, pass=<#{pass}>"
185
+ Scrubyt.log :ACTION, "Basic authentication: login=<#{login}>, pass=<#{pass}>"
159
186
  @@agent.basic_auth(login, pass)
160
187
  end
161
188
 
162
189
  def self.set_user_agent(user_agent)
163
- #puts "[ACTION] Setting user-agent to #{user_agent}"
190
+ Scrubyt.log :ACTION, "Setting user-agent to #{user_agent}"
164
191
  @@agent.user_agent = user_agent
165
192
  end
166
193
 
@@ -172,22 +199,29 @@ module Scrubyt
172
199
  end
173
200
  end
174
201
 
175
- def self.store_host_name(doc_url)
176
- @@host_name = 'http://' + @@mechanize_doc.uri.to_s.scan(/http:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'http'
177
- @@host_name = 'https://' + @@mechanize_doc.uri.to_s.scan(/https:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'https'
178
- @@host_name = doc_url if @@host_name == nil
179
- @@host_name = @@host_name[0..-2] if @@host_name[-1].chr == '/'
180
- @@original_host_name ||= @@host_name
181
- end #end of method store_host_name
182
-
183
202
  def self.handle_relative_url(doc_url, resolve)
184
203
  return if doc_url =~ /^http/
204
+ if doc_url !~ /^\//
205
+ first_char = doc_url[0..0]
206
+ doc_url = ( first_char == '?' ? '' : '/' ) + doc_url
207
+ if first_char == '?' #This is an ugly hack... really have to throw this shit out and go with mechanize's
208
+ current_uri = @@mechanize_doc.uri.to_s
209
+ current_uri = @@agent.history.first.uri.to_s if current_uri =~ /\/popup\//
210
+ if (current_uri.include? '?')
211
+ current_uri = current_uri.scan(/.+\//)[0]
212
+ else
213
+ current_uri += '/' unless current_uri[-1..-1] == '/'
214
+ end
215
+ @@current_doc_url = current_uri + doc_url
216
+ return
217
+ end
218
+ end
185
219
  case resolve
186
220
  when :full
187
221
  @@current_doc_url = (@@host_name + doc_url) if ( @@host_name != nil && (doc_url !~ /#{@@host_name}/))
188
222
  @@current_doc_url = @@current_doc_url.split('/').uniq.join('/')
189
223
  when :host
190
- base_host_name = @@host_name.scan(/(http.+?\/\/.+?)\//)[0][0]
224
+ base_host_name = (@@host_name.count("/") == 2 ? @@host_name : @@host_name.scan(/(http.+?\/\/.+?)\//)[0][0])
191
225
  @@current_doc_url = base_host_name + doc_url
192
226
  else
193
227
  #custom resolving
@@ -13,8 +13,10 @@ module Scrubyt
13
13
  'fill_textarea',
14
14
  'submit',
15
15
  'click_link',
16
+ 'click_image_map',
16
17
  'select_option',
17
18
  'check_checkbox',
19
+ 'check_radiobutton',
18
20
  'end']
19
21
 
20
22
  def initialize
@@ -48,16 +50,20 @@ module Scrubyt
48
50
  def self.select_option(selectlist_name, option)
49
51
  lookup_form_for_tag('select','select list',selectlist_name,option)
50
52
  select_list = @@current_form.fields.find {|f| f.name == selectlist_name}
51
- searched_option = select_list.options.find{|f| f.text == option}
53
+ searched_option = select_list.options.find{|f| f.text.strip == option}
52
54
  searched_option.click
53
55
  end
54
56
 
55
57
  def self.check_checkbox(checkbox_name)
56
- puts checkbox_name
57
58
  lookup_form_for_tag('input','checkbox',checkbox_name, '')
58
59
  @@current_form.checkboxes.name(checkbox_name).check
59
60
  end
60
61
 
62
+ def self.check_radiobutton(checkbox_name, index=0)
63
+ lookup_form_for_tag('input','radiobutton',checkbox_name, '',index)
64
+ @@current_form.radiobuttons.name(checkbox_name)[index].check
65
+ end
66
+
61
67
  ##
62
68
  #Fetch the document
63
69
  def self.fetch(*args)
@@ -65,9 +71,14 @@ module Scrubyt
65
71
  end
66
72
  ##
67
73
  #Submit the current form (delegate it to NavigationActions)
68
- def self.submit(index=nil)
74
+ def self.submit(index=nil, type=nil)
69
75
  if index == nil
70
76
  FetchAction.submit(@@current_form)
77
+ #----- added by nickmerwin@gmail.com -----
78
+ elsif index.class == String
79
+ button = @@current_form.buttons.detect{|b| b.name == index}
80
+ FetchAction.submit(@@current_form, button,type)
81
+ #-----------------------------------------
71
82
  else
72
83
  FetchAction.submit(@@current_form, @@current_form.buttons[index])
73
84
  end
@@ -79,6 +90,10 @@ module Scrubyt
79
90
  FetchAction.click_link(link_spec,index)
80
91
  end
81
92
 
93
+ def self.click_image_map(index=0)
94
+ FetchAction.click_image_map(index)
95
+ end
96
+
82
97
  def self.get_hpricot_doc
83
98
  FetchAction.get_hpricot_doc
84
99
  end
@@ -92,10 +107,12 @@ module Scrubyt
92
107
  end
93
108
 
94
109
  private
95
- def self.lookup_form_for_tag(tag,widget_name,name_attribute,query_string)
96
- puts "[ACTION] typing #{query_string} into the #{widget_name} named '#{name_attribute}'"
97
- widget = (FetchAction.get_hpricot_doc/"#{tag}[@name=#{name_attribute}]").map()[0]
110
+ def self.lookup_form_for_tag(tag, widget_name, name_attribute, query_string, index=0)
111
+ Scrubyt.log :ACTION, "typing #{query_string} into the #{widget_name} named '#{name_attribute}'"
112
+ widget = (FetchAction.get_hpricot_doc/"#{tag}[@name=#{name_attribute}]").map()[index]
113
+ p widget
98
114
  form_tag = Scrubyt::XPathUtils.traverse_up_until_name(widget, 'form')
115
+ p form_tag
99
116
  find_form_based_on_tag(form_tag, ['name', 'id', 'action'])
100
117
  end
101
118
 
@@ -112,6 +129,7 @@ private
112
129
  loop do
113
130
  @@current_form = FetchAction.get_mechanize_doc.forms[i]
114
131
  return nil if @@current_form == nil
132
+ puts i
115
133
  break if @@current_form.form_node.attributes[lookup_attribute_name] == lookup_attribute_value
116
134
  i+= 1
117
135
  end