scrubyt 0.2.8 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +32 -2
- data/Rakefile +25 -20
- data/lib/scrubyt.rb +24 -5
- data/lib/scrubyt/core/navigation/fetch_action.rb +76 -42
- data/lib/scrubyt/core/navigation/navigation_actions.rb +24 -6
- data/lib/scrubyt/core/scraping/filters/base_filter.rb +5 -5
- data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +2 -2
- data/lib/scrubyt/core/scraping/filters/download_filter.rb +2 -1
- data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +7 -2
- data/lib/scrubyt/core/scraping/filters/tree_filter.rb +37 -12
- data/lib/scrubyt/core/scraping/pattern.rb +82 -90
- data/lib/scrubyt/core/scraping/pre_filter_document.rb +2 -1
- data/lib/scrubyt/core/shared/evaluation_context.rb +14 -37
- data/lib/scrubyt/core/shared/extractor.rb +55 -54
- data/lib/scrubyt/logging.rb +16 -0
- data/lib/scrubyt/output/export.rb +1 -1
- data/lib/scrubyt/output/post_processor.rb +6 -5
- data/lib/scrubyt/output/result.rb +1 -0
- data/lib/scrubyt/output/result_dumper.rb +4 -3
- data/lib/scrubyt/output/result_node.rb +73 -0
- data/lib/scrubyt/output/scrubyt_result.rb +28 -0
- data/lib/scrubyt/utils/ruby_extensions.rb +8 -0
- data/lib/scrubyt/utils/simple_example_lookup.rb +14 -1
- data/lib/scrubyt/utils/xpathutils.rb +11 -0
- metadata +7 -12
- data/test/unittests/constraint_test.rb +0 -107
- data/test/unittests/extractor_test.rb +0 -91
- data/test/unittests/filter_test.rb +0 -79
- data/test/unittests/input/constraint_test.html +0 -55
- data/test/unittests/input/test.html +0 -39
- data/test/unittests/pattern_test.rb +0 -27
- data/test/unittests/simple_example_lookup_test.rb +0 -68
- data/test/unittests/xpathutils_test.rb +0 -152
data/CHANGELOG
CHANGED
@@ -1,7 +1,38 @@
 = scRUBYt! Changelog

+== 0.3.0
+=== 21st May, 2007
+
+=<tt>changes:</tt>
+
+[NEW] complete rewrite of the output system, creating
+      a solid foundation for more robust output functions
+      (credit: Neelance)
+[NEW] logging - no annoying puts messages anymore! (credit: Tim Fletcher)
+[NEW] can index an example - e.g.
+          link 'more[5]'
+      semantics: give me the 6th element with the text 'more'
+[NEW] can use XPath checking an attribute value, like "//div[@id='content']"
+[NEW] default values for missing elements (first version was done in 0.2.8
+      but it did not work for all cases)
+[NEW] possibility to click a button with its text (instead of its index)
+      (credit: Nick Merwin)
+[NEW] can click on image buttons (by specifying the name of the button)
+[NEW] possibility to extract a URL in one step, like so:
+          link 'The Difference/@href'
+      i.e. give me the href attribute of the element matched by the example 'The Difference'
+[NEW] new way to match an element of the page:
+          div 'div[The Difference]'
+      means 'return the div which contains the string "The Difference"'. This is
+      useful if the XPath of the element is non-constant across the same site (e.g.
+      sometimes a banner or ad is added, sometimes not, etc.)
+[FIX] replacing \240 (&nbsp;) with space in the preprocessing phase automatically
+[FIX] correctly downloading the image if the src
+      attribute had a leading space, as in
+          <img src=' /files/downloads/images/image.jpg'/>
+
 == 0.2.7
-===
+=== 12th April, 2007

 =<tt>changes:</tt>

@@ -9,7 +40,6 @@
       parent pattern
 [NEW] checking checkboxes
 [NEW] basic authentication support
-[NEW] default values for missing elements
 [NEW] possibility to resolve relative paths against a custom url
 [NEW] first simple version of to_csv and to_hash
 [NEW] complete rewrite of the exporting system (Credit: Neelance)

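To make the new matching features concrete, a minimal extractor sketch using them might look as follows (the URL and pattern names such as more_link and detail_url are hypothetical; the Extractor.define shape is scRUBYt!'s usual DSL):

    require 'rubygems'
    require 'scrubyt'

    data = Scrubyt::Extractor.define do
      fetch 'http://www.example.com/'

      record do
        # indexed example: the 6th element whose text is 'more'
        more_link 'more[5]'
        # XPath with an attribute check
        content "//div[@id='content']"
        # one-step attribute extraction: the href of the element matched by 'The Difference'
        detail_url 'The Difference/@href'
        # the div that contains the string 'The Difference'
        teaser 'div[The Difference]'
      end
    end

    puts data.to_xml
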
data/Rakefile
CHANGED
@@ -7,8 +7,7 @@ require 'rake/packagetask'
 # Dependencies
 ###################################################

-task "default" => ["
-task "fulltest" => ["test", "blackbox"]
+task "default" => ["test_all"]
 task "generate_rdoc" => ["cleanup_readme"]
 task "cleanup_readme" => ["rdoc"]

@@ -16,22 +15,24 @@ task "cleanup_readme" => ["rdoc"]
 # Gem specification
 ###################################################

-gem_spec = Gem::Specification.new do |s|
-  s.name = 'scrubyt'
-  s.version = '0.2.8'
+gem_spec = Gem::Specification.new do |s|
+  s.name = 'scrubyt'
+  s.version = '0.3.0'
   s.summary = 'A powerful Web-scraping framework'
-  s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
-  # Files containing Test::Unit test cases.
-  s.test_files = FileList['test/unittests/**/*']
-  # List of other files to be included.
+  s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
+  # Files containing Test::Unit test cases.
+  s.test_files = FileList['test/unittests/**/*']
+  # List of other files to be included.
   s.files = FileList['COPYING', 'README', 'CHANGELOG', 'Rakefile', 'lib/**/*.rb']
   s.author = 'Peter Szinek'
-  s.email = 'peter@rubyrailways.com'
+  s.email = 'peter@rubyrailways.com'
   s.homepage = 'http://www.scrubyt.org'
   s.add_dependency('hpricot', '>= 0.5')
-  s.add_dependency('mechanize', '>= 0.6.3')
+  s.add_dependency('mechanize', '>= 0.6.3')
+  #s.add_dependency('parsetree', '>= 1.7.0')
+  #s.add_dependency('ruby2ruby', '>= 1.1.5')
   s.has_rdoc = 'true'
-end
+end

 ###################################################
 # Tasks

@@ -47,12 +48,16 @@ Rake::RDocTask.new do |generate_rdoc|
   generate_rdoc.options << '--line-numbers' << '--inline-source'
 end

-Rake::TestTask.new do |
-
-end
+Rake::TestTask.new(:test_all) do |task|
+  task.pattern = 'test/*_test.rb'
+end
+
+Rake::TestTask.new(:test_blackbox) do |task|
+  task.test_files = ['test/blackbox_test.rb']
+end

-
-
+Rake::TestTask.new(:test_non_blackbox) do |task|
+  task.test_files = FileList['test/*_test.rb'] - ['test/blackbox_test.rb']
 end

 task "cleanup_readme" do

@@ -77,12 +82,12 @@ end
 task "generate_rdoc" do
 end

-Rake::GemPackageTask.new(gem_spec) do |pkg|
+Rake::GemPackageTask.new(gem_spec) do |pkg|
   pkg.need_zip = false
-  pkg.need_tar = false
+  pkg.need_tar = false
 end

-Rake::PackageTask.new('scrubyt-examples', '0.2.8') do |pkg|
+Rake::PackageTask.new('scrubyt-examples', '0.3.0') do |pkg|
   pkg.need_zip = true
   pkg.need_tar = true
   pkg.package_files.include("examples/**/*")

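The single fulltest task is thus split into three named Rake::TestTask definitions; with this Rakefile they are invoked by task name, e.g.:

    rake test_all           # every file matching test/*_test.rb
    rake test_blackbox      # only test/blackbox_test.rb
    rake test_non_blackbox  # everything except the blackbox suite
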
data/lib/scrubyt.rb
CHANGED
@@ -1,14 +1,34 @@
 #ruby core
 require 'open-uri'
+require 'erb'

 #gems
 require 'rubygems'
 require 'mechanize'
 require 'hpricot'
-require '
+require 'parse_tree_reloaded'
+
+#little hack to avoid that ruby2ruby tries to load the original parse_tree
+if Gem
+  module Gem
+    class << self
+      alias_method :activate_orig, :activate
+      def activate(gem, autorequire, *version_requirements)
+        activate_orig(gem, autorequire, *version_requirements) unless gem.is_a?(Gem::Dependency) && gem.name == 'ParseTree'
+      end
+    end
+  end
+end
+module Kernel
+  alias_method :require_orig, :require
+  def require(path)
+    require_orig(path) unless path == 'parse_tree'
+  end
+end
 require 'ruby2ruby'

 #scrubyt
+require 'scrubyt/logging'
 require 'scrubyt/utils/ruby_extensions.rb'
 require 'scrubyt/utils/xpathutils.rb'
 require 'scrubyt/utils/shared_utils.rb'

@@ -19,6 +39,8 @@ require 'scrubyt/core/scraping/constraint.rb'
 require 'scrubyt/core/scraping/result_indexer.rb'
 require 'scrubyt/core/scraping/pre_filter_document.rb'
 require 'scrubyt/core/scraping/compound_example.rb'
+require 'scrubyt/output/result_node.rb'
+require 'scrubyt/output/scrubyt_result.rb'
 require 'scrubyt/output/export.rb'
 require 'scrubyt/core/shared/extractor.rb'
 require 'scrubyt/core/scraping/filters/base_filter.rb'

@@ -29,10 +51,7 @@ require 'scrubyt/core/scraping/filters/html_subtree_filter.rb'
 require 'scrubyt/core/scraping/filters/regexp_filter.rb'
 require 'scrubyt/core/scraping/filters/tree_filter.rb'
 require 'scrubyt/core/scraping/pattern.rb'
-require 'scrubyt/output/result_dumper.rb'
-require 'scrubyt/output/result.rb'
-require 'scrubyt/output/post_processor.rb'
 require 'scrubyt/core/navigation/navigation_actions.rb'
 require 'scrubyt/core/navigation/fetch_action.rb'
 require 'scrubyt/core/shared/evaluation_context.rb'
-require 'scrubyt/core/shared/u_r_i_builder.rb'
+require 'scrubyt/core/shared/u_r_i_builder.rb'

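The Gem.activate and Kernel#require wrappers above use the classic alias_method interception pattern: keep the original method under a new name, then redefine the method to delegate to it except for the one case being blocked (here, loads of the original parse_tree, which the bundled parse_tree_reloaded replaces). The Kernel half of the trick, isolated as a self-contained sketch:

    module Kernel
      alias_method :require_orig, :require

      # Delegate to the real require for everything except the shadowed
      # library, so a `require 'parse_tree'` inside ruby2ruby becomes a no-op.
      def require(path)
        require_orig(path) unless path == 'parse_tree'
      end
    end
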
data/lib/scrubyt/core/navigation/fetch_action.rb
CHANGED

@@ -8,14 +8,13 @@ module Scrubyt
   #which is loading a document (even by submitting a form or clicking a link)
   #and related things like setting a proxy etc. you should find it here.
   class FetchAction
-
-
-
-
-
-
-
-    end
+
+    @@current_doc_url = nil
+    @@current_doc_protocol = nil
+    @@base_dir = nil
+    @@host_name = nil
+    @@agent = WWW::Mechanize.new
+    @@history = []

     ##
     #Action to fetch a document (either a file or a http address)

@@ -25,29 +24,38 @@ module Scrubyt
     #_doc_url_ - the url or file name to fetch
     def self.fetch(doc_url, *args)
       #Refactor this crap!!! with option_accessor stuff
-
-
-
-
-
-
-
-
-
-
-
-
+
+      if args.size > 0
+        proxy = args[0][:proxy]
+        mechanize_doc = args[0][:mechanize_doc]
+        resolve = args[0][:resolve]
+        basic_auth = args[0][:basic_auth]
+        user_agent = args[0][:user_agent] || "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)"
+        #Refactor this whole stuff as well!!! It looks awful...
+        parse_and_set_proxy(proxy) if proxy
+        set_user_agent(user_agent)
+        parse_and_set_basic_auth(basic_auth) if basic_auth
+      else
+        mechanize_doc = nil
+        resolve = :full
+      end
+
+      @@current_doc_url = doc_url
+      @@current_doc_protocol = determine_protocol
+
+      if mechanize_doc.nil? && @@current_doc_protocol != 'file'
         handle_relative_path(doc_url)
-        handle_relative_url(doc_url,resolve)
-
-
+        handle_relative_url(doc_url, resolve)
+
+        Scrubyt.log :ACTION, "fetching document: #{@@current_doc_url}"
+
+        unless 'file' == @@current_doc_protocol
           @@mechanize_doc = @@agent.get(@@current_doc_url)
         end
       else
-        @@current_doc_url = doc_url
         @@mechanize_doc = mechanize_doc
-        @@current_doc_protocol = determine_protocol
       end
+
       if @@current_doc_protocol == 'file'
         @@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(open(@@current_doc_url).read))
       else

@@ -58,22 +66,24 @@ module Scrubyt

     ##
     #Submit the last form;
-    def self.submit(current_form, button=nil)
-
+    def self.submit(current_form, button=nil, type=nil)
+      Scrubyt.log :ACTION, 'Submitting form...'
       if button == nil
         result_page = @@agent.submit(current_form)
+      elsif type
+        result_page = current_form.submit(button)
       else
         result_page = @@agent.submit(current_form, button)
       end
       @@current_doc_url = result_page.uri.to_s
-
+      Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
       fetch(@@current_doc_url, :mechanize_doc => result_page)
     end

     ##
     #Click the link specified by the text
     def self.click_link(link_spec,index = 0)
-
+      Scrubyt.log :ACTION, "Clicking link specified by: %p" % link_spec
       if link_spec.is_a? Hash
         clicked_elem = CompoundExampleLookup.find_node_from_compund_example(@@hpricot_doc, link_spec, false, index)
       else

@@ -82,7 +92,16 @@ module Scrubyt
       clicked_elem = XPathUtils.find_nearest_node_with_attribute(clicked_elem, 'href')
       result_page = @@agent.click(clicked_elem)
       @@current_doc_url = result_page.uri.to_s
-
+      Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
+      fetch(@@current_doc_url, :mechanize_doc => result_page)
+    end
+
+    def self.click_image_map(index = 0)
+      Scrubyt.log :ACTION, "Clicking image map at index: %p" % index
+      uri = @@mechanize_doc.search("//area")[index]['href']
+      result_page = @@agent.get(uri)
+      @@current_doc_url = result_page.uri.to_s
+      Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
       fetch(@@current_doc_url, :mechanize_doc => result_page)
     end

@@ -118,6 +137,14 @@ module Scrubyt
       @@hpricot_doc = @@history.pop
     end

+    def self.store_host_name(doc_url)
+      @@host_name = 'http://' + @@mechanize_doc.uri.to_s.scan(/http:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'http'
+      @@host_name = 'https://' + @@mechanize_doc.uri.to_s.scan(/https:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'https'
+      @@host_name = doc_url if @@host_name == nil
+      @@host_name = @@host_name[0..-2] if @@host_name[-1].chr == '/'
+      @@original_host_name ||= @@host_name
+    end #end of method store_host_name
+
     def self.determine_protocol
       old_protocol = @@current_doc_protocol
       new_protocol = case @@current_doc_url

@@ -149,18 +176,18 @@ module Scrubyt
           exit
         end
       end
-
+      Scrubyt.log :ACTION, "Setting proxy: host=<#{@@host}>, port=<#{@@port}>"
       @@agent.set_proxy(@@host, @@port)
     end

     def self.parse_and_set_basic_auth(basic_auth)
       login, pass = basic_auth.split('@')
-
+      Scrubyt.log :ACTION, "Basic authentication: login=<#{login}>, pass=<#{pass}>"
       @@agent.basic_auth(login, pass)
     end

     def self.set_user_agent(user_agent)
-
+      Scrubyt.log :ACTION, "Setting user-agent to #{user_agent}"
       @@agent.user_agent = user_agent
     end

@@ -172,22 +199,29 @@ module Scrubyt
       end
     end

-    def self.store_host_name(doc_url)
-      @@host_name = 'http://' + @@mechanize_doc.uri.to_s.scan(/http:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'http'
-      @@host_name = 'https://' + @@mechanize_doc.uri.to_s.scan(/https:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'https'
-      @@host_name = doc_url if @@host_name == nil
-      @@host_name = @@host_name[0..-2] if @@host_name[-1].chr == '/'
-      @@original_host_name ||= @@host_name
-    end #end of method store_host_name
-
     def self.handle_relative_url(doc_url, resolve)
       return if doc_url =~ /^http/
+      if doc_url !~ /^\//
+        first_char = doc_url[0..0]
+        doc_url = ( first_char == '?' ? '' : '/' ) + doc_url
+        if first_char == '?' #This is an ugly hack... really have to throw this shit out and go with mechanize's
+          current_uri = @@mechanize_doc.uri.to_s
+          current_uri = @@agent.history.first.uri.to_s if current_uri =~ /\/popup\//
+          if (current_uri.include? '?')
+            current_uri = current_uri.scan(/.+\//)[0]
+          else
+            current_uri += '/' unless current_uri[-1..-1] == '/'
+          end
+          @@current_doc_url = current_uri + doc_url
+          return
+        end
+      end
       case resolve
       when :full
         @@current_doc_url = (@@host_name + doc_url) if ( @@host_name != nil && (doc_url !~ /#{@@host_name}/))
         @@current_doc_url = @@current_doc_url.split('/').uniq.join('/')
       when :host
-        base_host_name = @@host_name.scan(/(http.+?\/\/.+?)\//)[0][0]
+        base_host_name = (@@host_name.count("/") == 2 ? @@host_name : @@host_name.scan(/(http.+?\/\/.+?)\//)[0][0])
         @@current_doc_url = base_host_name + doc_url
       else
         #custom resilving

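As reworked above, fetch reads its options from a trailing hash. A hedged example of a call exercising them (all values hypothetical; :proxy is expected as host:port and :basic_auth as login@password, per parse_and_set_proxy and parse_and_set_basic_auth):

    Scrubyt::FetchAction.fetch('http://www.example.com/',
                               :proxy      => 'localhost:8080',
                               :basic_auth => 'admin@secret',
                               :user_agent => 'MyAgent/1.0')
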
data/lib/scrubyt/core/navigation/navigation_actions.rb
CHANGED

@@ -13,8 +13,10 @@
                 'fill_textarea',
                 'submit',
                 'click_link',
+                'click_image_map',
                 'select_option',
                 'check_checkbox',
+                'check_radiobutton',
                 'end']

     def initialize

@@ -48,16 +50,20 @@
     def self.select_option(selectlist_name, option)
       lookup_form_for_tag('select','select list',selectlist_name,option)
       select_list = @@current_form.fields.find {|f| f.name == selectlist_name}
-      searched_option = select_list.options.find{|f| f.text == option}
+      searched_option = select_list.options.find{|f| f.text.strip == option}
       searched_option.click
     end

     def self.check_checkbox(checkbox_name)
-      puts checkbox_name
       lookup_form_for_tag('input','checkbox',checkbox_name, '')
       @@current_form.checkboxes.name(checkbox_name).check
     end

+    def self.check_radiobutton(checkbox_name, index=0)
+      lookup_form_for_tag('input','radiobutton',checkbox_name, '',index)
+      @@current_form.radiobuttons.name(checkbox_name)[index].check
+    end
+
     ##
     #Fetch the document
     def self.fetch(*args)

@@ -65,9 +71,14 @@
     end
     ##
     #Submit the current form (delegate it to NavigationActions)
-    def self.submit(index=nil)
+    def self.submit(index=nil, type=nil)
       if index == nil
         FetchAction.submit(@@current_form)
+      #----- added by nickmerwin@gmail.com -----
+      elsif index.class == String
+        button = @@current_form.buttons.detect{|b| b.name == index}
+        FetchAction.submit(@@current_form, button,type)
+      #-----------------------------------------
       else
         FetchAction.submit(@@current_form, @@current_form.buttons[index])
       end

@@ -79,6 +90,10 @@
       FetchAction.click_link(link_spec,index)
     end

+    def self.click_image_map(index=0)
+      FetchAction.click_image_map(index)
+    end
+
     def self.get_hpricot_doc
       FetchAction.get_hpricot_doc
     end

@@ -92,10 +107,12 @@
     end

 private
-    def self.lookup_form_for_tag(tag,widget_name,name_attribute,query_string)
-
-    widget = (FetchAction.get_hpricot_doc/"#{tag}[@name=#{name_attribute}]").map()[
+    def self.lookup_form_for_tag(tag, widget_name, name_attribute, query_string, index=0)
+      Scrubyt.log :ACTION, "typing #{query_string} into the #{widget_name} named '#{name_attribute}'"
+      widget = (FetchAction.get_hpricot_doc/"#{tag}[@name=#{name_attribute}]").map()[index]
+      p widget
       form_tag = Scrubyt::XPathUtils.traverse_up_until_name(widget, 'form')
+      p form_tag
       find_form_based_on_tag(form_tag, ['name', 'id', 'action'])
     end

@@ -112,6 +129,7 @@ private
     loop do
       @@current_form = FetchAction.get_mechanize_doc.forms[i]
       return nil if @@current_form == nil
+      puts i
       break if @@current_form.form_node.attributes[lookup_attribute_name] == lookup_attribute_value
       i+= 1
     end

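Combined with the FetchAction change, submit can now be driven by a button's name: a String argument is looked up via buttons.detect {|b| b.name == index}, while an Integer still selects by position. A sketch of these form actions inside an extractor definition (the URL, field, and button names are hypothetical):

    Scrubyt::Extractor.define do
      fetch 'http://www.example.com/search'
      fill_textfield 'q', 'scrubyt'
      check_radiobutton 'format', 1   # check the 2nd radio button named 'format'
      submit 'search_button'          # submit via the button with this name
    end
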