scrubyt 0.2.8 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +32 -2
- data/Rakefile +25 -20
- data/lib/scrubyt.rb +24 -5
- data/lib/scrubyt/core/navigation/fetch_action.rb +76 -42
- data/lib/scrubyt/core/navigation/navigation_actions.rb +24 -6
- data/lib/scrubyt/core/scraping/filters/base_filter.rb +5 -5
- data/lib/scrubyt/core/scraping/filters/detail_page_filter.rb +2 -2
- data/lib/scrubyt/core/scraping/filters/download_filter.rb +2 -1
- data/lib/scrubyt/core/scraping/filters/html_subtree_filter.rb +7 -2
- data/lib/scrubyt/core/scraping/filters/tree_filter.rb +37 -12
- data/lib/scrubyt/core/scraping/pattern.rb +82 -90
- data/lib/scrubyt/core/scraping/pre_filter_document.rb +2 -1
- data/lib/scrubyt/core/shared/evaluation_context.rb +14 -37
- data/lib/scrubyt/core/shared/extractor.rb +55 -54
- data/lib/scrubyt/logging.rb +16 -0
- data/lib/scrubyt/output/export.rb +1 -1
- data/lib/scrubyt/output/post_processor.rb +6 -5
- data/lib/scrubyt/output/result.rb +1 -0
- data/lib/scrubyt/output/result_dumper.rb +4 -3
- data/lib/scrubyt/output/result_node.rb +73 -0
- data/lib/scrubyt/output/scrubyt_result.rb +28 -0
- data/lib/scrubyt/utils/ruby_extensions.rb +8 -0
- data/lib/scrubyt/utils/simple_example_lookup.rb +14 -1
- data/lib/scrubyt/utils/xpathutils.rb +11 -0
- metadata +7 -12
- data/test/unittests/constraint_test.rb +0 -107
- data/test/unittests/extractor_test.rb +0 -91
- data/test/unittests/filter_test.rb +0 -79
- data/test/unittests/input/constraint_test.html +0 -55
- data/test/unittests/input/test.html +0 -39
- data/test/unittests/pattern_test.rb +0 -27
- data/test/unittests/simple_example_lookup_test.rb +0 -68
- data/test/unittests/xpathutils_test.rb +0 -152
data/CHANGELOG
CHANGED
@@ -1,7 +1,38 @@
|
|
1
1
|
= scRUBYt! Changelog
|
2
2
|
|
3
|
+
== 0.3.0
|
4
|
+
=== 21st May, 2007
|
5
|
+
|
6
|
+
=<tt>changes:</tt>
|
7
|
+
|
8
|
+
[NEW] complete rewrite of the output system, creating
|
9
|
+
a solid foundation for more robust output functions
|
10
|
+
(credit: Neelance)
|
11
|
+
[NEW] logging - no annoying puts messages anymore! (credit: Tim Fletcher)
|
12
|
+
[NEW] can index an example - e.g.
|
13
|
+
link 'more[5]'
|
14
|
+
semantics: give me the 6th element with the text 'more'
|
15
|
+
[NEW] can use XPath checking an attribute value, like "//div[@id='content']"
|
16
|
+
[NEW] default values for missing elements (first version was done in 0.2.8
|
17
|
+
but it did not work for all cases)
|
18
|
+
[NEW] possibility to click button with its text (instead of its index)
|
19
|
+
(credit: Nick Merwin)
|
20
|
+
[NEW] can click on image buttons (by specifying the name of the button)
|
21
|
+
[NEW] possibility to extract a URL in one step, like so:
|
22
|
+
link 'The Difference/@href'
|
23
|
+
i.e. give me the href attribute of the element matched by the example 'The Difference'
|
24
|
+
[NEW] new way to match an element of the page:
|
25
|
+
div 'div[The Difference]'
|
26
|
+
means 'return the div which contains the string "The Difference"'. This is
|
27
|
+
useful if the XPath of the element is non-constant across the same site (e.g.
|
28
|
+
sometimes a banner or ad is added, sometimes not, etc.)
|
29
|
+
[FIX] Replacing \240 (&nbsp;) with space in the preprocessing phase automatically
|
30
|
+
[FIX] Fixed: correctly downloading image if the src
|
31
|
+
attribute had a leading space, as in
|
32
|
+
<img src=' /files/downloads/images/image.jpg'/>
|
33
|
+
|
3
34
|
== 0.2.7
|
4
|
-
===
|
35
|
+
=== 12th April, 2007
|
5
36
|
|
6
37
|
=<tt>changes:</tt>
|
7
38
|
|
@@ -9,7 +40,6 @@
|
|
9
40
|
parent pattern
|
10
41
|
[NEW] checking checkboxes
|
11
42
|
[NEW] basic authentication support
|
12
|
-
[NEW] default values for missing elements
|
13
43
|
[NEW] possibility to resolve relative paths against a custom url
|
14
44
|
[NEW] first simple version of to_csv and to_hash
|
15
45
|
[NEW] complete rewrite of the exporting system (Credit: Neelance)
|
data/Rakefile
CHANGED
@@ -7,8 +7,7 @@ require 'rake/packagetask'
|
|
7
7
|
# Dependencies
|
8
8
|
###################################################
|
9
9
|
|
10
|
-
task "default" => ["
|
11
|
-
task "fulltest" => ["test", "blackbox"]
|
10
|
+
task "default" => ["test_all"]
|
12
11
|
task "generate_rdoc" => ["cleanup_readme"]
|
13
12
|
task "cleanup_readme" => ["rdoc"]
|
14
13
|
|
@@ -16,22 +15,24 @@ task "cleanup_readme" => ["rdoc"]
|
|
16
15
|
# Gem specification
|
17
16
|
###################################################
|
18
17
|
|
19
|
-
gem_spec = Gem::Specification.new do |s|
|
20
|
-
s.name = 'scrubyt'
|
21
|
-
s.version = '0.
|
18
|
+
gem_spec = Gem::Specification.new do |s|
|
19
|
+
s.name = 'scrubyt'
|
20
|
+
s.version = '0.3.0'
|
22
21
|
s.summary = 'A powerful Web-scraping framework'
|
23
|
-
s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
|
24
|
-
# Files containing Test::Unit test cases.
|
25
|
-
s.test_files = FileList['test/unittests/**/*']
|
26
|
-
# List of other files to be included.
|
22
|
+
s.description = %{scRUBYt! is an easy to learn and use, yet powerful and effective web scraping framework. It's most interesting part is a Web-scraping DSL built on HPricot and WWW::Mechanize, which allows to navigate to the page of interest, then extract and query data records with a few lines of code. It is hard to describe scRUBYt! in a few sentences - you have to see it for yourself!}
|
23
|
+
# Files containing Test::Unit test cases.
|
24
|
+
s.test_files = FileList['test/unittests/**/*']
|
25
|
+
# List of other files to be included.
|
27
26
|
s.files = FileList['COPYING', 'README', 'CHANGELOG', 'Rakefile', 'lib/**/*.rb']
|
28
27
|
s.author = 'Peter Szinek'
|
29
|
-
s.email = 'peter@rubyrailways.com'
|
28
|
+
s.email = 'peter@rubyrailways.com'
|
30
29
|
s.homepage = 'http://www.scrubyt.org'
|
31
30
|
s.add_dependency('hpricot', '>= 0.5')
|
32
|
-
s.add_dependency('mechanize', '>= 0.6.3')
|
31
|
+
s.add_dependency('mechanize', '>= 0.6.3')
|
32
|
+
#s.add_dependency('parsetree', '>= 1.7.0')
|
33
|
+
#s.add_dependency('ruby2ruby', '>= 1.1.5')
|
33
34
|
s.has_rdoc = 'true'
|
34
|
-
end
|
35
|
+
end
|
35
36
|
|
36
37
|
###################################################
|
37
38
|
# Tasks
|
@@ -47,12 +48,16 @@ Rake::RDocTask.new do |generate_rdoc|
|
|
47
48
|
generate_rdoc.options << '--line-numbers' << '--inline-source'
|
48
49
|
end
|
49
50
|
|
50
|
-
Rake::TestTask.new do |
|
51
|
-
|
52
|
-
end
|
51
|
+
Rake::TestTask.new(:test_all) do |task|
|
52
|
+
task.pattern = 'test/*_test.rb'
|
53
|
+
end
|
54
|
+
|
55
|
+
Rake::TestTask.new(:test_blackbox) do |task|
|
56
|
+
task.test_files = ['test/blackbox_test.rb']
|
57
|
+
end
|
53
58
|
|
54
|
-
|
55
|
-
|
59
|
+
Rake::TestTask.new(:test_non_blackbox) do |task|
|
60
|
+
task.test_files = FileList['test/*_test.rb'] - ['test/blackbox_test.rb']
|
56
61
|
end
|
57
62
|
|
58
63
|
task "cleanup_readme" do
|
@@ -77,12 +82,12 @@ end
|
|
77
82
|
task "generate_rdoc" do
|
78
83
|
end
|
79
84
|
|
80
|
-
Rake::GemPackageTask.new(gem_spec) do |pkg|
|
85
|
+
Rake::GemPackageTask.new(gem_spec) do |pkg|
|
81
86
|
pkg.need_zip = false
|
82
|
-
pkg.need_tar = false
|
87
|
+
pkg.need_tar = false
|
83
88
|
end
|
84
89
|
|
85
|
-
Rake::PackageTask.new('scrubyt-examples', '0.
|
90
|
+
Rake::PackageTask.new('scrubyt-examples', '0.3.0') do |pkg|
|
86
91
|
pkg.need_zip = true
|
87
92
|
pkg.need_tar = true
|
88
93
|
pkg.package_files.include("examples/**/*")
|
data/lib/scrubyt.rb
CHANGED
@@ -1,14 +1,34 @@
|
|
1
1
|
#ruby core
|
2
2
|
require 'open-uri'
|
3
|
+
require 'erb'
|
3
4
|
|
4
5
|
#gems
|
5
6
|
require 'rubygems'
|
6
7
|
require 'mechanize'
|
7
8
|
require 'hpricot'
|
8
|
-
require '
|
9
|
+
require 'parse_tree_reloaded'
|
10
|
+
|
11
|
+
#little hack to prevent ruby2ruby from loading the original parse_tree
|
12
|
+
if Gem
|
13
|
+
module Gem
|
14
|
+
class << self
|
15
|
+
alias_method :activate_orig, :activate
|
16
|
+
def activate(gem, autorequire, *version_requirements)
|
17
|
+
activate_orig(gem, autorequire, *version_requirements) unless gem.is_a?(Gem::Dependency) && gem.name == 'ParseTree'
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
module Kernel
|
23
|
+
alias_method :require_orig, :require
|
24
|
+
def require(path)
|
25
|
+
require_orig(path) unless path == 'parse_tree'
|
26
|
+
end
|
27
|
+
end
|
9
28
|
require 'ruby2ruby'
|
10
29
|
|
11
30
|
#scrubyt
|
31
|
+
require 'scrubyt/logging'
|
12
32
|
require 'scrubyt/utils/ruby_extensions.rb'
|
13
33
|
require 'scrubyt/utils/xpathutils.rb'
|
14
34
|
require 'scrubyt/utils/shared_utils.rb'
|
@@ -19,6 +39,8 @@ require 'scrubyt/core/scraping/constraint.rb'
|
|
19
39
|
require 'scrubyt/core/scraping/result_indexer.rb'
|
20
40
|
require 'scrubyt/core/scraping/pre_filter_document.rb'
|
21
41
|
require 'scrubyt/core/scraping/compound_example.rb'
|
42
|
+
require 'scrubyt/output/result_node.rb'
|
43
|
+
require 'scrubyt/output/scrubyt_result.rb'
|
22
44
|
require 'scrubyt/output/export.rb'
|
23
45
|
require 'scrubyt/core/shared/extractor.rb'
|
24
46
|
require 'scrubyt/core/scraping/filters/base_filter.rb'
|
@@ -29,10 +51,7 @@ require 'scrubyt/core/scraping/filters/html_subtree_filter.rb'
|
|
29
51
|
require 'scrubyt/core/scraping/filters/regexp_filter.rb'
|
30
52
|
require 'scrubyt/core/scraping/filters/tree_filter.rb'
|
31
53
|
require 'scrubyt/core/scraping/pattern.rb'
|
32
|
-
require 'scrubyt/output/result_dumper.rb'
|
33
|
-
require 'scrubyt/output/result.rb'
|
34
|
-
require 'scrubyt/output/post_processor.rb'
|
35
54
|
require 'scrubyt/core/navigation/navigation_actions.rb'
|
36
55
|
require 'scrubyt/core/navigation/fetch_action.rb'
|
37
56
|
require 'scrubyt/core/shared/evaluation_context.rb'
|
38
|
-
require 'scrubyt/core/shared/u_r_i_builder.rb'
|
57
|
+
require 'scrubyt/core/shared/u_r_i_builder.rb'
|
@@ -8,14 +8,13 @@ module Scrubyt
|
|
8
8
|
#which is loading a document (even by submitting a form or clicking a link)
|
9
9
|
#and related things like setting a proxy etc. you should find it here.
|
10
10
|
class FetchAction
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
end
|
11
|
+
|
12
|
+
@@current_doc_url = nil
|
13
|
+
@@current_doc_protocol = nil
|
14
|
+
@@base_dir = nil
|
15
|
+
@@host_name = nil
|
16
|
+
@@agent = WWW::Mechanize.new
|
17
|
+
@@history = []
|
19
18
|
|
20
19
|
##
|
21
20
|
#Action to fetch a document (either a file or a http address)
|
@@ -25,29 +24,38 @@ module Scrubyt
|
|
25
24
|
#_doc_url_ - the url or file name to fetch
|
26
25
|
def self.fetch(doc_url, *args)
|
27
26
|
#Refactor this crap!!! with option_accessor stuff
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
27
|
+
|
28
|
+
if args.size > 0
|
29
|
+
proxy = args[0][:proxy]
|
30
|
+
mechanize_doc = args[0][:mechanize_doc]
|
31
|
+
resolve = args[0][:resolve]
|
32
|
+
basic_auth = args[0][:basic_auth]
|
33
|
+
user_agent = args[0][:user_agent] || "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)"
|
34
|
+
#Refactor this whole stuff as well!!! It looks awful...
|
35
|
+
parse_and_set_proxy(proxy) if proxy
|
36
|
+
set_user_agent(user_agent)
|
37
|
+
parse_and_set_basic_auth(basic_auth) if basic_auth
|
38
|
+
else
|
39
|
+
mechanize_doc = nil
|
40
|
+
resolve = :full
|
41
|
+
end
|
42
|
+
|
43
|
+
@@current_doc_url = doc_url
|
44
|
+
@@current_doc_protocol = determine_protocol
|
45
|
+
|
46
|
+
if mechanize_doc.nil? && @@current_doc_protocol != 'file'
|
40
47
|
handle_relative_path(doc_url)
|
41
|
-
handle_relative_url(doc_url,resolve)
|
42
|
-
|
43
|
-
|
48
|
+
handle_relative_url(doc_url, resolve)
|
49
|
+
|
50
|
+
Scrubyt.log :ACTION, "fetching document: #{@@current_doc_url}"
|
51
|
+
|
52
|
+
unless 'file' == @@current_doc_protocol
|
44
53
|
@@mechanize_doc = @@agent.get(@@current_doc_url)
|
45
54
|
end
|
46
55
|
else
|
47
|
-
@@current_doc_url = doc_url
|
48
56
|
@@mechanize_doc = mechanize_doc
|
49
|
-
@@current_doc_protocol = determine_protocol
|
50
57
|
end
|
58
|
+
|
51
59
|
if @@current_doc_protocol == 'file'
|
52
60
|
@@hpricot_doc = Hpricot(PreFilterDocument.br_to_newline(open(@@current_doc_url).read))
|
53
61
|
else
|
@@ -58,22 +66,24 @@ module Scrubyt
|
|
58
66
|
|
59
67
|
##
|
60
68
|
#Submit the last form;
|
61
|
-
def self.submit(current_form, button=nil)
|
62
|
-
|
69
|
+
def self.submit(current_form, button=nil, type=nil)
|
70
|
+
Scrubyt.log :ACTION, 'Submitting form...'
|
63
71
|
if button == nil
|
64
72
|
result_page = @@agent.submit(current_form)
|
73
|
+
elsif type
|
74
|
+
result_page = current_form.submit(button)
|
65
75
|
else
|
66
76
|
result_page = @@agent.submit(current_form, button)
|
67
77
|
end
|
68
78
|
@@current_doc_url = result_page.uri.to_s
|
69
|
-
|
79
|
+
Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
|
70
80
|
fetch(@@current_doc_url, :mechanize_doc => result_page)
|
71
81
|
end
|
72
82
|
|
73
83
|
##
|
74
84
|
#Click the link specified by the text
|
75
85
|
def self.click_link(link_spec,index = 0)
|
76
|
-
|
86
|
+
Scrubyt.log :ACTION, "Clicking link specified by: %p" % link_spec
|
77
87
|
if link_spec.is_a? Hash
|
78
88
|
clicked_elem = CompoundExampleLookup.find_node_from_compund_example(@@hpricot_doc, link_spec, false, index)
|
79
89
|
else
|
@@ -82,7 +92,16 @@ module Scrubyt
|
|
82
92
|
clicked_elem = XPathUtils.find_nearest_node_with_attribute(clicked_elem, 'href')
|
83
93
|
result_page = @@agent.click(clicked_elem)
|
84
94
|
@@current_doc_url = result_page.uri.to_s
|
85
|
-
|
95
|
+
Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
|
96
|
+
fetch(@@current_doc_url, :mechanize_doc => result_page)
|
97
|
+
end
|
98
|
+
|
99
|
+
def self.click_image_map(index = 0)
|
100
|
+
Scrubyt.log :ACTION, "Clicking image map at index: %p" % index
|
101
|
+
uri = @@mechanize_doc.search("//area")[index]['href']
|
102
|
+
result_page = @@agent.get(uri)
|
103
|
+
@@current_doc_url = result_page.uri.to_s
|
104
|
+
Scrubyt.log :ACTION, "Fetching #{@@current_doc_url}"
|
86
105
|
fetch(@@current_doc_url, :mechanize_doc => result_page)
|
87
106
|
end
|
88
107
|
|
@@ -118,6 +137,14 @@ module Scrubyt
|
|
118
137
|
@@hpricot_doc = @@history.pop
|
119
138
|
end
|
120
139
|
|
140
|
+
def self.store_host_name(doc_url)
|
141
|
+
@@host_name = 'http://' + @@mechanize_doc.uri.to_s.scan(/http:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'http'
|
142
|
+
@@host_name = 'https://' + @@mechanize_doc.uri.to_s.scan(/https:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'https'
|
143
|
+
@@host_name = doc_url if @@host_name == nil
|
144
|
+
@@host_name = @@host_name[0..-2] if @@host_name[-1].chr == '/'
|
145
|
+
@@original_host_name ||= @@host_name
|
146
|
+
end #end of method store_host_name
|
147
|
+
|
121
148
|
def self.determine_protocol
|
122
149
|
old_protocol = @@current_doc_protocol
|
123
150
|
new_protocol = case @@current_doc_url
|
@@ -149,18 +176,18 @@ module Scrubyt
|
|
149
176
|
exit
|
150
177
|
end
|
151
178
|
end
|
152
|
-
|
179
|
+
Scrubyt.log :ACTION, "Setting proxy: host=<#{@@host}>, port=<#{@@port}>"
|
153
180
|
@@agent.set_proxy(@@host, @@port)
|
154
181
|
end
|
155
182
|
|
156
183
|
def self.parse_and_set_basic_auth(basic_auth)
|
157
184
|
login, pass = basic_auth.split('@')
|
158
|
-
|
185
|
+
Scrubyt.log :ACTION, "Basic authentication: login=<#{login}>, pass=<#{pass}>"
|
159
186
|
@@agent.basic_auth(login, pass)
|
160
187
|
end
|
161
188
|
|
162
189
|
def self.set_user_agent(user_agent)
|
163
|
-
|
190
|
+
Scrubyt.log :ACTION, "Setting user-agent to #{user_agent}"
|
164
191
|
@@agent.user_agent = user_agent
|
165
192
|
end
|
166
193
|
|
@@ -172,22 +199,29 @@ module Scrubyt
|
|
172
199
|
end
|
173
200
|
end
|
174
201
|
|
175
|
-
def self.store_host_name(doc_url)
|
176
|
-
@@host_name = 'http://' + @@mechanize_doc.uri.to_s.scan(/http:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'http'
|
177
|
-
@@host_name = 'https://' + @@mechanize_doc.uri.to_s.scan(/https:\/\/(.+\/)+/).flatten[0] if @@current_doc_protocol == 'https'
|
178
|
-
@@host_name = doc_url if @@host_name == nil
|
179
|
-
@@host_name = @@host_name[0..-2] if @@host_name[-1].chr == '/'
|
180
|
-
@@original_host_name ||= @@host_name
|
181
|
-
end #end of method store_host_name
|
182
|
-
|
183
202
|
def self.handle_relative_url(doc_url, resolve)
|
184
203
|
return if doc_url =~ /^http/
|
204
|
+
if doc_url !~ /^\//
|
205
|
+
first_char = doc_url[0..0]
|
206
|
+
doc_url = ( first_char == '?' ? '' : '/' ) + doc_url
|
207
|
+
if first_char == '?' #This is an ugly hack... really have to throw this shit out and go with mechanize's
|
208
|
+
current_uri = @@mechanize_doc.uri.to_s
|
209
|
+
current_uri = @@agent.history.first.uri.to_s if current_uri =~ /\/popup\//
|
210
|
+
if (current_uri.include? '?')
|
211
|
+
current_uri = current_uri.scan(/.+\//)[0]
|
212
|
+
else
|
213
|
+
current_uri += '/' unless current_uri[-1..-1] == '/'
|
214
|
+
end
|
215
|
+
@@current_doc_url = current_uri + doc_url
|
216
|
+
return
|
217
|
+
end
|
218
|
+
end
|
185
219
|
case resolve
|
186
220
|
when :full
|
187
221
|
@@current_doc_url = (@@host_name + doc_url) if ( @@host_name != nil && (doc_url !~ /#{@@host_name}/))
|
188
222
|
@@current_doc_url = @@current_doc_url.split('/').uniq.join('/')
|
189
223
|
when :host
|
190
|
-
base_host_name = @@host_name.scan(/(http.+?\/\/.+?)\//)[0][0]
|
224
|
+
base_host_name = (@@host_name.count("/") == 2 ? @@host_name : @@host_name.scan(/(http.+?\/\/.+?)\//)[0][0])
|
191
225
|
@@current_doc_url = base_host_name + doc_url
|
192
226
|
else
|
193
227
|
#custom resolving
|
@@ -13,8 +13,10 @@ module Scrubyt
|
|
13
13
|
'fill_textarea',
|
14
14
|
'submit',
|
15
15
|
'click_link',
|
16
|
+
'click_image_map',
|
16
17
|
'select_option',
|
17
18
|
'check_checkbox',
|
19
|
+
'check_radiobutton',
|
18
20
|
'end']
|
19
21
|
|
20
22
|
def initialize
|
@@ -48,16 +50,20 @@ module Scrubyt
|
|
48
50
|
def self.select_option(selectlist_name, option)
|
49
51
|
lookup_form_for_tag('select','select list',selectlist_name,option)
|
50
52
|
select_list = @@current_form.fields.find {|f| f.name == selectlist_name}
|
51
|
-
searched_option = select_list.options.find{|f| f.text == option}
|
53
|
+
searched_option = select_list.options.find{|f| f.text.strip == option}
|
52
54
|
searched_option.click
|
53
55
|
end
|
54
56
|
|
55
57
|
def self.check_checkbox(checkbox_name)
|
56
|
-
puts checkbox_name
|
57
58
|
lookup_form_for_tag('input','checkbox',checkbox_name, '')
|
58
59
|
@@current_form.checkboxes.name(checkbox_name).check
|
59
60
|
end
|
60
61
|
|
62
|
+
def self.check_radiobutton(checkbox_name, index=0)
|
63
|
+
lookup_form_for_tag('input','radiobutton',checkbox_name, '',index)
|
64
|
+
@@current_form.radiobuttons.name(checkbox_name)[index].check
|
65
|
+
end
|
66
|
+
|
61
67
|
##
|
62
68
|
#Fetch the document
|
63
69
|
def self.fetch(*args)
|
@@ -65,9 +71,14 @@ module Scrubyt
|
|
65
71
|
end
|
66
72
|
##
|
67
73
|
#Submit the current form (delegate it to FetchAction)
|
68
|
-
def self.submit(index=nil)
|
74
|
+
def self.submit(index=nil, type=nil)
|
69
75
|
if index == nil
|
70
76
|
FetchAction.submit(@@current_form)
|
77
|
+
#----- added by nickmerwin@gmail.com -----
|
78
|
+
elsif index.class == String
|
79
|
+
button = @@current_form.buttons.detect{|b| b.name == index}
|
80
|
+
FetchAction.submit(@@current_form, button,type)
|
81
|
+
#-----------------------------------------
|
71
82
|
else
|
72
83
|
FetchAction.submit(@@current_form, @@current_form.buttons[index])
|
73
84
|
end
|
@@ -79,6 +90,10 @@ module Scrubyt
|
|
79
90
|
FetchAction.click_link(link_spec,index)
|
80
91
|
end
|
81
92
|
|
93
|
+
def self.click_image_map(index=0)
|
94
|
+
FetchAction.click_image_map(index)
|
95
|
+
end
|
96
|
+
|
82
97
|
def self.get_hpricot_doc
|
83
98
|
FetchAction.get_hpricot_doc
|
84
99
|
end
|
@@ -92,10 +107,12 @@ module Scrubyt
|
|
92
107
|
end
|
93
108
|
|
94
109
|
private
|
95
|
-
def self.lookup_form_for_tag(tag,widget_name,name_attribute,query_string)
|
96
|
-
|
97
|
-
widget = (FetchAction.get_hpricot_doc/"#{tag}[@name=#{name_attribute}]").map()[
|
110
|
+
def self.lookup_form_for_tag(tag, widget_name, name_attribute, query_string, index=0)
|
111
|
+
Scrubyt.log :ACTION, "typing #{query_string} into the #{widget_name} named '#{name_attribute}'"
|
112
|
+
widget = (FetchAction.get_hpricot_doc/"#{tag}[@name=#{name_attribute}]").map()[index]
|
113
|
+
p widget
|
98
114
|
form_tag = Scrubyt::XPathUtils.traverse_up_until_name(widget, 'form')
|
115
|
+
p form_tag
|
99
116
|
find_form_based_on_tag(form_tag, ['name', 'id', 'action'])
|
100
117
|
end
|
101
118
|
|
@@ -112,6 +129,7 @@ private
|
|
112
129
|
loop do
|
113
130
|
@@current_form = FetchAction.get_mechanize_doc.forms[i]
|
114
131
|
return nil if @@current_form == nil
|
132
|
+
puts i
|
115
133
|
break if @@current_form.form_node.attributes[lookup_attribute_name] == lookup_attribute_value
|
116
134
|
i+= 1
|
117
135
|
end
|