mechanize 0.4.5 → 0.4.6
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data/CHANGELOG +11 -0
- data/EXAMPLES +9 -0
- data/NOTES +29 -0
- data/lib/mechanize.rb +56 -11
- data/lib/mechanize/cookie.rb +6 -1
- data/lib/mechanize/form.rb +18 -5
- data/lib/mechanize/form_elements.rb +52 -2
- data/lib/mechanize/list.rb +23 -0
- data/lib/mechanize/mech_version.rb +1 -1
- data/lib/mechanize/module.rb +0 -16
- data/lib/mechanize/page.rb +33 -6
- data/lib/mechanize/page_elements.rb +23 -0
- data/test/htdocs/alt_text.html +9 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/form_test.html +1 -0
- data/test/server.rb +1 -0
- data/test/servlets.rb +8 -1
- data/test/tc_errors.rb +62 -0
- data/test/tc_forms.rb +32 -0
- data/test/tc_links.rb +13 -0
- data/test/tc_mech.rb +6 -0
- data/test/ts_mech.rb +1 -0
- metadata +86 -79
data/CHANGELOG
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
== 0.4.6
|
2
|
+
|
3
|
+
* Added support for proxies
|
4
|
+
* Added a uri field to WWW::Link
|
5
|
+
* Added an error class WWW::Mechanize::ContentTypeError
|
6
|
+
* Added image alt text to link text
|
7
|
+
* Added a visited? method to WWW::Mechanize
|
8
|
+
* Added Array#value= which will set the first value to the argument. That
|
9
|
+
allows syntax as such: form.fields.name('q').value = 'xyz'
|
10
|
+
Before it was like this: form.fields.name('q').first.value = 'xyz'
|
11
|
+
|
1
12
|
== 0.4.5
|
2
13
|
|
3
14
|
* Added support for multiple values of the same name
|
data/EXAMPLES
CHANGED
@@ -96,3 +96,12 @@ the original filter.
|
|
96
96
|
page = agent.get('http://google.com/')
|
97
97
|
page.links.each { |l| puts l.text }
|
98
98
|
|
99
|
+
== Using a proxy
|
100
|
+
|
101
|
+
require 'rubygems'
|
102
|
+
require 'mechanize'
|
103
|
+
|
104
|
+
agent = WWW::Mechanize.new
|
105
|
+
agent.set_proxy('localhost', '8000')
|
106
|
+
page = agent.get(ARGV[0])
|
107
|
+
puts page.body
|
data/NOTES
CHANGED
@@ -1,5 +1,34 @@
|
|
1
1
|
= Mechanize Release Notes
|
2
2
|
|
3
|
+
== 0.4.6
|
4
|
+
|
5
|
+
The 0.4.6 release comes with proxy support which can be enabled by calling
|
6
|
+
the set_proxy method on your WWW::Mechanize object. Once you have set your
|
7
|
+
proxy settings, all mechanize requests will go through the proxy.
|
8
|
+
|
9
|
+
A new "visited?" method has been added to WWW::Mechanize so that you can see
|
10
|
+
if any particular URL is in your history.
|
11
|
+
|
12
|
+
Image alt text support has been added to links. If a link contains an image
|
13
|
+
with no text, the alt text of the image will be used. For example:
|
14
|
+
|
15
|
+
<a href="foo.html"><img src="foo.gif" alt="Foo Image"></a>
|
16
|
+
|
17
|
+
This link will contain the text "Foo Image", and can be found like this:
|
18
|
+
|
19
|
+
link = page.links.text('Foo Image')
|
20
|
+
|
21
|
+
Lists of things have been updated so that you can set a value without
|
22
|
+
specifying the position in the array. It will just assume that you want to
|
23
|
+
set the value on the first element. For example, the following two statements
|
24
|
+
are equivalent:
|
25
|
+
|
26
|
+
form.fields.name('q').first.value = 'xyz' # Old syntax
|
27
|
+
form.fields.name('q').value = 'xyz' # New syntax
|
28
|
+
|
29
|
+
This new syntax comes with a note of caution; make sure you know you want to
|
30
|
+
set only the first value. There could be multiple fields with the name 'q'.
|
31
|
+
|
3
32
|
== 0.4.5
|
4
33
|
|
5
34
|
This release comes with a new filtering system. You can now manipulate the
|
data/lib/mechanize.rb
CHANGED
@@ -20,6 +20,7 @@ require 'webrick'
|
|
20
20
|
require 'date'
|
21
21
|
require 'web/htmltools/xmltree' # narf
|
22
22
|
require 'mechanize/module'
|
23
|
+
require 'mechanize/list'
|
23
24
|
require 'mechanize/parsing'
|
24
25
|
require 'mechanize/cookie'
|
25
26
|
require 'mechanize/form'
|
@@ -51,8 +52,8 @@ end
|
|
51
52
|
# agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
|
52
53
|
# agent.user_agent_alias = 'Mac Safari'
|
53
54
|
# page = agent.get("http://www.google.com/")
|
54
|
-
# search_form = page.forms.
|
55
|
-
# search_form.fields.
|
55
|
+
# search_form = page.forms.name("f").first
|
56
|
+
# search_form.fields.name("q").value = "Hello"
|
56
57
|
# search_results = agent.submit(search_form)
|
57
58
|
# puts search_results.body
|
58
59
|
class Mechanize
|
@@ -60,7 +61,8 @@ class Mechanize
|
|
60
61
|
AGENT_ALIASES = {
|
61
62
|
'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
|
62
63
|
'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
|
63
|
-
'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en
|
64
|
+
'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
|
65
|
+
'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
|
64
66
|
'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
|
65
67
|
'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
|
66
68
|
'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
|
@@ -87,13 +89,25 @@ class Mechanize
|
|
87
89
|
@body_filter = lambda { |body| body }
|
88
90
|
@cookie_jar = CookieJar.new
|
89
91
|
@log = Logger.new(nil)
|
92
|
+
@proxy_addr = nil
|
93
|
+
@proxy_port = nil
|
94
|
+
@proxy_user = nil
|
95
|
+
@proxy_pass = nil
|
90
96
|
yield self if block_given?
|
91
97
|
end
|
92
98
|
|
99
|
+
# Sets the proxy address, port, user, and password
|
100
|
+
def set_proxy(addr, port, user = nil, pass = nil)
|
101
|
+
@proxy_addr, @proxy_port, @proxy_user, @proxy_pass = addr, port, user, pass
|
102
|
+
end
|
103
|
+
|
104
|
+
# Set the user agent for the Mechanize object.
|
105
|
+
# See AGENT_ALIASES
|
93
106
|
def user_agent_alias=(al)
|
94
107
|
self.user_agent = AGENT_ALIASES[al] || raise("unknown agent alias")
|
95
108
|
end
|
96
109
|
|
110
|
+
# Returns a list of cookies stored in the cookie jar.
|
97
111
|
def cookies
|
98
112
|
cookies = []
|
99
113
|
@cookie_jar.jar.each_key do |domain|
|
@@ -104,6 +118,7 @@ class Mechanize
|
|
104
118
|
cookies
|
105
119
|
end
|
106
120
|
|
121
|
+
# Sets the user and password to be used for basic authentication.
|
107
122
|
def basic_auth(user, password)
|
108
123
|
@user = user
|
109
124
|
@password = password
|
@@ -114,6 +129,7 @@ class Mechanize
|
|
114
129
|
basic_auth(user, password)
|
115
130
|
end
|
116
131
|
|
132
|
+
# Fetches the URL passed in.
|
117
133
|
def get(url)
|
118
134
|
cur_page = current_page() || Page.new
|
119
135
|
|
@@ -123,6 +139,7 @@ class Mechanize
|
|
123
139
|
page
|
124
140
|
end
|
125
141
|
|
142
|
+
# Posts to the given URL with the query parameters passed in.
|
126
143
|
def post(url, query={})
|
127
144
|
cur_page = current_page() || Page.new
|
128
145
|
|
@@ -141,11 +158,14 @@ class Mechanize
|
|
141
158
|
page
|
142
159
|
end
|
143
160
|
|
161
|
+
# Clicks the WWW::Link object passed in.
|
144
162
|
def click(link)
|
145
163
|
uri = to_absolute_uri(link.href)
|
146
164
|
get(uri)
|
147
165
|
end
|
148
166
|
|
167
|
+
# Equivalent to the browser back button. Returns the most recent page
|
168
|
+
# visited.
|
149
169
|
def back
|
150
170
|
@history.pop
|
151
171
|
end
|
@@ -154,25 +174,36 @@ class Mechanize
|
|
154
174
|
form.add_button_to_query(button) if button
|
155
175
|
query = form.build_query
|
156
176
|
|
157
|
-
uri = to_absolute_uri(
|
177
|
+
uri = to_absolute_uri(form.action)
|
158
178
|
case form.method.upcase
|
159
179
|
when 'POST'
|
160
180
|
post_form(uri, form)
|
161
181
|
when 'GET'
|
162
182
|
if uri.query.nil?
|
163
|
-
|
183
|
+
uri.query = WWW::Mechanize.build_query_string(query)
|
164
184
|
else
|
165
|
-
|
185
|
+
uri.query = uri.query + "&" + WWW::Mechanize.build_query_string(query)
|
166
186
|
end
|
187
|
+
get(uri)
|
167
188
|
else
|
168
189
|
raise 'unsupported method'
|
169
190
|
end
|
170
191
|
end
|
171
192
|
|
193
|
+
# Returns the current page loaded by Mechanize
|
172
194
|
def current_page
|
173
195
|
@history.last
|
174
196
|
end
|
175
197
|
|
198
|
+
# Returns whether or not a url has been visited
|
199
|
+
def visited?(url)
|
200
|
+
if url.is_a?(WWW::Link)
|
201
|
+
url = url.uri
|
202
|
+
end
|
203
|
+
uri = to_absolute_uri(url)
|
204
|
+
! @history.find { |h| h.uri.to_s == uri.to_s }.nil?
|
205
|
+
end
|
206
|
+
|
176
207
|
alias page current_page
|
177
208
|
|
178
209
|
private
|
@@ -181,13 +212,13 @@ class Mechanize
|
|
181
212
|
if url.is_a?(URI)
|
182
213
|
uri = url
|
183
214
|
else
|
184
|
-
uri = URI.parse(url)
|
215
|
+
uri = URI.parse(url.gsub(/\s/, '%20'))
|
185
216
|
end
|
186
217
|
|
187
218
|
# construct an absolute uri
|
188
219
|
if uri.relative?
|
189
|
-
if cur_page
|
190
|
-
uri = cur_page.uri + url
|
220
|
+
if cur_page.uri
|
221
|
+
uri = cur_page.uri + (url.is_a?(URI) ? url : URI::escape(url))
|
191
222
|
else
|
192
223
|
raise 'no history. please specify an absolute URL'
|
193
224
|
end
|
@@ -222,7 +253,13 @@ class Mechanize
|
|
222
253
|
|
223
254
|
page = Page.new(uri)
|
224
255
|
|
225
|
-
http = Net::HTTP.new(uri.host,
|
256
|
+
http = Net::HTTP.new( uri.host,
|
257
|
+
uri.port,
|
258
|
+
@proxy_addr,
|
259
|
+
@proxy_port,
|
260
|
+
@proxy_user,
|
261
|
+
@proxy_pass
|
262
|
+
)
|
226
263
|
|
227
264
|
if uri.scheme == 'https'
|
228
265
|
http.use_ssl = true
|
@@ -306,7 +343,7 @@ class Mechanize
|
|
306
343
|
return page
|
307
344
|
when "301", "302"
|
308
345
|
log.info("follow redirect to: #{ response['Location'] }")
|
309
|
-
return fetch_page(to_absolute_uri(response['Location'], page), :get, page)
|
346
|
+
return fetch_page(to_absolute_uri(URI.parse(response['Location'].gsub(/ /, '%20')), page), :get, page)
|
310
347
|
else
|
311
348
|
raise ResponseCodeError.new(page.code), "Unhandled response", caller
|
312
349
|
end
|
@@ -333,6 +370,14 @@ class Mechanize
|
|
333
370
|
end
|
334
371
|
end
|
335
372
|
|
373
|
+
class ContentTypeError < RuntimeError
|
374
|
+
attr_reader :content_type
|
375
|
+
|
376
|
+
def initialize(content_type)
|
377
|
+
@content_type = content_type
|
378
|
+
end
|
379
|
+
end
|
380
|
+
|
336
381
|
end
|
337
382
|
|
338
383
|
end # module WWW
|
data/lib/mechanize/cookie.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'date'
|
2
2
|
|
3
3
|
module WWW
|
4
|
+
# This class is used to represent an HTTP Cookie.
|
4
5
|
class Cookie
|
5
6
|
attr_reader :name, :value, :path, :domain, :expires, :secure
|
6
7
|
def initialize(cookie)
|
@@ -21,7 +22,7 @@ module WWW
|
|
21
22
|
cookie_text.split(/; ?/).each do |data|
|
22
23
|
name, value = data.split('=', 2)
|
23
24
|
next unless name
|
24
|
-
cookie[name] = value
|
25
|
+
cookie[name.strip] = value
|
25
26
|
end
|
26
27
|
|
27
28
|
cookie_values[:path] = cookie.delete(
|
@@ -116,12 +117,15 @@ module WWW
|
|
116
117
|
end
|
117
118
|
end
|
118
119
|
|
120
|
+
# This class is used to manage the Cookies that have been returned from
|
121
|
+
# any particular website.
|
119
122
|
class CookieJar
|
120
123
|
attr_accessor :jar
|
121
124
|
def initialize
|
122
125
|
@jar = {}
|
123
126
|
end
|
124
127
|
|
128
|
+
# Add a cookie to the Jar.
|
125
129
|
def add(cookie)
|
126
130
|
unless @jar.has_key?(cookie.domain)
|
127
131
|
@jar[cookie.domain] = Hash.new
|
@@ -130,6 +134,7 @@ module WWW
|
|
130
134
|
@jar[cookie.domain][cookie.name] = cookie
|
131
135
|
end
|
132
136
|
|
137
|
+
# Fetch the cookies that should be used for the URI object passed in.
|
133
138
|
def cookies(url)
|
134
139
|
cookies = []
|
135
140
|
@jar.each_key do |domain|
|
data/lib/mechanize/form.rb
CHANGED
@@ -110,11 +110,11 @@ module WWW
|
|
110
110
|
end
|
111
111
|
|
112
112
|
def parse
|
113
|
-
@fields
|
114
|
-
@buttons
|
115
|
-
@file_uploads =
|
116
|
-
@radiobuttons =
|
117
|
-
@checkboxes
|
113
|
+
@fields = WWW::Mechanize::List.new
|
114
|
+
@buttons = WWW::Mechanize::List.new
|
115
|
+
@file_uploads = WWW::Mechanize::List.new
|
116
|
+
@radiobuttons = WWW::Mechanize::List.new
|
117
|
+
@checkboxes = WWW::Mechanize::List.new
|
118
118
|
|
119
119
|
@elements_node.each_recursive {|node|
|
120
120
|
case node.name.downcase
|
@@ -140,6 +140,19 @@ module WWW
|
|
140
140
|
end
|
141
141
|
}
|
142
142
|
end
|
143
|
+
|
144
|
+
def inspect
|
145
|
+
string = "Form: ['#{@name}' -> #{@action}]\n"
|
146
|
+
string << "[radiobuttons]\n"
|
147
|
+
@radiobuttons.each { |f| string << f.inspect }
|
148
|
+
string << "[checkboxes]\n"
|
149
|
+
@checkboxes.each { |f| string << f.inspect }
|
150
|
+
string << "[fields]\n"
|
151
|
+
@fields.each { |f| string << f.inspect }
|
152
|
+
string << "[buttons]\n"
|
153
|
+
@buttons.each { |f| string << f.inspect }
|
154
|
+
string
|
155
|
+
end
|
143
156
|
|
144
157
|
private
|
145
158
|
def rand_string(len = 10)
|
@@ -1,4 +1,10 @@
|
|
1
1
|
module WWW
|
2
|
+
# This class represents a field in a form. It handles the following input
|
3
|
+
# tags found in a form:
|
4
|
+
# text, password, hidden, int, textarea
|
5
|
+
#
|
6
|
+
# To set the value of a field, just use the value method:
|
7
|
+
# field.value = "foo"
|
2
8
|
class Field
|
3
9
|
attr_accessor :name, :value
|
4
10
|
|
@@ -19,8 +25,17 @@ module WWW
|
|
19
25
|
}
|
20
26
|
return fields
|
21
27
|
end
|
28
|
+
|
29
|
+
def inspect
|
30
|
+
"#{name} = #{@value}\n"
|
31
|
+
end
|
22
32
|
end
|
23
33
|
|
34
|
+
# This class represents a file upload field found in a form. To use this
|
35
|
+
# class, set WWW::FileUpload#file_data= to the data of the file you want
|
36
|
+
# to upload and WWW::FileUpload#mime_type= to the appropriate mime type
|
37
|
+
# of the file.
|
38
|
+
# See the example in EXAMPLES[link://files/EXAMPLES.html]
|
24
39
|
class FileUpload
|
25
40
|
# value is the file-name, not the file-content
|
26
41
|
attr_accessor :name
|
@@ -33,6 +48,7 @@ module WWW
|
|
33
48
|
end
|
34
49
|
end
|
35
50
|
|
51
|
+
# This class represents a Submit button in a form.
|
36
52
|
class Button
|
37
53
|
attr_accessor :name, :value
|
38
54
|
|
@@ -55,8 +71,14 @@ module WWW
|
|
55
71
|
}
|
56
72
|
return buttons
|
57
73
|
end
|
74
|
+
|
75
|
+
def inspect
|
76
|
+
"#{name} = #{@value}\n"
|
77
|
+
end
|
58
78
|
end
|
59
79
|
|
80
|
+
# This class represents an image button in a form. Use the x and y methods
|
81
|
+
# to set the x and y positions for where the mouse "clicked".
|
60
82
|
class ImageButton < Button
|
61
83
|
attr_accessor :x, :y
|
62
84
|
|
@@ -69,29 +91,47 @@ module WWW
|
|
69
91
|
end
|
70
92
|
end
|
71
93
|
|
94
|
+
# This class represents a radio button found in a Form. To activate the
|
95
|
+
# RadioButton in the Form, set the checked method to true.
|
72
96
|
class RadioButton
|
73
97
|
attr_accessor :name, :value, :checked
|
74
98
|
|
75
99
|
def initialize(name, value, checked)
|
76
100
|
@name, @value, @checked = name, value, checked
|
77
101
|
end
|
102
|
+
|
103
|
+
def inspect
|
104
|
+
"#{name} = #{@value}\n"
|
105
|
+
end
|
78
106
|
end
|
79
107
|
|
108
|
+
# This class represents a check box found in a Form. To activate the
|
109
|
+
# CheckBox in the Form, set the checked method to true.
|
80
110
|
class CheckBox
|
81
111
|
attr_accessor :name, :value, :checked
|
82
112
|
|
83
113
|
def initialize(name, value, checked)
|
84
114
|
@name, @value, @checked = name, value, checked
|
85
115
|
end
|
116
|
+
|
117
|
+
def inspect
|
118
|
+
"#{name} = #{@value}\n"
|
119
|
+
end
|
86
120
|
end
|
87
121
|
|
122
|
+
# This class represents a select list or drop down box in a Form. Set the
|
123
|
+
# value for the list by calling SelectList#value=. SelectList contains a
|
124
|
+
# list of Option that were found. After finding the correct option, set
|
125
|
+
# the select list's value to the option value:
|
126
|
+
# selectlist.value = selectlist.options.first.value
|
88
127
|
class SelectList
|
89
|
-
attr_accessor :name, :
|
128
|
+
attr_accessor :name, :options
|
129
|
+
attr_reader :value
|
90
130
|
|
91
131
|
def initialize(name, node)
|
92
132
|
@name = name
|
93
133
|
@value = nil
|
94
|
-
@options =
|
134
|
+
@options = WWW::Mechanize::List.new
|
95
135
|
|
96
136
|
# parse
|
97
137
|
node.each_recursive {|n|
|
@@ -103,8 +143,18 @@ module WWW
|
|
103
143
|
}
|
104
144
|
@value = @options.first.value if (@value == nil && @options.first)
|
105
145
|
end
|
146
|
+
|
147
|
+
def value=(value)
|
148
|
+
@value = value.to_s
|
149
|
+
end
|
150
|
+
|
151
|
+
def inspect
|
152
|
+
"#{name} = #{@value}\n"
|
153
|
+
end
|
106
154
|
end
|
107
155
|
|
156
|
+
# This class represents an option found within a SelectList. A
|
157
|
+
# SelectList can have many Option classes associated with it.
|
108
158
|
class Option
|
109
159
|
attr_reader :value, :selected, :text
|
110
160
|
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module WWW
|
2
|
+
class Mechanize
|
3
|
+
class List < Array
|
4
|
+
def with
|
5
|
+
self
|
6
|
+
end
|
7
|
+
|
8
|
+
def value=(arg)
|
9
|
+
first().value=(arg)
|
10
|
+
end
|
11
|
+
|
12
|
+
alias :and :with
|
13
|
+
|
14
|
+
def method_missing(meth_sym, arg)
|
15
|
+
if arg.class == Regexp
|
16
|
+
WWW::Mechanize::List.new(find_all { |e| e.send(meth_sym) =~ arg })
|
17
|
+
else
|
18
|
+
WWW::Mechanize::List.new(find_all { |e| e.send(meth_sym) == arg })
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/lib/mechanize/module.rb
CHANGED
@@ -19,19 +19,3 @@ class Module
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
-
class Array
|
23
|
-
def with
|
24
|
-
self
|
25
|
-
end
|
26
|
-
|
27
|
-
alias :and :with
|
28
|
-
|
29
|
-
def method_missing(meth_sym, arg)
|
30
|
-
if arg.class == Regexp
|
31
|
-
find_all { |e| e.send(meth_sym) =~ arg }
|
32
|
-
else
|
33
|
-
find_all { |e| e.send(meth_sym) == arg }
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
data/lib/mechanize/page.rb
CHANGED
@@ -44,29 +44,38 @@ module WWW
|
|
44
44
|
@body_filter = lambda { |body| body }
|
45
45
|
end
|
46
46
|
|
47
|
+
# Set the body filter for the page. The body should be a Proc object that
|
48
|
+
# returns what the body should be set to. For example, replace all
|
49
|
+
# occurrences of 'foo' with 'bar':
|
50
|
+
# page.body_filter = lambda { |body| body.gsub(/foo/, 'bar') }
|
47
51
|
def body_filter=(filter)
|
48
52
|
@body_filter = filter
|
49
53
|
parse_html()
|
50
54
|
end
|
51
55
|
|
56
|
+
# Get the response header
|
52
57
|
def header
|
53
58
|
@response.header
|
54
59
|
end
|
55
60
|
|
61
|
+
# Get the content type
|
56
62
|
def content_type
|
57
63
|
@response['Content-Type']
|
58
64
|
end
|
59
65
|
|
66
|
+
# Get a list of Form associated with this page.
|
60
67
|
def forms(*args)
|
61
68
|
parse_html() unless @forms
|
62
69
|
find_forms(*args)
|
63
70
|
end
|
64
71
|
|
72
|
+
# Get a list of Link associated with this page.
|
65
73
|
def links(*args)
|
66
74
|
parse_html() unless @links
|
67
75
|
find_links(*args)
|
68
76
|
end
|
69
77
|
|
78
|
+
# Get the root XML parse tree for this page.
|
70
79
|
def root
|
71
80
|
parse_html() unless @root
|
72
81
|
@root
|
@@ -80,25 +89,43 @@ module WWW
|
|
80
89
|
find_watches(*args)
|
81
90
|
end
|
82
91
|
|
92
|
+
# Get a list of Meta links, usually used for refreshing the page.
|
83
93
|
def meta(*args)
|
84
94
|
parse_html() unless @meta
|
85
95
|
find_meta(*args)
|
86
96
|
end
|
87
97
|
|
98
|
+
# Get a list of Frame from the page
|
88
99
|
def frames(*args)
|
89
100
|
parse_html() unless @frames
|
90
101
|
find_frames(*args)
|
91
102
|
end
|
92
103
|
|
104
|
+
# Get a list of IFrame from the page
|
93
105
|
def iframes(*args)
|
94
106
|
parse_html() unless @iframes
|
95
107
|
find_iframes(*args)
|
96
108
|
end
|
97
109
|
|
110
|
+
def inspect
|
111
|
+
string = "[meta]\n"
|
112
|
+
meta.each { |l| string << l.inspect }
|
113
|
+
string << "[frames]\n"
|
114
|
+
frames.each { |l| string << l.inspect }
|
115
|
+
string << "[iframes]\n"
|
116
|
+
iframes.each { |l| string << l.inspect }
|
117
|
+
string << "[links]\n"
|
118
|
+
links.each { |l| string << l.inspect }
|
119
|
+
string << "[forms]\n"
|
120
|
+
forms.each { |l| string << l.inspect }
|
121
|
+
string
|
122
|
+
end
|
123
|
+
|
98
124
|
private
|
99
125
|
|
100
126
|
def parse_html
|
101
|
-
raise
|
127
|
+
raise Mechanize::ContentTypeError.new(content_type()) unless
|
128
|
+
content_type() =~ /^text\/html/
|
102
129
|
|
103
130
|
# construct parser and feed with HTML
|
104
131
|
parser = HTMLTree::XMLParser.new
|
@@ -120,11 +147,11 @@ module WWW
|
|
120
147
|
|
121
148
|
@root = parser.document
|
122
149
|
|
123
|
-
@forms =
|
124
|
-
@links =
|
125
|
-
@meta =
|
126
|
-
@frames =
|
127
|
-
@iframes =
|
150
|
+
@forms = WWW::Mechanize::List.new
|
151
|
+
@links = WWW::Mechanize::List.new
|
152
|
+
@meta = WWW::Mechanize::List.new
|
153
|
+
@frames = WWW::Mechanize::List.new
|
154
|
+
@iframes = WWW::Mechanize::List.new
|
128
155
|
@watches = {}
|
129
156
|
|
130
157
|
@root.each_recursive {|node|
|
@@ -3,11 +3,30 @@ module WWW
|
|
3
3
|
attr_reader :node
|
4
4
|
attr_reader :href
|
5
5
|
attr_reader :text
|
6
|
+
alias :to_s :text
|
6
7
|
|
7
8
|
def initialize(node)
|
8
9
|
@node = node
|
9
10
|
@href = node.attributes['href']
|
10
11
|
@text = node.all_text
|
12
|
+
|
13
|
+
# If there is no text, try to find an image and use its alt text
|
14
|
+
if (@text.nil? || @text.length == 0) && @node.has_elements?
|
15
|
+
@text = ''
|
16
|
+
@node.each_element { |e|
|
17
|
+
if e.name == 'img'
|
18
|
+
@text << (e.has_attributes? ? e.attributes['alt'] || '' : '')
|
19
|
+
end
|
20
|
+
}
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def uri
|
25
|
+
URI.parse(@href)
|
26
|
+
end
|
27
|
+
|
28
|
+
def inspect
|
29
|
+
"'#{@text}' -> #{@href}\n"
|
11
30
|
end
|
12
31
|
end
|
13
32
|
|
@@ -24,5 +43,9 @@ module WWW
|
|
24
43
|
@name = node.attributes['name']
|
25
44
|
@src = node.attributes['src']
|
26
45
|
end
|
46
|
+
|
47
|
+
def inspect
|
48
|
+
"'#{@name}' -> #{@src}\n"
|
49
|
+
end
|
27
50
|
end
|
28
51
|
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<html>
|
2
|
+
<meta http-equiv="Refresh" content="0; url=http://www.drphil.com/">
|
3
|
+
<body>
|
4
|
+
<a href="alt_text.html"><img alt="alt text" src="hello"></a>
|
5
|
+
<a href="no_alt_text.html"><img src="hello"></a>
|
6
|
+
<a href="no_image.html">no image</a>
|
7
|
+
<a href="no_text.html"></a>
|
8
|
+
</body>
|
9
|
+
</html>
|
data/test/htdocs/form_test.html
CHANGED
data/test/server.rb
CHANGED
@@ -19,6 +19,7 @@ s.mount("/form_post", FormTest)
|
|
19
19
|
s.mount("/form post", FormTest)
|
20
20
|
s.mount("/response_code", ResponseCodeTest)
|
21
21
|
s.mount("/file_upload", FileUploadTest)
|
22
|
+
s.mount("/bad_content_type", BadContentTypeTest)
|
22
23
|
|
23
24
|
htpasswd = WEBrick::HTTPAuth::Htpasswd.new(base_dir + '/data/htpasswd')
|
24
25
|
auth = WEBrick::HTTPAuth::BasicAuth.new(
|
data/test/servlets.rb
CHANGED
@@ -2,6 +2,13 @@ require 'webrick'
|
|
2
2
|
require 'logger'
|
3
3
|
require 'date'
|
4
4
|
|
5
|
+
class BadContentTypeTest < WEBrick::HTTPServlet::AbstractServlet
|
6
|
+
def do_GET(req, res)
|
7
|
+
res['Content-Type'] = "text/xml"
|
8
|
+
res.body = "Hello World"
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
5
12
|
class FileUploadTest < WEBrick::HTTPServlet::AbstractServlet
|
6
13
|
def do_POST(req, res)
|
7
14
|
res.body = req.body
|
@@ -29,7 +36,7 @@ class FormTest < WEBrick::HTTPServlet::AbstractServlet
|
|
29
36
|
res.body = "<HTML><body>"
|
30
37
|
req.query.each_key { |k|
|
31
38
|
req.query[k].each_data { |data|
|
32
|
-
res.body << "<a href=\"#\">#{k}:#{data}</a><br />"
|
39
|
+
res.body << "<a href=\"#\">#{URI.unescape(k)}:#{URI.unescape(data)}</a><br />"
|
33
40
|
}
|
34
41
|
}
|
35
42
|
res.body << "</body></HTML>"
|
data/test/tc_errors.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'rubygems'
|
5
|
+
require 'mechanize'
|
6
|
+
require 'test_includes'
|
7
|
+
|
8
|
+
class MechErrorsTest < Test::Unit::TestCase
|
9
|
+
include TestMethods
|
10
|
+
|
11
|
+
def test_content_type_error
|
12
|
+
agent = WWW::Mechanize.new { |a| a.log = Logger.new(nil) }
|
13
|
+
page = agent.get("http://localhost:#{@port}/bad_content_type")
|
14
|
+
assert_raise(WWW::Mechanize::ContentTypeError) {
|
15
|
+
page.root
|
16
|
+
}
|
17
|
+
begin
|
18
|
+
page.root
|
19
|
+
rescue WWW::Mechanize::ContentTypeError => ex
|
20
|
+
assert_equal('text/xml', ex.content_type)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_bad_form_method
|
25
|
+
agent = WWW::Mechanize.new { |a| a.log = Logger.new(nil) }
|
26
|
+
page = agent.get("http://localhost:#{@port}/bad_form_test.html")
|
27
|
+
assert_raise(RuntimeError) {
|
28
|
+
agent.submit(page.forms.first)
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_too_many_radio
|
33
|
+
agent = WWW::Mechanize.new { |a| a.log = Logger.new(nil) }
|
34
|
+
page = agent.get("http://localhost:#{@port}/form_test.html")
|
35
|
+
form = page.forms.name('post_form1').first
|
36
|
+
form.radiobuttons.each { |r| r.checked = true }
|
37
|
+
assert_raise(RuntimeError) {
|
38
|
+
agent.submit(form)
|
39
|
+
}
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_unknown_agent
|
43
|
+
agent = WWW::Mechanize.new { |a| a.log = Logger.new(nil) }
|
44
|
+
assert_raise(RuntimeError) {
|
45
|
+
agent.user_agent_alias = "Aaron's Browser"
|
46
|
+
}
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_bad_url
|
50
|
+
agent = WWW::Mechanize.new { |a| a.log = Logger.new(nil) }
|
51
|
+
assert_raise(RuntimeError) {
|
52
|
+
agent.get('/foo.html')
|
53
|
+
}
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_unsupported_scheme
|
57
|
+
agent = WWW::Mechanize.new { |a| a.log = Logger.new(nil) }
|
58
|
+
assert_raise(RuntimeError) {
|
59
|
+
agent.get('ftp://server.com/foo.html')
|
60
|
+
}
|
61
|
+
end
|
62
|
+
end
|
data/test/tc_forms.rb
CHANGED
@@ -129,6 +129,38 @@ class FormsMechTest < Test::Unit::TestCase
|
|
129
129
|
)
|
130
130
|
end
|
131
131
|
|
132
|
+
def test_select_box
|
133
|
+
agent = WWW::Mechanize.new { |a| a.log = Logger.new(nil) }
|
134
|
+
page = agent.get("http://localhost:#{@port}/form_test.html")
|
135
|
+
post_form = page.forms.find { |f| f.name == "post_form1" }
|
136
|
+
assert_not_nil(post_form, "Post form is null")
|
137
|
+
assert_not_nil(page.header)
|
138
|
+
assert_not_nil(page.root)
|
139
|
+
assert_equal(0, page.iframes.length)
|
140
|
+
assert_equal("post", post_form.method.downcase)
|
141
|
+
assert_equal("/form_post", post_form.action)
|
142
|
+
|
143
|
+
# Find the select list
|
144
|
+
s = post_form.fields.name(/country/).first
|
145
|
+
assert_not_nil(s, "Couldn't find country select list")
|
146
|
+
assert_equal(2, s.options.length)
|
147
|
+
assert_equal("USA", s.value)
|
148
|
+
assert_equal("USA", s.options.first.value)
|
149
|
+
assert_equal("USA", s.options.first.text)
|
150
|
+
assert_equal("CANADA", s.options[1].value)
|
151
|
+
assert_equal("CANADA", s.options[1].text)
|
152
|
+
|
153
|
+
# Now set all the fields
|
154
|
+
post_form.fields.name(/country/).value = s.options[1]
|
155
|
+
page = agent.submit(post_form, post_form.buttons.first)
|
156
|
+
|
157
|
+
# Check that the submitted fields exist
|
158
|
+
assert_not_nil(
|
159
|
+
page.links.find { |l| l.text == "country:CANADA" },
|
160
|
+
"select box not submitted"
|
161
|
+
)
|
162
|
+
end
|
163
|
+
|
132
164
|
def test_get
|
133
165
|
agent = WWW::Mechanize.new { |a| a.log = Logger.new(nil) }
|
134
166
|
page = agent.get("http://localhost:#{@port}/form_test.html")
|
data/test/tc_links.rb
CHANGED
@@ -21,4 +21,17 @@ class LinksMechTest < Test::Unit::TestCase
|
|
21
21
|
page = agent.get("http://localhost:#{@port}/find_link.html")
|
22
22
|
assert_equal(15, page.links.length)
|
23
23
|
end
|
24
|
+
|
25
|
+
def test_alt_text
|
26
|
+
agent = WWW::Mechanize.new { |a| a.log = Logger.new(nil) }
|
27
|
+
page = agent.get("http://localhost:#{@port}/alt_text.html")
|
28
|
+
assert_equal(4, page.links.length)
|
29
|
+
assert_equal(1, page.meta.length)
|
30
|
+
|
31
|
+
assert_equal('', page.meta.first.text)
|
32
|
+
assert_equal('alt text', page.links.href('alt_text.html').first.text)
|
33
|
+
assert_equal('', page.links.href('no_alt_text.html').first.text)
|
34
|
+
assert_equal('no image', page.links.href('no_image.html').first.text)
|
35
|
+
assert_equal('', page.links.href('no_text.html').first.text)
|
36
|
+
end
|
24
37
|
end
|
data/test/tc_mech.rb
CHANGED
@@ -21,6 +21,12 @@ class MechMethodsTest < Test::Unit::TestCase
|
|
21
21
|
agent.history.last.uri.to_s)
|
22
22
|
assert_equal("http://localhost:#{@port}/",
|
23
23
|
agent.history[-2].uri.to_s)
|
24
|
+
|
25
|
+
assert_equal(true, agent.visited?("http://localhost:#{@port}/"))
|
26
|
+
assert_equal(true, agent.visited?("/form_test.html"))
|
27
|
+
assert_equal(false, agent.visited?("http://google.com/"))
|
28
|
+
assert_equal(true, agent.visited?(page.links.first))
|
29
|
+
|
24
30
|
end
|
25
31
|
|
26
32
|
def test_max_history
|
data/test/ts_mech.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.8.11
|
2
|
+
rubygems_version: 0.8.11.13
|
3
3
|
specification_version: 1
|
4
4
|
name: mechanize
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.4.
|
7
|
-
date: 2006-
|
6
|
+
version: 0.4.6
|
7
|
+
date: 2006-06-02 00:00:00 -07:00
|
8
8
|
summary: Mechanize provides automated web-browsing
|
9
9
|
require_paths:
|
10
|
-
|
10
|
+
- lib
|
11
11
|
email: aaronp@rubyforge.org
|
12
12
|
homepage: mechanize.rubyforge.org
|
13
13
|
rubyforge_project: mechanize
|
@@ -18,91 +18,98 @@ bindir: bin
|
|
18
18
|
has_rdoc: true
|
19
19
|
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
20
|
requirements:
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
version: 0.0.0
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
25
24
|
version:
|
26
25
|
platform: ruby
|
27
26
|
signing_key:
|
28
27
|
cert_chain:
|
28
|
+
post_install_message:
|
29
29
|
authors:
|
30
|
-
|
30
|
+
- Aaron Patterson
|
31
31
|
files:
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
32
|
+
- test/tc_errors.rb
|
33
|
+
- test/server.rb
|
34
|
+
- test/tc_response_code.rb
|
35
|
+
- test/servlets.rb
|
36
|
+
- test/tc_cookies.rb
|
37
|
+
- test/htdocs
|
38
|
+
- test/tc_forms.rb
|
39
|
+
- test/tc_upload.rb
|
40
|
+
- test/tc_links.rb
|
41
|
+
- test/data
|
42
|
+
- test/tc_filter.rb
|
43
|
+
- test/tc_cookie_class.rb
|
44
|
+
- test/tc_watches.rb
|
45
|
+
- test/tc_parsing.rb
|
46
|
+
- test/test_includes.rb
|
47
|
+
- test/tc_frames.rb
|
48
|
+
- test/tc_mech.rb
|
49
|
+
- test/test_mech.rb
|
50
|
+
- test/ts_mech.rb
|
51
|
+
- test/tc_cookie_jar.rb
|
52
|
+
- test/README
|
53
|
+
- test/tc_authenticate.rb
|
54
|
+
- test/htdocs/frame_test.html
|
55
|
+
- test/htdocs/button.jpg
|
56
|
+
- test/htdocs/form_test.html
|
57
|
+
- test/htdocs/form_multival.html
|
58
|
+
- test/htdocs/alt_text.html
|
59
|
+
- test/htdocs/bad_form_test.html
|
60
|
+
- test/htdocs/find_link.html
|
61
|
+
- test/htdocs/index.html
|
62
|
+
- test/htdocs/google.html
|
63
|
+
- test/htdocs/file_upload.html
|
64
|
+
- test/htdocs/iframe_test.html
|
65
|
+
- test/data/htpasswd
|
66
|
+
- lib/mechanize
|
67
|
+
- lib/mechanize.rb
|
68
|
+
- lib/mechanize/net-overrides
|
69
|
+
- lib/mechanize/mech_version.rb
|
70
|
+
- lib/mechanize/parsing.rb
|
71
|
+
- lib/mechanize/module.rb
|
72
|
+
- lib/mechanize/page.rb
|
73
|
+
- lib/mechanize/cookie.rb
|
74
|
+
- lib/mechanize/form_elements.rb
|
75
|
+
- lib/mechanize/list.rb
|
76
|
+
- lib/mechanize/form.rb
|
77
|
+
- lib/mechanize/page_elements.rb
|
78
|
+
- lib/mechanize/net-overrides/net
|
79
|
+
- lib/mechanize/net-overrides/net/http.rb
|
80
|
+
- lib/mechanize/net-overrides/net/protocol.rb
|
81
|
+
- lib/mechanize/net-overrides/net/https.rb
|
82
|
+
- README
|
83
|
+
- EXAMPLES
|
84
|
+
- CHANGELOG
|
85
|
+
- LICENSE
|
86
|
+
- NOTES
|
83
87
|
test_files: []
|
88
|
+
|
84
89
|
rdoc_options:
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
90
|
+
- --main
|
91
|
+
- README
|
92
|
+
- --title
|
93
|
+
- "'WWW::Mechanize RDoc'"
|
89
94
|
extra_rdoc_files:
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
+
- README
|
96
|
+
- EXAMPLES
|
97
|
+
- CHANGELOG
|
98
|
+
- LICENSE
|
99
|
+
- NOTES
|
95
100
|
executables: []
|
101
|
+
|
96
102
|
extensions: []
|
103
|
+
|
97
104
|
requirements: []
|
105
|
+
|
98
106
|
dependencies:
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
version:
|
107
|
+
- !ruby/object:Gem::Dependency
|
108
|
+
name: ruby-web
|
109
|
+
version_requirement:
|
110
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
111
|
+
requirements:
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: 1.1.0
|
115
|
+
version:
|