mechanize 2.5.1 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/.travis.yml +16 -0
- data/CHANGELOG.rdoc +54 -1
- data/Manifest.txt +5 -0
- data/README.rdoc +5 -5
- data/Rakefile +4 -2
- data/lib/mechanize.rb +53 -10
- data/lib/mechanize/cookie.rb +8 -2
- data/lib/mechanize/cookie_jar.rb +33 -7
- data/lib/mechanize/directory_saver.rb +14 -2
- data/lib/mechanize/download.rb +2 -0
- data/lib/mechanize/element_matcher.rb +17 -6
- data/lib/mechanize/file.rb +25 -1
- data/lib/mechanize/form.rb +73 -28
- data/lib/mechanize/form/field.rb +6 -2
- data/lib/mechanize/form/select_list.rb +4 -4
- data/lib/mechanize/http/agent.rb +36 -15
- data/lib/mechanize/http/auth_challenge.rb +7 -8
- data/lib/mechanize/http/content_disposition_parser.rb +1 -1
- data/lib/mechanize/http/www_authenticate_parser.rb +9 -4
- data/lib/mechanize/page.rb +38 -12
- data/lib/mechanize/page/image.rb +1 -1
- data/lib/mechanize/parser.rb +12 -3
- data/lib/mechanize/pluggable_parsers.rb +8 -3
- data/lib/mechanize/test_case.rb +13 -0
- data/lib/mechanize/test_case/digest_auth_servlet.rb +4 -4
- data/lib/mechanize/util.rb +2 -2
- data/lib/mechanize/xml_file.rb +47 -0
- data/test/htdocs/tc_follow_meta_loop_1.html +8 -0
- data/test/htdocs/tc_follow_meta_loop_2.html +8 -0
- data/test/test_mechanize.rb +66 -12
- data/test/test_mechanize_cookie.rb +34 -0
- data/test/test_mechanize_cookie_jar.rb +67 -1
- data/test/test_mechanize_directory_saver.rb +10 -0
- data/test/test_mechanize_file.rb +22 -4
- data/test/test_mechanize_form.rb +14 -0
- data/test/test_mechanize_form_field.rb +14 -0
- data/test/test_mechanize_form_multi_select_list.rb +9 -0
- data/test/test_mechanize_form_option.rb +4 -0
- data/test/test_mechanize_form_select_list.rb +4 -0
- data/test/test_mechanize_http_agent.rb +59 -11
- data/test/test_mechanize_http_auth_challenge.rb +1 -1
- data/test/test_mechanize_http_content_disposition_parser.rb +8 -0
- data/test/test_mechanize_http_www_authenticate_parser.rb +29 -12
- data/test/test_mechanize_page.rb +58 -0
- data/test/test_mechanize_page_encoding.rb +1 -1
- data/test/test_mechanize_page_image.rb +2 -1
- data/test/test_mechanize_pluggable_parser.rb +4 -4
- data/test/test_mechanize_xml_file.rb +29 -0
- metadata +173 -229
- data.tar.gz.sig +0 -0
- metadata.gz.sig +0 -0
data/lib/mechanize/download.rb
CHANGED
data/lib/mechanize/element_matcher.rb
CHANGED
@@ -6,14 +6,14 @@ module Mechanize::ElementMatcher
|
|
6
6
|
criteria = if String === criteria then
|
7
7
|
{:name => criteria}
|
8
8
|
else
|
9
|
-
criteria.map do |k, v|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
9
|
+
Hash[criteria.map do |k, v|
|
10
|
+
k = :dom_id if k.to_sym == :id
|
11
|
+
k = :dom_class if k.to_sym == :class
|
12
|
+
[k, v]
|
13
|
+
end]
|
14
14
|
end
|
15
15
|
|
16
|
-
f = #{plural}.find_all do |thing|
|
16
|
+
f = select_#{plural}(criteria.delete(:search)).find_all do |thing|
|
17
17
|
criteria.all? do |k,v|
|
18
18
|
v === thing.send(k)
|
19
19
|
end
|
@@ -28,6 +28,17 @@ module Mechanize::ElementMatcher
|
|
28
28
|
f
|
29
29
|
end
|
30
30
|
|
31
|
+
def select_#{plural} selector
|
32
|
+
if selector.nil? then
|
33
|
+
#{plural}
|
34
|
+
else
|
35
|
+
nodes = search(selector)
|
36
|
+
#{plural}.find_all do |element|
|
37
|
+
nodes.include?(element.node)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
31
42
|
alias :#{singular} :#{singular}_with
|
32
43
|
CODE
|
33
44
|
end
|
data/lib/mechanize/file.rb
CHANGED
@@ -49,11 +49,18 @@ class Mechanize::File
|
|
49
49
|
end
|
50
50
|
|
51
51
|
##
|
52
|
-
# Use this method to save the content of this object to +filename
|
52
|
+
# Use this method to save the content of this object to +filename+.
|
53
|
+
#
|
54
|
+
# file.save 'index.html'
|
55
|
+
# file.save 'index.html' # saves index.html.1
|
56
|
+
# file.save 'index.html'
|
53
57
|
|
54
58
|
def save filename = nil
|
55
59
|
filename = find_free_name filename
|
56
60
|
|
61
|
+
dirname = File.dirname filename
|
62
|
+
FileUtils.mkdir_p dirname
|
63
|
+
|
57
64
|
open filename, 'wb' do |f|
|
58
65
|
f.write body
|
59
66
|
end
|
@@ -61,5 +68,22 @@ class Mechanize::File
|
|
61
68
|
|
62
69
|
alias save_as save
|
63
70
|
|
71
|
+
##
|
72
|
+
# Use this method to save the content of this object to +filename+.
|
73
|
+
# This method will overwrite any existing filename that exists with the
|
74
|
+
# same name.
|
75
|
+
#
|
76
|
+
# file.save 'index.html'
|
77
|
+
# file.save! 'index.html' # overwrite original file
|
78
|
+
|
79
|
+
def save! filename = nil
|
80
|
+
dirname = File.dirname filename
|
81
|
+
FileUtils.mkdir_p dirname
|
82
|
+
|
83
|
+
open filename, 'wb' do |f|
|
84
|
+
f.write body
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
64
88
|
end
|
65
89
|
|
data/lib/mechanize/form.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'mechanize/element_matcher'
|
2
2
|
|
3
3
|
# This class encapsulates a form parsed out of an HTML page. Each type of
|
4
|
-
# input
|
4
|
+
# input field available in a form can be accessed through this object.
|
5
5
|
#
|
6
6
|
# == Examples
|
7
7
|
#
|
@@ -38,7 +38,7 @@ class Mechanize::Form
|
|
38
38
|
attr_reader :form_node
|
39
39
|
attr_reader :page
|
40
40
|
|
41
|
-
def initialize(node, mech=nil, page=nil)
|
41
|
+
def initialize(node, mech = nil, page = nil)
|
42
42
|
@enctype = node['enctype'] || 'application/x-www-form-urlencoded'
|
43
43
|
@form_node = node
|
44
44
|
@action = Mechanize::Util.html_unescape(node['action'])
|
@@ -60,26 +60,75 @@ class Mechanize::Form
|
|
60
60
|
|
61
61
|
alias :has_key? :has_field?
|
62
62
|
|
63
|
+
# Returns whether or not the form contains a field with +value+
|
63
64
|
def has_value?(value)
|
64
65
|
fields.find { |f| f.value == value }
|
65
66
|
end
|
66
67
|
|
67
|
-
|
68
|
+
# Returns all field names (keys) for this form
|
69
|
+
def keys
|
70
|
+
fields.map { |f| f.name }
|
71
|
+
end
|
72
|
+
|
73
|
+
# Returns all field values for this form
|
74
|
+
def values
|
75
|
+
fields.map { |f| f.value }
|
76
|
+
end
|
77
|
+
|
78
|
+
# Returns all buttons of type Submit
|
79
|
+
def submits
|
80
|
+
@submits ||= buttons.select { |f| f.class == Submit }
|
81
|
+
end
|
82
|
+
|
83
|
+
# Returns all buttons of type Reset
|
84
|
+
def resets
|
85
|
+
@resets ||= buttons.select { |f| f.class == Reset }
|
86
|
+
end
|
87
|
+
|
88
|
+
# Returns all fields of type Text
|
89
|
+
def texts
|
90
|
+
@texts ||= fields.select { |f| f.class == Text }
|
91
|
+
end
|
92
|
+
|
93
|
+
# Returns all fields of type Hidden
|
94
|
+
def hiddens
|
95
|
+
@hiddens ||= fields.select { |f| f.class == Hidden }
|
96
|
+
end
|
97
|
+
|
98
|
+
# Returns all fields of type Textarea
|
99
|
+
def textareas
|
100
|
+
@textareas ||= fields.select { |f| f.class == Textarea }
|
101
|
+
end
|
68
102
|
|
69
|
-
|
103
|
+
# Returns all fields of type Keygen
|
104
|
+
def keygens
|
105
|
+
@keygens ||= fields.select { |f| f.class == Keygen }
|
106
|
+
end
|
107
|
+
|
108
|
+
# Returns whether or not the form contains a Submit button named +button_name+
|
109
|
+
def submit_button?(button_name)
|
110
|
+
submits.find { |f| f.name == button_name }
|
111
|
+
end
|
70
112
|
|
71
|
-
|
72
|
-
def
|
73
|
-
|
74
|
-
|
75
|
-
def textareas; @textareas ||= fields.select { |f| f.class == Textarea }; end
|
76
|
-
def keygens ; @keygens ||= fields.select { |f| f.class == Keygen }; end
|
113
|
+
# Returns whether or not the form contains a Reset button named +button_name+
|
114
|
+
def reset_button?(button_name)
|
115
|
+
resets.find { |f| f.name == button_name }
|
116
|
+
end
|
77
117
|
|
78
|
-
|
79
|
-
def
|
80
|
-
|
81
|
-
|
82
|
-
|
118
|
+
# Returns whether or not the form contains a Text field named +field_name+
|
119
|
+
def text_field?(field_name)
|
120
|
+
texts.find { |f| f.name == field_name }
|
121
|
+
end
|
122
|
+
|
123
|
+
# Returns whether or not the form contains a Hidden field named +field_name+
|
124
|
+
def hidden_field?(field_name)
|
125
|
+
hiddens.find { |f| f.name == field_name }
|
126
|
+
end
|
127
|
+
|
128
|
+
# Returns whether or not the form contains a Textarea named +field_name+
|
129
|
+
def textarea_field?(field_name)
|
130
|
+
textareas.find { |f| f.name == field_name }
|
131
|
+
end
|
83
132
|
|
84
133
|
# This method is a shortcut to get form's DOM id.
|
85
134
|
# Common usage:
|
@@ -117,7 +166,6 @@ class Mechanize::Form
|
|
117
166
|
# following:
|
118
167
|
#
|
119
168
|
# form.set_fields :foo => { 1 => 'bar' }
|
120
|
-
|
121
169
|
def set_fields fields = {}
|
122
170
|
fields.each do |name, v|
|
123
171
|
case v
|
@@ -139,18 +187,14 @@ class Mechanize::Form
|
|
139
187
|
end
|
140
188
|
end
|
141
189
|
|
142
|
-
# Fetch the value of the first input field with the name passed in
|
143
|
-
# ==Example
|
144
|
-
# Fetch the value set in the input field 'name'
|
190
|
+
# Fetch the value of the first input field with the name passed in. Example:
|
145
191
|
# puts form['name']
|
146
192
|
def [](field_name)
|
147
193
|
f = field(field_name)
|
148
194
|
f && f.value
|
149
195
|
end
|
150
196
|
|
151
|
-
# Set the value of the first input field with the name passed in
|
152
|
-
# ==Example
|
153
|
-
# Set the value in the input field 'name' to "Aaron"
|
197
|
+
# Set the value of the first input field with the name passed in. Example:
|
154
198
|
# form['name'] = 'Aaron'
|
155
199
|
def []=(field_name, value)
|
156
200
|
f = field(field_name)
|
@@ -173,8 +217,9 @@ class Mechanize::Form
|
|
173
217
|
super
|
174
218
|
end
|
175
219
|
|
176
|
-
# Submit
|
177
|
-
|
220
|
+
# Submit the form. Does not include the +button+ as a form parameter.
|
221
|
+
# Use +click_button+ or provide button as a parameter.
|
222
|
+
def submit button = nil, headers = {}
|
178
223
|
@mech.submit(self, button, headers)
|
179
224
|
end
|
180
225
|
|
@@ -209,7 +254,9 @@ class Mechanize::Form
|
|
209
254
|
|
210
255
|
successful_controls = []
|
211
256
|
|
212
|
-
(fields + checkboxes).
|
257
|
+
(fields + checkboxes).reject do |f|
|
258
|
+
f.node["disabled"]
|
259
|
+
end.sort.each do |f|
|
213
260
|
case f
|
214
261
|
when Mechanize::Form::CheckBox
|
215
262
|
if f.checked
|
@@ -481,7 +528,7 @@ class Mechanize::Form
|
|
481
528
|
next if type == 'reset'
|
482
529
|
@buttons << Button.new(node)
|
483
530
|
end
|
484
|
-
|
531
|
+
|
485
532
|
# Find all keygen tags
|
486
533
|
form_node.search('keygen').each do |node|
|
487
534
|
@fields << Keygen.new(node, node['value'] || '')
|
@@ -532,7 +579,6 @@ class Mechanize::Form
|
|
532
579
|
|
533
580
|
body
|
534
581
|
end
|
535
|
-
|
536
582
|
end
|
537
583
|
|
538
584
|
require 'mechanize/form/field'
|
@@ -550,4 +596,3 @@ require 'mechanize/form/option'
|
|
550
596
|
require 'mechanize/form/radio_button'
|
551
597
|
require 'mechanize/form/check_box'
|
552
598
|
require 'mechanize/form/select_list'
|
553
|
-
|
data/lib/mechanize/form/field.rb
CHANGED
@@ -16,9 +16,13 @@
|
|
16
16
|
class Mechanize::Form::Field
|
17
17
|
attr_accessor :name, :value, :node, :type
|
18
18
|
|
19
|
+
# This fields value before it's sent through Util.html_unescape.
|
20
|
+
attr_reader :raw_value
|
21
|
+
|
19
22
|
def initialize node, value = node['value']
|
20
23
|
@node = node
|
21
24
|
@name = Mechanize::Util.html_unescape(node['name'])
|
25
|
+
@raw_value = value
|
22
26
|
@value = if value.is_a? String
|
23
27
|
Mechanize::Util.html_unescape(value)
|
24
28
|
else
|
@@ -45,7 +49,7 @@ class Mechanize::Form::Field
|
|
45
49
|
node['id']
|
46
50
|
end
|
47
51
|
|
48
|
-
# This method is a shortcut to get field's DOM
|
52
|
+
# This method is a shortcut to get field's DOM class.
|
49
53
|
# Common usage: form.field_with(:dom_class => "foo")
|
50
54
|
def dom_class
|
51
55
|
node['class']
|
@@ -54,7 +58,7 @@ class Mechanize::Form::Field
|
|
54
58
|
def inspect # :nodoc:
|
55
59
|
"[%s:0x%x type: %s name: %s value: %s]" % [
|
56
60
|
self.class.name.sub(/Mechanize::Form::/, '').downcase,
|
57
|
-
object_id,
|
61
|
+
object_id, type, name, value
|
58
62
|
]
|
59
63
|
end
|
60
64
|
|
data/lib/mechanize/form/select_list.rb
CHANGED
@@ -28,11 +28,11 @@ class Mechanize::Form::SelectList < Mechanize::Form::MultiSelectList
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
|
-
def value=(
|
32
|
-
if
|
33
|
-
super([
|
31
|
+
def value=(new_value)
|
32
|
+
if new_value != new_value.to_s and new_value.respond_to? :first
|
33
|
+
super([new_value.first])
|
34
34
|
else
|
35
|
-
super([
|
35
|
+
super([new_value.to_s])
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
data/lib/mechanize/http/agent.rb
CHANGED
@@ -69,6 +69,12 @@ class Mechanize::HTTP::Agent
|
|
69
69
|
# Maximum number of redirects to follow
|
70
70
|
attr_accessor :redirection_limit
|
71
71
|
|
72
|
+
# :section: Allowed error codes
|
73
|
+
|
74
|
+
# List of error codes to handle without raising an exception.
|
75
|
+
|
76
|
+
attr_accessor :allowed_error_codes
|
77
|
+
|
72
78
|
# :section: Robots
|
73
79
|
|
74
80
|
# When true, this agent will consult the site's robots.txt for each access.
|
@@ -120,6 +126,7 @@ class Mechanize::HTTP::Agent
|
|
120
126
|
# implementation detail of mechanize and its API may change at any time.
|
121
127
|
|
122
128
|
def initialize
|
129
|
+
@allowed_error_codes = []
|
123
130
|
@conditional_requests = true
|
124
131
|
@context = nil
|
125
132
|
@content_encoding_hooks = []
|
@@ -239,11 +246,9 @@ class Mechanize::HTTP::Agent
|
|
239
246
|
end
|
240
247
|
|
241
248
|
# Add If-Modified-Since if page is in history
|
242
|
-
page = visited_page(uri)
|
243
|
-
|
244
|
-
if
|
245
|
-
request['If-Modified-Since'] = page.response['Last-Modified']
|
246
|
-
end if(@conditional_requests)
|
249
|
+
if page = visited_page(uri) and last_modified = page.response['Last-Modified']
|
250
|
+
request['If-Modified-Since'] = last_modified
|
251
|
+
end if @conditional_requests
|
247
252
|
|
248
253
|
# Specify timeouts if given
|
249
254
|
connection.open_timeout = @open_timeout if @open_timeout
|
@@ -283,12 +288,12 @@ class Mechanize::HTTP::Agent
|
|
283
288
|
meta = response_follow_meta_refresh response, uri, page, redirects
|
284
289
|
return meta if meta
|
285
290
|
|
291
|
+
if robots && page.is_a?(Mechanize::Page)
|
292
|
+
page.parser.noindex? and raise Mechanize::RobotsDisallowedError.new(uri)
|
293
|
+
end
|
294
|
+
|
286
295
|
case response
|
287
296
|
when Net::HTTPSuccess
|
288
|
-
if robots && page.is_a?(Mechanize::Page)
|
289
|
-
page.parser.noindex? and raise Mechanize::RobotsDisallowedError.new(uri)
|
290
|
-
end
|
291
|
-
|
292
297
|
page
|
293
298
|
when Mechanize::FileResponse
|
294
299
|
page
|
@@ -301,7 +306,11 @@ class Mechanize::HTTP::Agent
|
|
301
306
|
response_authenticate(response, page, uri, request, headers, params,
|
302
307
|
referer)
|
303
308
|
else
|
304
|
-
|
309
|
+
if @allowed_error_codes.any? {|code| code.to_s == page.code} then
|
310
|
+
page
|
311
|
+
else
|
312
|
+
raise Mechanize::ResponseCodeError.new(page, 'unhandled response')
|
313
|
+
end
|
305
314
|
end
|
306
315
|
end
|
307
316
|
|
@@ -403,6 +412,11 @@ class Mechanize::HTTP::Agent
|
|
403
412
|
end
|
404
413
|
end
|
405
414
|
|
415
|
+
# Closes all open connections for this agent.
|
416
|
+
def shutdown
|
417
|
+
http.shutdown
|
418
|
+
end
|
419
|
+
|
406
420
|
##
|
407
421
|
# Decodes a gzip-encoded +body_io+. If it cannot be decoded, inflate is
|
408
422
|
# tried followed by raising an error.
|
@@ -431,7 +445,7 @@ class Mechanize::HTTP::Agent
|
|
431
445
|
end
|
432
446
|
ensure
|
433
447
|
# do not close a second time if we failed the first time
|
434
|
-
zio.close if zio and
|
448
|
+
zio.close if zio and !(zio.closed? or gz_error)
|
435
449
|
body_io.close unless body_io.closed?
|
436
450
|
end
|
437
451
|
|
@@ -622,6 +636,8 @@ class Mechanize::HTTP::Agent
|
|
622
636
|
end
|
623
637
|
end
|
624
638
|
|
639
|
+
uri.host = referer_uri.host if referer_uri && URI::HTTP === uri && uri.host.nil?
|
640
|
+
|
625
641
|
scheme = uri.relative? ? 'relative' : uri.scheme.downcase
|
626
642
|
uri = @scheme_handlers[scheme].call(uri, referer)
|
627
643
|
|
@@ -696,7 +712,7 @@ class Mechanize::HTTP::Agent
|
|
696
712
|
message = 'WWW-Authenticate header missing in response'
|
697
713
|
raise Mechanize::UnauthorizedError.new(page, nil, message)
|
698
714
|
end
|
699
|
-
|
715
|
+
|
700
716
|
challenges = @authenticate_parser.parse www_authenticate
|
701
717
|
|
702
718
|
unless @auth_store.credentials? uri, challenges then
|
@@ -800,7 +816,7 @@ class Mechanize::HTTP::Agent
|
|
800
816
|
begin
|
801
817
|
if Tempfile === body_io and
|
802
818
|
(StringIO === out_io or out_io.path != body_io.path) then
|
803
|
-
body_io.close!
|
819
|
+
body_io.close!
|
804
820
|
end
|
805
821
|
rescue IOError
|
806
822
|
# HACK ruby 1.8 raises IOError when closing the stream
|
@@ -845,7 +861,7 @@ class Mechanize::HTTP::Agent
|
|
845
861
|
sleep delay
|
846
862
|
@history.push(page, page.uri)
|
847
863
|
fetch new_url, :get, {}, [],
|
848
|
-
Mechanize::Page.new, redirects
|
864
|
+
Mechanize::Page.new, redirects + 1
|
849
865
|
end
|
850
866
|
|
851
867
|
def response_log response
|
@@ -1147,7 +1163,7 @@ class Mechanize::HTTP::Agent
|
|
1147
1163
|
|
1148
1164
|
out_io
|
1149
1165
|
ensure
|
1150
|
-
inflate.close
|
1166
|
+
inflate.close if inflate.finished?
|
1151
1167
|
end
|
1152
1168
|
|
1153
1169
|
def log
|
@@ -1200,6 +1216,11 @@ class Mechanize::HTTP::Agent
|
|
1200
1216
|
size >= @max_file_buffer
|
1201
1217
|
end
|
1202
1218
|
|
1219
|
+
def reset
|
1220
|
+
@cookie_jar.clear!
|
1221
|
+
@history.clear
|
1222
|
+
end
|
1223
|
+
|
1203
1224
|
end
|
1204
1225
|
|
1205
1226
|
require 'mechanize/http/auth_store'
|