mechanize 2.5.1 → 2.6.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (52)
  1. checksums.yaml +7 -0
  2. data/.travis.yml +16 -0
  3. data/CHANGELOG.rdoc +54 -1
  4. data/Manifest.txt +5 -0
  5. data/README.rdoc +5 -5
  6. data/Rakefile +4 -2
  7. data/lib/mechanize.rb +53 -10
  8. data/lib/mechanize/cookie.rb +8 -2
  9. data/lib/mechanize/cookie_jar.rb +33 -7
  10. data/lib/mechanize/directory_saver.rb +14 -2
  11. data/lib/mechanize/download.rb +2 -0
  12. data/lib/mechanize/element_matcher.rb +17 -6
  13. data/lib/mechanize/file.rb +25 -1
  14. data/lib/mechanize/form.rb +73 -28
  15. data/lib/mechanize/form/field.rb +6 -2
  16. data/lib/mechanize/form/select_list.rb +4 -4
  17. data/lib/mechanize/http/agent.rb +36 -15
  18. data/lib/mechanize/http/auth_challenge.rb +7 -8
  19. data/lib/mechanize/http/content_disposition_parser.rb +1 -1
  20. data/lib/mechanize/http/www_authenticate_parser.rb +9 -4
  21. data/lib/mechanize/page.rb +38 -12
  22. data/lib/mechanize/page/image.rb +1 -1
  23. data/lib/mechanize/parser.rb +12 -3
  24. data/lib/mechanize/pluggable_parsers.rb +8 -3
  25. data/lib/mechanize/test_case.rb +13 -0
  26. data/lib/mechanize/test_case/digest_auth_servlet.rb +4 -4
  27. data/lib/mechanize/util.rb +2 -2
  28. data/lib/mechanize/xml_file.rb +47 -0
  29. data/test/htdocs/tc_follow_meta_loop_1.html +8 -0
  30. data/test/htdocs/tc_follow_meta_loop_2.html +8 -0
  31. data/test/test_mechanize.rb +66 -12
  32. data/test/test_mechanize_cookie.rb +34 -0
  33. data/test/test_mechanize_cookie_jar.rb +67 -1
  34. data/test/test_mechanize_directory_saver.rb +10 -0
  35. data/test/test_mechanize_file.rb +22 -4
  36. data/test/test_mechanize_form.rb +14 -0
  37. data/test/test_mechanize_form_field.rb +14 -0
  38. data/test/test_mechanize_form_multi_select_list.rb +9 -0
  39. data/test/test_mechanize_form_option.rb +4 -0
  40. data/test/test_mechanize_form_select_list.rb +4 -0
  41. data/test/test_mechanize_http_agent.rb +59 -11
  42. data/test/test_mechanize_http_auth_challenge.rb +1 -1
  43. data/test/test_mechanize_http_content_disposition_parser.rb +8 -0
  44. data/test/test_mechanize_http_www_authenticate_parser.rb +29 -12
  45. data/test/test_mechanize_page.rb +58 -0
  46. data/test/test_mechanize_page_encoding.rb +1 -1
  47. data/test/test_mechanize_page_image.rb +2 -1
  48. data/test/test_mechanize_pluggable_parser.rb +4 -4
  49. data/test/test_mechanize_xml_file.rb +29 -0
  50. metadata +173 -229
  51. data.tar.gz.sig +0 -0
  52. metadata.gz.sig +0 -0
@@ -65,5 +65,7 @@ class Mechanize::Download
65
65
  end
66
66
  end
67
67
 
68
+ alias save_as save
69
+
68
70
  end
69
71
 
@@ -6,14 +6,14 @@ module Mechanize::ElementMatcher
6
6
  criteria = if String === criteria then
7
7
  {:name => criteria}
8
8
  else
9
- criteria.map do |k, v|
10
- k = :dom_id if k.to_sym == :id
11
- k = :dom_class if k.to_sym == :class
12
- [k, v]
13
- end
9
+ Hash[criteria.map do |k, v|
10
+ k = :dom_id if k.to_sym == :id
11
+ k = :dom_class if k.to_sym == :class
12
+ [k, v]
13
+ end]
14
14
  end
15
15
 
16
- f = #{plural}.find_all do |thing|
16
+ f = select_#{plural}(criteria.delete(:search)).find_all do |thing|
17
17
  criteria.all? do |k,v|
18
18
  v === thing.send(k)
19
19
  end
@@ -28,6 +28,17 @@ module Mechanize::ElementMatcher
28
28
  f
29
29
  end
30
30
 
31
+ def select_#{plural} selector
32
+ if selector.nil? then
33
+ #{plural}
34
+ else
35
+ nodes = search(selector)
36
+ #{plural}.find_all do |element|
37
+ nodes.include?(element.node)
38
+ end
39
+ end
40
+ end
41
+
31
42
  alias :#{singular} :#{singular}_with
32
43
  CODE
33
44
  end
@@ -49,11 +49,18 @@ class Mechanize::File
49
49
  end
50
50
 
51
51
  ##
52
- # Use this method to save the content of this object to +filename+
52
+ # Use this method to save the content of this object to +filename+.
53
+ #
54
+ # file.save 'index.html'
55
+ # file.save 'index.html' # saves index.html.1
56
+ # file.save 'index.html'
53
57
 
54
58
  def save filename = nil
55
59
  filename = find_free_name filename
56
60
 
61
+ dirname = File.dirname filename
62
+ FileUtils.mkdir_p dirname
63
+
57
64
  open filename, 'wb' do |f|
58
65
  f.write body
59
66
  end
@@ -61,5 +68,22 @@ class Mechanize::File
61
68
 
62
69
  alias save_as save
63
70
 
71
+ ##
72
+ # Use this method to save the content of this object to +filename+.
73
+ # This method will overwrite any existing filename that exists with the
74
+ # same name.
75
+ #
76
+ # file.save 'index.html'
77
+ # file.save! 'index.html' # overwrite original file
78
+
79
+ def save! filename = nil
80
+ dirname = File.dirname filename
81
+ FileUtils.mkdir_p dirname
82
+
83
+ open filename, 'wb' do |f|
84
+ f.write body
85
+ end
86
+ end
87
+
64
88
  end
65
89
 
@@ -1,7 +1,7 @@
1
1
  require 'mechanize/element_matcher'
2
2
 
3
3
  # This class encapsulates a form parsed out of an HTML page. Each type of
4
- # input fields available in a form can be accessed through this object.
4
+ # input field available in a form can be accessed through this object.
5
5
  #
6
6
  # == Examples
7
7
  #
@@ -38,7 +38,7 @@ class Mechanize::Form
38
38
  attr_reader :form_node
39
39
  attr_reader :page
40
40
 
41
- def initialize(node, mech=nil, page=nil)
41
+ def initialize(node, mech = nil, page = nil)
42
42
  @enctype = node['enctype'] || 'application/x-www-form-urlencoded'
43
43
  @form_node = node
44
44
  @action = Mechanize::Util.html_unescape(node['action'])
@@ -60,26 +60,75 @@ class Mechanize::Form
60
60
 
61
61
  alias :has_key? :has_field?
62
62
 
63
+ # Returns whether or not the form contains a field with +value+
63
64
  def has_value?(value)
64
65
  fields.find { |f| f.value == value }
65
66
  end
66
67
 
67
- def keys; fields.map { |f| f.name }; end
68
+ # Returns all field names (keys) for this form
69
+ def keys
70
+ fields.map { |f| f.name }
71
+ end
72
+
73
+ # Returns all field values for this form
74
+ def values
75
+ fields.map { |f| f.value }
76
+ end
77
+
78
+ # Returns all buttons of type Submit
79
+ def submits
80
+ @submits ||= buttons.select { |f| f.class == Submit }
81
+ end
82
+
83
+ # Returns all buttons of type Reset
84
+ def resets
85
+ @resets ||= buttons.select { |f| f.class == Reset }
86
+ end
87
+
88
+ # Returns all fields of type Text
89
+ def texts
90
+ @texts ||= fields.select { |f| f.class == Text }
91
+ end
92
+
93
+ # Returns all fields of type Hidden
94
+ def hiddens
95
+ @hiddens ||= fields.select { |f| f.class == Hidden }
96
+ end
97
+
98
+ # Returns all fields of type Textarea
99
+ def textareas
100
+ @textareas ||= fields.select { |f| f.class == Textarea }
101
+ end
68
102
 
69
- def values; fields.map { |f| f.value }; end
103
+ # Returns all fields of type Keygen
104
+ def keygens
105
+ @keygens ||= fields.select { |f| f.class == Keygen }
106
+ end
107
+
108
+ # Returns whether or not the form contains a Submit button named +button_name+
109
+ def submit_button?(button_name)
110
+ submits.find { |f| f.name == button_name }
111
+ end
70
112
 
71
- def submits ; @submits ||= buttons.select { |f| f.class == Submit }; end
72
- def resets ; @resets ||= buttons.select { |f| f.class == Reset }; end
73
- def texts ; @texts ||= fields.select { |f| f.class == Text }; end
74
- def hiddens ; @hiddens ||= fields.select { |f| f.class == Hidden }; end
75
- def textareas; @textareas ||= fields.select { |f| f.class == Textarea }; end
76
- def keygens ; @keygens ||= fields.select { |f| f.class == Keygen }; end
113
+ # Returns whether or not the form contains a Reset button named +button_name+
114
+ def reset_button?(button_name)
115
+ resets.find { |f| f.name == button_name }
116
+ end
77
117
 
78
- def submit_button?(button_name) submits.find{|f| f.name == button_name}; end
79
- def reset_button?(button_name) resets.find{|f| f.name == button_name}; end
80
- def text_field?(field_name) texts.find{|f| f.name == field_name}; end
81
- def hidden_field?(field_name) hiddens.find{|f| f.name == field_name}; end
82
- def textarea_field?(field_name) textareas.find{|f| f.name == field_name}; end
118
+ # Returns whether or not the form contains a Text field named +field_name+
119
+ def text_field?(field_name)
120
+ texts.find { |f| f.name == field_name }
121
+ end
122
+
123
+ # Returns whether or not the form contains a Hidden field named +field_name+
124
+ def hidden_field?(field_name)
125
+ hiddens.find { |f| f.name == field_name }
126
+ end
127
+
128
+ # Returns whether or not the form contains a Textarea named +field_name+
129
+ def textarea_field?(field_name)
130
+ textareas.find { |f| f.name == field_name }
131
+ end
83
132
 
84
133
  # This method is a shortcut to get form's DOM id.
85
134
  # Common usage:
@@ -117,7 +166,6 @@ class Mechanize::Form
117
166
  # following:
118
167
  #
119
168
  # form.set_fields :foo => { 1 => 'bar' }
120
-
121
169
  def set_fields fields = {}
122
170
  fields.each do |name, v|
123
171
  case v
@@ -139,18 +187,14 @@ class Mechanize::Form
139
187
  end
140
188
  end
141
189
 
142
- # Fetch the value of the first input field with the name passed in
143
- # ==Example
144
- # Fetch the value set in the input field 'name'
190
+ # Fetch the value of the first input field with the name passed in. Example:
145
191
  # puts form['name']
146
192
  def [](field_name)
147
193
  f = field(field_name)
148
194
  f && f.value
149
195
  end
150
196
 
151
- # Set the value of the first input field with the name passed in
152
- # ==Example
153
- # Set the value in the input field 'name' to "Aaron"
197
+ # Set the value of the first input field with the name passed in. Example:
154
198
  # form['name'] = 'Aaron'
155
199
  def []=(field_name, value)
156
200
  f = field(field_name)
@@ -173,8 +217,9 @@ class Mechanize::Form
173
217
  super
174
218
  end
175
219
 
176
- # Submit this form with the button passed in
177
- def submit button=nil, headers = {}
220
+ # Submit the form. Does not include the +button+ as a form parameter.
221
+ # Use +click_button+ or provide button as a parameter.
222
+ def submit button = nil, headers = {}
178
223
  @mech.submit(self, button, headers)
179
224
  end
180
225
 
@@ -209,7 +254,9 @@ class Mechanize::Form
209
254
 
210
255
  successful_controls = []
211
256
 
212
- (fields + checkboxes).sort.each do |f|
257
+ (fields + checkboxes).reject do |f|
258
+ f.node["disabled"]
259
+ end.sort.each do |f|
213
260
  case f
214
261
  when Mechanize::Form::CheckBox
215
262
  if f.checked
@@ -481,7 +528,7 @@ class Mechanize::Form
481
528
  next if type == 'reset'
482
529
  @buttons << Button.new(node)
483
530
  end
484
-
531
+
485
532
  # Find all keygen tags
486
533
  form_node.search('keygen').each do |node|
487
534
  @fields << Keygen.new(node, node['value'] || '')
@@ -532,7 +579,6 @@ class Mechanize::Form
532
579
 
533
580
  body
534
581
  end
535
-
536
582
  end
537
583
 
538
584
  require 'mechanize/form/field'
@@ -550,4 +596,3 @@ require 'mechanize/form/option'
550
596
  require 'mechanize/form/radio_button'
551
597
  require 'mechanize/form/check_box'
552
598
  require 'mechanize/form/select_list'
553
-
@@ -16,9 +16,13 @@
16
16
  class Mechanize::Form::Field
17
17
  attr_accessor :name, :value, :node, :type
18
18
 
19
+ # This fields value before it's sent through Util.html_unescape.
20
+ attr_reader :raw_value
21
+
19
22
  def initialize node, value = node['value']
20
23
  @node = node
21
24
  @name = Mechanize::Util.html_unescape(node['name'])
25
+ @raw_value = value
22
26
  @value = if value.is_a? String
23
27
  Mechanize::Util.html_unescape(value)
24
28
  else
@@ -45,7 +49,7 @@ class Mechanize::Form::Field
45
49
  node['id']
46
50
  end
47
51
 
48
- # This method is a shortcut to get field's DOM id.
52
+ # This method is a shortcut to get field's DOM class.
49
53
  # Common usage: form.field_with(:dom_class => "foo")
50
54
  def dom_class
51
55
  node['class']
@@ -54,7 +58,7 @@ class Mechanize::Form::Field
54
58
  def inspect # :nodoc:
55
59
  "[%s:0x%x type: %s name: %s value: %s]" % [
56
60
  self.class.name.sub(/Mechanize::Form::/, '').downcase,
57
- object_id, @type, @name, @value
61
+ object_id, type, name, value
58
62
  ]
59
63
  end
60
64
 
@@ -28,11 +28,11 @@ class Mechanize::Form::SelectList < Mechanize::Form::MultiSelectList
28
28
  end
29
29
  end
30
30
 
31
- def value=(new)
32
- if new != new.to_s and new.respond_to? :first
33
- super([new.first])
31
+ def value=(new_value)
32
+ if new_value != new_value.to_s and new_value.respond_to? :first
33
+ super([new_value.first])
34
34
  else
35
- super([new.to_s])
35
+ super([new_value.to_s])
36
36
  end
37
37
  end
38
38
 
@@ -69,6 +69,12 @@ class Mechanize::HTTP::Agent
69
69
  # Maximum number of redirects to follow
70
70
  attr_accessor :redirection_limit
71
71
 
72
+ # :section: Allowed error codes
73
+
74
+ # List of error codes to handle without raising an exception.
75
+
76
+ attr_accessor :allowed_error_codes
77
+
72
78
  # :section: Robots
73
79
 
74
80
  # When true, this agent will consult the site's robots.txt for each access.
@@ -120,6 +126,7 @@ class Mechanize::HTTP::Agent
120
126
  # implementation detail of mechanize and its API may change at any time.
121
127
 
122
128
  def initialize
129
+ @allowed_error_codes = []
123
130
  @conditional_requests = true
124
131
  @context = nil
125
132
  @content_encoding_hooks = []
@@ -239,11 +246,9 @@ class Mechanize::HTTP::Agent
239
246
  end
240
247
 
241
248
  # Add If-Modified-Since if page is in history
242
- page = visited_page(uri)
243
-
244
- if (page = visited_page(uri)) and page.response['Last-Modified']
245
- request['If-Modified-Since'] = page.response['Last-Modified']
246
- end if(@conditional_requests)
249
+ if page = visited_page(uri) and last_modified = page.response['Last-Modified']
250
+ request['If-Modified-Since'] = last_modified
251
+ end if @conditional_requests
247
252
 
248
253
  # Specify timeouts if given
249
254
  connection.open_timeout = @open_timeout if @open_timeout
@@ -283,12 +288,12 @@ class Mechanize::HTTP::Agent
283
288
  meta = response_follow_meta_refresh response, uri, page, redirects
284
289
  return meta if meta
285
290
 
291
+ if robots && page.is_a?(Mechanize::Page)
292
+ page.parser.noindex? and raise Mechanize::RobotsDisallowedError.new(uri)
293
+ end
294
+
286
295
  case response
287
296
  when Net::HTTPSuccess
288
- if robots && page.is_a?(Mechanize::Page)
289
- page.parser.noindex? and raise Mechanize::RobotsDisallowedError.new(uri)
290
- end
291
-
292
297
  page
293
298
  when Mechanize::FileResponse
294
299
  page
@@ -301,7 +306,11 @@ class Mechanize::HTTP::Agent
301
306
  response_authenticate(response, page, uri, request, headers, params,
302
307
  referer)
303
308
  else
304
- raise Mechanize::ResponseCodeError.new(page, 'unhandled response')
309
+ if @allowed_error_codes.any? {|code| code.to_s == page.code} then
310
+ page
311
+ else
312
+ raise Mechanize::ResponseCodeError.new(page, 'unhandled response')
313
+ end
305
314
  end
306
315
  end
307
316
 
@@ -403,6 +412,11 @@ class Mechanize::HTTP::Agent
403
412
  end
404
413
  end
405
414
 
415
+ # Closes all open connections for this agent.
416
+ def shutdown
417
+ http.shutdown
418
+ end
419
+
406
420
  ##
407
421
  # Decodes a gzip-encoded +body_io+. If it cannot be decoded, inflate is
408
422
  # tried followed by raising an error.
@@ -431,7 +445,7 @@ class Mechanize::HTTP::Agent
431
445
  end
432
446
  ensure
433
447
  # do not close a second time if we failed the first time
434
- zio.close if zio and not (zio.closed? or gz_error)
448
+ zio.close if zio and !(zio.closed? or gz_error)
435
449
  body_io.close unless body_io.closed?
436
450
  end
437
451
 
@@ -622,6 +636,8 @@ class Mechanize::HTTP::Agent
622
636
  end
623
637
  end
624
638
 
639
+ uri.host = referer_uri.host if referer_uri && URI::HTTP === uri && uri.host.nil?
640
+
625
641
  scheme = uri.relative? ? 'relative' : uri.scheme.downcase
626
642
  uri = @scheme_handlers[scheme].call(uri, referer)
627
643
 
@@ -696,7 +712,7 @@ class Mechanize::HTTP::Agent
696
712
  message = 'WWW-Authenticate header missing in response'
697
713
  raise Mechanize::UnauthorizedError.new(page, nil, message)
698
714
  end
699
-
715
+
700
716
  challenges = @authenticate_parser.parse www_authenticate
701
717
 
702
718
  unless @auth_store.credentials? uri, challenges then
@@ -800,7 +816,7 @@ class Mechanize::HTTP::Agent
800
816
  begin
801
817
  if Tempfile === body_io and
802
818
  (StringIO === out_io or out_io.path != body_io.path) then
803
- body_io.close!
819
+ body_io.close!
804
820
  end
805
821
  rescue IOError
806
822
  # HACK ruby 1.8 raises IOError when closing the stream
@@ -845,7 +861,7 @@ class Mechanize::HTTP::Agent
845
861
  sleep delay
846
862
  @history.push(page, page.uri)
847
863
  fetch new_url, :get, {}, [],
848
- Mechanize::Page.new, redirects
864
+ Mechanize::Page.new, redirects + 1
849
865
  end
850
866
 
851
867
  def response_log response
@@ -1147,7 +1163,7 @@ class Mechanize::HTTP::Agent
1147
1163
 
1148
1164
  out_io
1149
1165
  ensure
1150
- inflate.close
1166
+ inflate.close if inflate.finished?
1151
1167
  end
1152
1168
 
1153
1169
  def log
@@ -1200,6 +1216,11 @@ class Mechanize::HTTP::Agent
1200
1216
  size >= @max_file_buffer
1201
1217
  end
1202
1218
 
1219
+ def reset
1220
+ @cookie_jar.clear!
1221
+ @history.clear
1222
+ end
1223
+
1203
1224
  end
1204
1225
 
1205
1226
  require 'mechanize/http/auth_store'