mechanize 2.5.1 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (52)
  1. checksums.yaml +7 -0
  2. data/.travis.yml +16 -0
  3. data/CHANGELOG.rdoc +54 -1
  4. data/Manifest.txt +5 -0
  5. data/README.rdoc +5 -5
  6. data/Rakefile +4 -2
  7. data/lib/mechanize.rb +53 -10
  8. data/lib/mechanize/cookie.rb +8 -2
  9. data/lib/mechanize/cookie_jar.rb +33 -7
  10. data/lib/mechanize/directory_saver.rb +14 -2
  11. data/lib/mechanize/download.rb +2 -0
  12. data/lib/mechanize/element_matcher.rb +17 -6
  13. data/lib/mechanize/file.rb +25 -1
  14. data/lib/mechanize/form.rb +73 -28
  15. data/lib/mechanize/form/field.rb +6 -2
  16. data/lib/mechanize/form/select_list.rb +4 -4
  17. data/lib/mechanize/http/agent.rb +36 -15
  18. data/lib/mechanize/http/auth_challenge.rb +7 -8
  19. data/lib/mechanize/http/content_disposition_parser.rb +1 -1
  20. data/lib/mechanize/http/www_authenticate_parser.rb +9 -4
  21. data/lib/mechanize/page.rb +38 -12
  22. data/lib/mechanize/page/image.rb +1 -1
  23. data/lib/mechanize/parser.rb +12 -3
  24. data/lib/mechanize/pluggable_parsers.rb +8 -3
  25. data/lib/mechanize/test_case.rb +13 -0
  26. data/lib/mechanize/test_case/digest_auth_servlet.rb +4 -4
  27. data/lib/mechanize/util.rb +2 -2
  28. data/lib/mechanize/xml_file.rb +47 -0
  29. data/test/htdocs/tc_follow_meta_loop_1.html +8 -0
  30. data/test/htdocs/tc_follow_meta_loop_2.html +8 -0
  31. data/test/test_mechanize.rb +66 -12
  32. data/test/test_mechanize_cookie.rb +34 -0
  33. data/test/test_mechanize_cookie_jar.rb +67 -1
  34. data/test/test_mechanize_directory_saver.rb +10 -0
  35. data/test/test_mechanize_file.rb +22 -4
  36. data/test/test_mechanize_form.rb +14 -0
  37. data/test/test_mechanize_form_field.rb +14 -0
  38. data/test/test_mechanize_form_multi_select_list.rb +9 -0
  39. data/test/test_mechanize_form_option.rb +4 -0
  40. data/test/test_mechanize_form_select_list.rb +4 -0
  41. data/test/test_mechanize_http_agent.rb +59 -11
  42. data/test/test_mechanize_http_auth_challenge.rb +1 -1
  43. data/test/test_mechanize_http_content_disposition_parser.rb +8 -0
  44. data/test/test_mechanize_http_www_authenticate_parser.rb +29 -12
  45. data/test/test_mechanize_page.rb +58 -0
  46. data/test/test_mechanize_page_encoding.rb +1 -1
  47. data/test/test_mechanize_page_image.rb +2 -1
  48. data/test/test_mechanize_pluggable_parser.rb +4 -4
  49. data/test/test_mechanize_xml_file.rb +29 -0
  50. metadata +173 -229
  51. data.tar.gz.sig +0 -0
  52. metadata.gz.sig +0 -0
@@ -1,6 +1,6 @@
1
1
  class Mechanize::HTTP
2
2
 
3
- AuthChallenge = Struct.new :scheme, :params
3
+ AuthChallenge = Struct.new :scheme, :params, :raw
4
4
 
5
5
  ##
6
6
  # A parsed WWW-Authenticate header
@@ -18,7 +18,10 @@ class Mechanize::HTTP
18
18
  # The authentication parameters
19
19
 
20
20
  ##
21
- # :method: initialize(scheme = nil, params = nil)
21
+ # :method: initialize
22
+ #
23
+ # :call-seq:
24
+ # initialize(scheme = nil, params = nil)
22
25
  #
23
26
  # Creates a new AuthChallenge header with the given scheme and parameters
24
27
 
@@ -52,13 +55,9 @@ class Mechanize::HTTP
52
55
  end
53
56
 
54
57
  ##
55
- # The reconstructed, normalized challenge
56
-
57
- def to_s
58
- auth_params = params.map { |name, value| "#{name}=\"#{value}\"" }
58
+ # The raw authentication challenge
59
59
 
60
- "#{scheme} #{auth_params.join ', '}"
61
- end
60
+ alias to_s raw
62
61
 
63
62
  end
64
63
 
@@ -86,7 +86,7 @@ class Mechanize::HTTP::ContentDispositionParser
86
86
 
87
87
  while true do
88
88
  return nil unless param = rfc_2045_token
89
- param.downcase
89
+ param.downcase!
90
90
  return nil unless @scanner.scan(/=/)
91
91
 
92
92
  value = case param
@@ -25,6 +25,7 @@ class Mechanize::HTTP::WWWAuthenticateParser
25
25
 
26
26
  while true do
27
27
  break if @scanner.eos?
28
+ start = @scanner.pos
28
29
  challenge = Mechanize::HTTP::AuthChallenge.new
29
30
 
30
31
  scheme = auth_scheme
@@ -43,6 +44,7 @@ class Mechanize::HTTP::WWWAuthenticateParser
43
44
  challenge.params = @scanner.scan(/.*/)
44
45
  end
45
46
 
47
+ challenge.raw = www_authenticate[start, @scanner.pos]
46
48
  challenges << challenge
47
49
  next
48
50
  else
@@ -62,10 +64,15 @@ class Mechanize::HTTP::WWWAuthenticateParser
62
64
  unless name then
63
65
  challenge.params = params
64
66
  challenges << challenge
65
- break if @scanner.eos?
67
+
68
+ if @scanner.eos? then
69
+ challenge.raw = www_authenticate[start, @scanner.pos]
70
+ break
71
+ end
66
72
 
67
73
  @scanner.pos = pos # rewind
68
- challenge = '' # a token should be next, new challenge
74
+ challenge.raw = www_authenticate[start, @scanner.pos].sub(/(,+)? *$/, '')
75
+ challenge = nil # a token should be next, new challenge
69
76
  break
70
77
  else
71
78
  params[name] = value
@@ -73,8 +80,6 @@ class Mechanize::HTTP::WWWAuthenticateParser
73
80
 
74
81
  spaces
75
82
 
76
- return nil unless ',' == @scanner.peek(1) or @scanner.eos?
77
-
78
83
  @scanner.scan(/(, *)+/)
79
84
  end
80
85
  end
@@ -212,7 +212,10 @@ class Mechanize::Page < Mechanize::File
212
212
  def_delegator :parser, :at, :at
213
213
 
214
214
  ##
215
- # :method: form_with(criteria)
215
+ # :method: form_with
216
+ #
217
+ # :call-seq:
218
+ # form_with(criteria)
216
219
  #
217
220
  # Find a single form matching +criteria+.
218
221
  # Example:
@@ -221,7 +224,9 @@ class Mechanize::Page < Mechanize::File
221
224
  # end
222
225
 
223
226
  ##
224
- # :method: forms_with(criteria)
227
+ # :method: forms_with
228
+ #
229
+ # :call-seq: forms_with(criteria)
225
230
  #
226
231
  # Find all forms matching +criteria+.
227
232
  # Example:
@@ -232,14 +237,19 @@ class Mechanize::Page < Mechanize::File
232
237
  elements_with :form
233
238
 
234
239
  ##
235
- # :method: link_with(criteria)
240
+ # :method: link_with
241
+ #
242
+ # :call-seq: link_with(criteria)
236
243
  #
237
244
  # Find a single link matching +criteria+.
238
245
  # Example:
239
246
  # page.link_with(:href => /foo/).click
240
247
 
241
248
  ##
242
- # :method: links_with(criteria)
249
+ # :method: links_with
250
+ #
251
+ # :call-seq:
252
+ # links_with(criteria)
243
253
  #
244
254
  # Find all links matching +criteria+.
245
255
  # Example:
@@ -250,14 +260,18 @@ class Mechanize::Page < Mechanize::File
250
260
  elements_with :link
251
261
 
252
262
  ##
253
- # :method: base_with(criteria)
263
+ # :method: base_with
264
+ #
265
+ # :call-seq: base_with(criteria)
254
266
  #
255
267
  # Find a single base tag matching +criteria+.
256
268
  # Example:
257
269
  # page.base_with(:href => /foo/).click
258
270
 
259
271
  ##
260
- # :method: bases_with(criteria)
272
+ # :method: bases_with
273
+ #
274
+ # :call-seq: bases_with(criteria)
261
275
  #
262
276
  # Find all base tags matching +criteria+.
263
277
  # Example:
@@ -268,14 +282,18 @@ class Mechanize::Page < Mechanize::File
268
282
  elements_with :base
269
283
 
270
284
  ##
271
- # :method: frame_with(criteria)
285
+ # :method: frame_with
286
+ #
287
+ # :call-seq: frame_with(criteria)
272
288
  #
273
289
  # Find a single frame tag matching +criteria+.
274
290
  # Example:
275
291
  # page.frame_with(:src => /foo/).click
276
292
 
277
293
  ##
278
- # :method: frames_with(criteria)
294
+ # :method: frames_with
295
+ #
296
+ # :call-seq: frames_with(criteria)
279
297
  #
280
298
  # Find all frame tags matching +criteria+.
281
299
  # Example:
@@ -286,14 +304,18 @@ class Mechanize::Page < Mechanize::File
286
304
  elements_with :frame
287
305
 
288
306
  ##
289
- # :method: iframe_with(criteria)
307
+ # :method: iframe_with
308
+ #
309
+ # :call-seq: iframe_with(criteria)
290
310
  #
291
311
  # Find a single iframe tag matching +criteria+.
292
312
  # Example:
293
313
  # page.iframe_with(:src => /foo/).click
294
314
 
295
315
  ##
296
- # :method: iframes_with(criteria)
316
+ # :method: iframes_with
317
+ #
318
+ # :call-seq: iframes_with(criteria)
297
319
  #
298
320
  # Find all iframe tags matching +criteria+.
299
321
  # Example:
@@ -304,14 +326,18 @@ class Mechanize::Page < Mechanize::File
304
326
  elements_with :iframe
305
327
 
306
328
  ##
307
- # :method: image_with(criteria)
329
+ # :method: image_with
330
+ #
331
+ # :call-seq: image_with(criteria)
308
332
  #
309
333
  # Find a single image matching +criteria+.
310
334
  # Example:
311
335
  # page.image_with(:alt => /main/).fetch.save
312
336
 
313
337
  ##
314
- # :method: images_with(criteria)
338
+ # :method: images_with
339
+ #
340
+ # :call-seq: images_with(criteria)
315
341
  #
316
342
  # Find all images matching +criteria+.
317
343
  # Example:
@@ -165,7 +165,7 @@ class Mechanize::Page::Image
165
165
  page.uri + src
166
166
  end
167
167
  else
168
- src
168
+ URI src
169
169
  end
170
170
  end
171
171
 
@@ -54,21 +54,30 @@ module Mechanize::Parser
54
54
  attr_accessor :code
55
55
 
56
56
  ##
57
- # :method: [](header)
57
+ # :method: []
58
+ #
59
+ # :call-seq:
60
+ # [](header)
58
61
  #
59
62
  # Access HTTP +header+ by name
60
63
 
61
64
  def_delegator :header, :[], :[]
62
65
 
63
66
  ##
64
- # :method: []=(header, value)
67
+ # :method: []=
68
+ #
69
+ # :call-seq:
70
+ # []=(header, value)
65
71
  #
66
72
  # Set HTTP +header+ to +value+
67
73
 
68
74
  def_delegator :header, :[]=, :[]=
69
75
 
70
76
  ##
71
- # :method: key?(header)
77
+ # :method: key?
78
+ #
79
+ # :call-seq:
80
+ # key?(header)
72
81
  #
73
82
  # Is the named +header+ present?
74
83
 
@@ -1,6 +1,7 @@
1
1
  require 'mechanize/file'
2
2
  require 'mechanize/file_saver'
3
3
  require 'mechanize/page'
4
+ require 'mechanize/xml_file'
4
5
 
5
6
  ##
6
7
  # Mechanize allows different parsers for different content types. Mechanize
@@ -64,7 +65,7 @@ class Mechanize::PluggableParser
64
65
  :xhtml => 'application/xhtml+xml',
65
66
  :pdf => 'application/pdf',
66
67
  :csv => 'text/csv',
67
- :xml => 'text/xml',
68
+ :xml => ['text/xml', 'application/xml'],
68
69
  }
69
70
 
70
71
  attr_accessor :default
@@ -74,7 +75,9 @@ class Mechanize::PluggableParser
74
75
  CONTENT_TYPES[:html] => Mechanize::Page,
75
76
  CONTENT_TYPES[:xhtml] => Mechanize::Page,
76
77
  CONTENT_TYPES[:wap] => Mechanize::Page,
77
- 'image' => Mechanize::Image
78
+ 'image' => Mechanize::Image,
79
+ 'text/xml' => Mechanize::XmlFile,
80
+ 'application/xml' => Mechanize::XmlFile,
78
81
  }
79
82
 
80
83
  @default = Mechanize::File
@@ -138,7 +141,9 @@ class Mechanize::PluggableParser
138
141
  # Registers +klass+ as the parser for text/xml content
139
142
 
140
143
  def xml=(klass)
141
- register_parser(CONTENT_TYPES[:xml], klass)
144
+ CONTENT_TYPES[:xml].each do |content_type|
145
+ register_parser content_type, klass
146
+ end
142
147
  end
143
148
 
144
149
  ##
@@ -78,6 +78,19 @@ class Mechanize::TestCase < MiniTest::Unit::TestCase
78
78
  Mechanize::Page.new uri, nil, body, 200, @mech
79
79
  end
80
80
 
81
+ ##
82
+ # Creates a Mechanize::CookieJar by parsing the given +str+
83
+
84
+ def cookie_jar str, uri = URI('http://example')
85
+ jar = Mechanize::CookieJar.new
86
+
87
+ Mechanize::Cookie.parse uri, str do |cookie|
88
+ jar.add uri, cookie
89
+ end
90
+
91
+ jar
92
+ end
93
+
81
94
  ##
82
95
  # Runs the block inside a temporary directory
83
96
 
@@ -15,19 +15,19 @@ class DigestAuthServlet < WEBrick::HTTPServlet::AbstractServlet
15
15
  :Logger => Logger.new(nil)
16
16
  })
17
17
 
18
- def do_GET(req,res)
18
+ def do_GET req, res
19
19
  def req.request_time; Time.now; end
20
20
  def req.request_uri; '/digest_auth'; end
21
- def req.request_method; "GET"; end
21
+ def req.request_method; 'GET'; end
22
22
 
23
23
  begin
24
- @@authenticator.authenticate(req,res)
24
+ @@authenticator.authenticate req, res
25
25
  res.body = 'You are authenticated'
26
26
  rescue WEBrick::HTTPStatus::Unauthorized
27
27
  res.status = 401
28
28
  end
29
- FileUtils.rm('digest.htpasswd') if File.exists?('digest.htpasswd')
30
29
  end
30
+
31
31
  alias :do_POST :do_GET
32
32
  end
33
33
 
@@ -27,7 +27,7 @@ class Mechanize::Util
27
27
  # Mechanize::Util::DefaultMimeTypes.replace(mimetab)
28
28
  DefaultMimeTypes = WEBrick::HTTPUtils::DefaultMimeTypes
29
29
 
30
- def self.build_query_string(parameters, enc=nil)
30
+ def self.build_query_string(parameters, enc = nil)
31
31
  parameters.map { |k,v|
32
32
  # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
33
33
  [CGI.escape(k.to_s), CGI.escape(v.to_s)].join("=") if k
@@ -35,7 +35,7 @@ class Mechanize::Util
35
35
  end
36
36
 
37
37
  # Converts string +s+ from +code+ to UTF-8.
38
- def self.from_native_charset(s, code, ignore_encoding_error=false, log=nil)
38
+ def self.from_native_charset(s, code, ignore_encoding_error = false, log = nil)
39
39
  return s unless s && code
40
40
  return s unless Mechanize.html_parser == Nokogiri::HTML
41
41
 
@@ -0,0 +1,47 @@
1
+ ##
2
+ # This class encapsulates an XML file. If Mechanize finds a content-type
3
+ # of 'text/xml' or 'application/xml' this class will be instantiated and
4
+ # returned. This class also opens up the +search+ and +at+ methods available
5
+ # on the underlying Nokogiri::XML::Document object.
6
+ #
7
+ # Example:
8
+ #
9
+ # require 'mechanize'
10
+ #
11
+ # agent = Mechanize.new
12
+ # xml = agent.get('http://example.org/some-xml-file.xml')
13
+ # xml.class #=> Mechanize::XmlFile
14
+ # xml.search('//foo[@attr="bar"]/etc')
15
+
16
+ class Mechanize::XmlFile < Mechanize::File
17
+ extend Forwardable
18
+
19
+ # The underlying Nokogiri::XML::Document object
20
+
21
+ attr_reader :xml
22
+
23
+ def initialize(uri = nil, response = nil, body = nil, code = nil)
24
+ super uri, response, body, code
25
+ @xml = Nokogiri.XML body
26
+ end
27
+
28
+ ##
29
+ # :method: search
30
+ #
31
+ # Search for +paths+ in the page using Nokogiri's #search. The +paths+ can
32
+ # be XPath or CSS and an optional Hash of namespaces may be appended.
33
+ #
34
+ # See Nokogiri::XML::Node#search for further details.
35
+
36
+ def_delegator :xml, :search, :search
37
+
38
+ ##
39
+ # :method: at
40
+ #
41
+ # Search through the page for +path+ under +namespace+ using Nokogiri's #at.
42
+ # The +path+ may be either a CSS or XPath expression.
43
+ #
44
+ # See also Nokogiri::XML::Node#at
45
+
46
+ def_delegator :xml, :at, :at
47
+ end
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <head>
3
+ <meta http-equiv="refresh" content="0; url=http://localhost/tc_follow_meta_loop_2.html">
4
+ </head>
5
+ <body>
6
+ This page has a meta refresh.
7
+ </body>
8
+ </html>
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <head>
3
+ <meta http-equiv="refresh" content="0; url=http://localhost/tc_follow_meta_loop_1.html">
4
+ </head>
5
+ <body>
6
+ This page has a meta refresh.
7
+ </body>
8
+ </html>
@@ -76,6 +76,8 @@ class TestMechanize < Mechanize::TestCase
76
76
  end
77
77
 
78
78
  assert_equal ssl_certificate, mech.certificate
79
+ assert_equal ssl_certificate, mech.cert
80
+ assert_equal ssl_private_key, mech.key
79
81
  end
80
82
 
81
83
  def test_cert_store
@@ -98,6 +100,18 @@ class TestMechanize < Mechanize::TestCase
98
100
  @mech.history.last.uri.to_s)
99
101
  end
100
102
 
103
+ def test_click_bogus_link_with_cookies
104
+ @mech.cookie_jar = cookie_jar("a=b")
105
+
106
+ page = html_page <<-BODY
107
+ <a href="http:///index.html">yes really</a>
108
+ BODY
109
+
110
+ page.links[0].click
111
+
112
+ assert_equal '/index.html', requests.first.path
113
+ end
114
+
101
115
  def test_click_frame
102
116
  frame = node 'frame', 'src' => '/index.html'
103
117
  frame = Mechanize::Page::Frame.new frame, @mech, fake_page
@@ -384,20 +398,11 @@ but not <a href="/" rel="me nofollow">this</a>!
384
398
  end
385
399
 
386
400
  def test_get_digest_auth
387
- block_called = false
388
-
389
- @mech.add_auth(@uri, 'user', 'pass')
401
+ @mech.add_auth @uri, 'user', 'pass'
390
402
 
391
- @mech.pre_connect_hooks << lambda { |_, request|
392
- block_called = true
393
- request.to_hash.each do |k,v|
394
- assert_equal(1, v.length)
395
- end
396
- }
403
+ page = @mech.get @uri + '/digest_auth'
397
404
 
398
- page = @mech.get(@uri + '/digest_auth')
399
- assert_equal('You are authenticated', page.body)
400
- assert block_called
405
+ assert_equal 'You are authenticated', page.body
401
406
  end
402
407
 
403
408
  def test_get_follow_meta_refresh
@@ -411,6 +416,21 @@ but not <a href="/" rel="me nofollow">this</a>!
411
416
  @mech.history.first.uri.to_s)
412
417
  assert_equal('http://localhost/index.html', page.uri.to_s)
413
418
  assert_equal('http://localhost/index.html', @mech.history.last.uri.to_s)
419
+
420
+ [5, 6].each { |limit|
421
+ @mech.redirection_limit = limit
422
+ begin
423
+ @mech.get('http://localhost/tc_follow_meta_loop_1.html')
424
+ rescue => e
425
+ assert_instance_of Mechanize::RedirectLimitReachedError, e
426
+ assert_equal limit, e.redirects
427
+ if limit % 2 == 0
428
+ assert_equal '/tc_follow_meta_loop_1.html', e.page.uri.path
429
+ else
430
+ assert_equal '/tc_follow_meta_loop_2.html', e.page.uri.path
431
+ end
432
+ end
433
+ }
414
434
  end
415
435
 
416
436
  def test_get_follow_meta_refresh_anywhere
@@ -1009,6 +1029,40 @@ but not <a href="/" rel="me nofollow">this</a>!
1009
1029
  assert_equal URI('http://user:pass@localhost:8080'), http.proxy_uri
1010
1030
  end
1011
1031
 
1032
+ def test_shutdown
1033
+ uri = URI 'http://localhost'
1034
+ jar = Mechanize::CookieJar.new
1035
+ Mechanize::Cookie.parse uri, 'a=b' do |cookie|
1036
+ jar.add uri, cookie
1037
+ end
1038
+
1039
+ @mech.cookie_jar = jar
1040
+
1041
+ @mech.get("http://localhost/")
1042
+
1043
+ assert_match(/Hello World/, @mech.current_page.body)
1044
+ refute_empty @mech.cookies
1045
+ refute_empty Thread.current[@mech.agent.http.request_key]
1046
+
1047
+ @mech.shutdown
1048
+
1049
+ assert_nil Thread.current[@mech.agent.http.request_key]
1050
+ assert_empty @mech.history
1051
+ assert_empty @mech.cookies
1052
+ end
1053
+
1054
+ def test_start
1055
+ body, id = nil
1056
+
1057
+ Mechanize.start do |m|
1058
+ body = m.get("http://localhost/").body
1059
+ id = m.agent.http.request_key
1060
+ end
1061
+
1062
+ assert_match(/Hello World/, body)
1063
+ assert_nil Thread.current[id]
1064
+ end
1065
+
1012
1066
  def test_submit_bad_form_method
1013
1067
  page = @mech.get("http://localhost/bad_form_test.html")
1014
1068
  assert_raises ArgumentError do