mechanize 2.7.3 → 2.7.4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (40):
  1. checksums.yaml +4 -4
  2. data/.gitignore +15 -0
  3. data/.travis.yml +5 -6
  4. data/CHANGELOG.rdoc +22 -0
  5. data/EXAMPLES.rdoc +1 -1
  6. data/Gemfile +3 -0
  7. data/Manifest.txt +0 -1
  8. data/README.rdoc +11 -17
  9. data/Rakefile +22 -38
  10. data/examples/{rubyforge.rb → rubygems.rb} +5 -4
  11. data/lib/mechanize.rb +72 -31
  12. data/lib/mechanize/directory_saver.rb +14 -2
  13. data/lib/mechanize/element_matcher.rb +24 -13
  14. data/lib/mechanize/file_response.rb +1 -3
  15. data/lib/mechanize/form.rb +8 -2
  16. data/lib/mechanize/http/agent.rb +38 -30
  17. data/lib/mechanize/http/auth_store.rb +2 -0
  18. data/lib/mechanize/http/www_authenticate_parser.rb +1 -1
  19. data/lib/mechanize/page.rb +162 -54
  20. data/lib/mechanize/page/image.rb +2 -0
  21. data/lib/mechanize/page/link.rb +5 -0
  22. data/lib/mechanize/pluggable_parsers.rb +13 -1
  23. data/lib/mechanize/test_case.rb +5 -0
  24. data/lib/mechanize/unsupported_scheme_error.rb +4 -2
  25. data/lib/mechanize/util.rb +88 -43
  26. data/lib/mechanize/version.rb +3 -0
  27. data/mechanize.gemspec +61 -0
  28. data/test/test_mechanize.rb +55 -41
  29. data/test/test_mechanize_form.rb +19 -0
  30. data/test/test_mechanize_form_encoding.rb +2 -7
  31. data/test/test_mechanize_http_agent.rb +61 -12
  32. data/test/test_mechanize_http_www_authenticate_parser.rb +8 -0
  33. data/test/test_mechanize_link.rb +14 -1
  34. data/test/test_mechanize_page.rb +53 -6
  35. data/test/test_mechanize_page_encoding.rb +2 -3
  36. data/test/test_mechanize_page_link.rb +17 -2
  37. data/test/test_mechanize_util.rb +45 -10
  38. metadata +147 -72
  39. data/.gemtest +0 -0
  40. data/lib/mechanize/monkey_patch.rb +0 -17
@@ -169,6 +169,8 @@ class Mechanize::Page::Image
169
169
  end
170
170
  end
171
171
 
172
+ alias uri url
173
+
172
174
  ##
173
175
  # The width attribute of the image
174
176
 
@@ -99,5 +99,10 @@ class Mechanize::Page::Link
99
99
  end
100
100
  end
101
101
 
102
+ # A fully resolved URI for the #href for this link.
103
+ def resolved_uri
104
+ @mech.resolve uri
105
+ end
106
+
102
107
  end
103
108
 
@@ -2,6 +2,7 @@ require 'mechanize/file'
2
2
  require 'mechanize/file_saver'
3
3
  require 'mechanize/page'
4
4
  require 'mechanize/xml_file'
5
+ require 'mime/types'
5
6
 
6
7
  ##
7
8
  # Mechanize allows different parsers for different content types. Mechanize
@@ -68,6 +69,15 @@ class Mechanize::PluggableParser
68
69
  :xml => ['text/xml', 'application/xml'],
69
70
  }
70
71
 
72
+ InvalidContentTypeError =
73
+ if defined?(MIME::Type::InvalidContentType)
74
+ # For mime-types >=2.1
75
+ MIME::Type::InvalidContentType
76
+ else
77
+ # For mime-types <2.1
78
+ MIME::InvalidContentType
79
+ end
80
+
71
81
  attr_accessor :default
72
82
 
73
83
  def initialize
@@ -97,9 +107,11 @@ class Mechanize::PluggableParser
97
107
 
98
108
  parser = @parsers[mime_type.to_s] ||
99
109
  @parsers[mime_type.simplified] ||
110
+ # Starting from mime-types 3.0 x-prefix is deprecated as per IANA
111
+ (@parsers[MIME::Type.simplified(mime_type.to_s, remove_x_prefix: true)] rescue nil) ||
100
112
  @parsers[mime_type.media_type] ||
101
113
  default
102
- rescue MIME::Type::InvalidContentType
114
+ rescue InvalidContentTypeError
103
115
  default
104
116
  end
105
117
 
@@ -14,6 +14,11 @@ end
14
14
 
15
15
  require 'minitest/autorun'
16
16
 
17
+ begin
18
+ require 'minitest/pride'
19
+ rescue LoadError
20
+ end
21
+
17
22
  ##
18
23
  # A generic test case for testing mechanize. Using a subclass of
19
24
  # Mechanize::TestCase for your tests will create an isolated mechanize
@@ -1,6 +1,8 @@
1
1
  class Mechanize::UnsupportedSchemeError < Mechanize::Error
2
- attr_accessor :scheme
3
- def initialize(scheme)
2
+ attr_accessor :scheme, :uri
3
+
4
+ def initialize(scheme, uri)
4
5
  @scheme = scheme
6
+ @uri = uri
5
7
  end
6
8
  end
@@ -2,36 +2,70 @@ require 'cgi'
2
2
  require 'nkf'
3
3
 
4
4
  class Mechanize::Util
5
- CODE_DIC = {
6
- NKF::JIS => "ISO-2022-JP",
7
- NKF::EUC => "EUC-JP",
8
- NKF::SJIS => "SHIFT_JIS",
9
- NKF::UTF8 => "UTF-8",
10
- NKF::UTF16 => "UTF-16",
11
- NKF::UTF32 => "UTF-32",
12
- }
13
-
14
- # true if RUBY_VERSION is 1.9.0 or later
15
- NEW_RUBY_ENCODING = RUBY_VERSION >= '1.9.0'
16
-
17
- # contains encoding error classes to raise
18
- ENCODING_ERRORS = if NEW_RUBY_ENCODING
19
- [EncodingError]
20
- else
21
- [Iconv::InvalidEncoding, Iconv::IllegalSequence]
22
- end
23
-
24
5
  # default mime type data for Page::Image#mime_type.
25
6
  # You can use another Apache-compatible mimetab.
26
7
  # mimetab = WEBrick::HTTPUtils.load_mime_types('/etc/mime.types')
27
8
  # Mechanize::Util::DefaultMimeTypes.replace(mimetab)
28
9
  DefaultMimeTypes = WEBrick::HTTPUtils::DefaultMimeTypes
29
10
 
30
- def self.build_query_string(parameters, enc = nil)
31
- parameters.map { |k,v|
32
- # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
33
- [CGI.escape(k.to_s), CGI.escape(v.to_s)].join("=") if k
34
- }.compact.join('&')
11
+ class << self
12
+ # Builds a query string from a given enumerable object
13
+ # +parameters+. This method uses Mechanize::Util.each_parameter
14
+ # as preprocessor, which see.
15
+ def build_query_string(parameters, enc = nil)
16
+ each_parameter(parameters).inject(nil) { |s, (k, v)|
17
+ # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
18
+ (s.nil? ? '' : s << '&') << [CGI.escape(k.to_s), CGI.escape(v.to_s)].join('=')
19
+ } || ''
20
+ end
21
+
22
+ # Parses an enumerable object +parameters+ and iterates over the
23
+ # key-value pairs it contains.
24
+ #
25
+ # +parameters+ may be a hash, or any enumerable object which
26
+ # iterates over [key, value] pairs, typically an array of arrays.
27
+ #
28
+ # If a key is paired with an array-like object, the pair is
29
+ # expanded into multiple occurrences of the key, one for each
30
+ # element of the array. e.g. { a: [1, 2] } => [:a, 1], [:a, 2]
31
+ #
32
+ # If a key is paired with a hash-like object, the pair is expanded
33
+ # into hash-like multiple pairs, one for each pair of the hash.
34
+ # e.g. { a: { x: 1, y: 2 } } => ['a[x]', 1], ['a[y]', 2]
35
+ #
36
+ # An array-like value is allowed to be specified as hash value.
37
+ # e.g. { a: { q: [1, 2] } } => ['a[q]', 1], ['a[q]', 2]
38
+ #
39
+ # For a non-array-like, non-hash-like value, the key-value pair is
40
+ # yielded as is.
41
+ def each_parameter(parameters, &block)
42
+ return to_enum(__method__, parameters) if block.nil?
43
+
44
+ parameters.each { |key, value|
45
+ each_parameter_1(key, value, &block)
46
+ }
47
+ end
48
+
49
+ private
50
+
51
+ def each_parameter_1(key, value, &block)
52
+ return if key.nil?
53
+
54
+ case
55
+ when s = String.try_convert(value)
56
+ yield [key, s]
57
+ when a = Array.try_convert(value)
58
+ a.each { |avalue|
59
+ yield [key, avalue]
60
+ }
61
+ when h = Hash.try_convert(value)
62
+ h.each { |hkey, hvalue|
63
+ each_parameter_1('%s[%s]' % [key, hkey], hvalue, &block)
64
+ }
65
+ else
66
+ yield [key, value]
67
+ end
68
+ end
35
69
  end
36
70
 
37
71
  # Converts string +s+ from +code+ to UTF-8.
@@ -40,8 +74,8 @@ class Mechanize::Util
40
74
  return s unless Mechanize.html_parser == Nokogiri::HTML
41
75
 
42
76
  begin
43
- encode_to(code, s)
44
- rescue *ENCODING_ERRORS => ex
77
+ s.encode(code)
78
+ rescue EncodingError => ex
45
79
  log.debug("from_native_charset: #{ex.class}: form encoding: #{code.inspect} string: #{s}") if log
46
80
  if ignore_encoding_error
47
81
  s
@@ -51,16 +85,6 @@ class Mechanize::Util
51
85
  end
52
86
  end
53
87
 
54
- # inner convert method of Util.from_native_charset
55
- def self.encode_to(encoding, str)
56
- if NEW_RUBY_ENCODING
57
- str.encode(encoding)
58
- else
59
- Iconv.conv(encoding.to_s, "UTF-8", str)
60
- end
61
- end
62
- private_class_method :encode_to
63
-
64
88
  def self.html_unescape(s)
65
89
  return s unless s
66
90
  s.gsub(/&(\w+|#[0-9]+);/) { |match|
@@ -75,15 +99,36 @@ class Mechanize::Util
75
99
  }
76
100
  end
77
101
 
102
+ case NKF::BINARY
103
+ when Encoding
104
+ def self.guess_encoding(src)
105
+ NKF.guess(src)
106
+ end
107
+ else
108
+ # Old NKF from 1.8, still bundled with JRuby and Rubinius
109
+ NKF_ENCODING_MAP = {
110
+ NKF::UNKNOWN => Encoding::US_ASCII,
111
+ NKF::BINARY => Encoding::ASCII_8BIT,
112
+ NKF::ASCII => Encoding::US_ASCII,
113
+ NKF::JIS => Encoding::ISO_2022_JP,
114
+ NKF::EUC => Encoding::EUC_JP,
115
+ NKF::SJIS => Encoding::Shift_JIS,
116
+ NKF::UTF8 => Encoding::UTF_8,
117
+ NKF::UTF16 => Encoding::UTF_16BE,
118
+ NKF::UTF32 => Encoding::UTF_32BE,
119
+ }
120
+
121
+ def self.guess_encoding(src)
122
+ NKF_ENCODING_MAP[NKF.guess(src)]
123
+ end
124
+ end
125
+
78
126
  def self.detect_charset(src)
79
- case enc = src && NKF.guess(src)
80
- when Integer
81
- # Ruby <= 1.8
82
- CODE_DIC[enc]
127
+ if src
128
+ guess_encoding(src).name.upcase
83
129
  else
84
- # Ruby >= 1.9
85
- enc && enc.to_s.upcase
86
- end || "ISO-8859-1"
130
+ Encoding::ISO8859_1.name
131
+ end
87
132
  end
88
133
 
89
134
  def self.uri_escape str, unsafe = nil
@@ -0,0 +1,3 @@
1
+ class Mechanize
2
+ VERSION = "2.7.4"
3
+ end
@@ -0,0 +1,61 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'mechanize/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "mechanize"
8
+ spec.version = Mechanize::VERSION
9
+ spec.homepage = "http://docs.seattlerb.org/mechanize/"
10
+ spec.summary = %q{The Mechanize library is used for automating interaction with websites}
11
+ spec.description =
12
+ [
13
+ "The Mechanize library is used for automating interaction with websites.",
14
+ "Mechanize automatically stores and sends cookies, follows redirects,",
15
+ "and can follow links and submit forms. Form fields can be populated and",
16
+ "submitted. Mechanize also keeps track of the sites that you have visited as",
17
+ "a history."
18
+ ].join("\n")
19
+
20
+ spec.authors =
21
+ [
22
+ 'Eric Hodel',
23
+ 'Aaron Patterson',
24
+ 'Mike Dalessio',
25
+ 'Akinori MUSHA',
26
+ 'Lee Jarvis'
27
+ ]
28
+ spec.email =
29
+ [
30
+ 'drbrain@segment7.net',
31
+ 'aaronp@rubyforge.org',
32
+ 'mike.dalessio@gmail.com',
33
+ 'knu@idaemons.org',
34
+ 'ljjarvis@gmail.com'
35
+ ]
36
+
37
+ spec.license = "MIT"
38
+
39
+ spec.require_paths = ["lib"]
40
+ spec.files = `git ls-files`.split($/)
41
+ spec.test_files = spec.files.grep(%r{^test/})
42
+
43
+ spec.extra_rdoc_files += Dir['*.rdoc']
44
+ spec.rdoc_options = ["--main", "README.rdoc"]
45
+
46
+ spec.required_ruby_version = ">= 1.9.2"
47
+
48
+ spec.add_runtime_dependency "net-http-digest_auth", [ ">= 1.1.1", "~> 1.1" ]
49
+ spec.add_runtime_dependency "net-http-persistent", [ ">= 2.5.2", "~> 2.5" ]
50
+ spec.add_runtime_dependency "mime-types", [ ">= 1.17.2", "< 3" ]
51
+ spec.add_runtime_dependency "http-cookie", [ "~> 1.0" ]
52
+ spec.add_runtime_dependency "nokogiri", [ "~> 1.6" ]
53
+ spec.add_runtime_dependency "ntlm-http", [ ">= 0.1.1", "~> 0.1" ]
54
+ spec.add_runtime_dependency "webrobots", [ "< 0.2", ">= 0.0.9" ]
55
+ spec.add_runtime_dependency "domain_name", [ ">= 0.5.1", "~> 0.5" ]
56
+
57
+ spec.add_development_dependency "rake"
58
+ spec.add_development_dependency "bundler", "~> 1.3"
59
+ spec.add_development_dependency "rdoc", "~> 4.0"
60
+ spec.add_development_dependency "minitest", "~> 5.0"
61
+ end
@@ -13,12 +13,6 @@ class TestMechanize < Mechanize::TestCase
13
13
  @res = Net::HTTPOK.allocate
14
14
  @res.instance_variable_set :@code, 200
15
15
  @res.instance_variable_set :@header, {}
16
-
17
- @headers = if RUBY_VERSION > '1.9' then
18
- %w[accept user-agent]
19
- else
20
- %w[accept]
21
- end
22
16
  end
23
17
 
24
18
  def test_back
@@ -112,6 +106,22 @@ class TestMechanize < Mechanize::TestCase
112
106
  assert_equal '/index.html', requests.first.path
113
107
  end
114
108
 
109
+ def test_click_image_button
110
+ page = @mech.get("http://localhost/form_test.html")
111
+ get_form = page.forms.find { |f| f.name == "get_form1" }
112
+ image_button = get_form.buttons.first
113
+ new_page = @mech.click(image_button)
114
+ assert_equal "http://localhost/form_post?first_name=&button.x=0&button.y=0", new_page.uri.to_s
115
+ end
116
+
117
+ def test_click_submit_button
118
+ page = @mech.get("http://localhost/form_test.html")
119
+ get_form = page.forms.find { |f| f.name == "get_form1" }
120
+ submit_button = get_form.submits.first
121
+ new_page = @mech.click(submit_button)
122
+ assert_equal "http://localhost/form_post?first_name=", new_page.uri.to_s
123
+ end
124
+
115
125
  def test_click_frame
116
126
  frame = node 'frame', 'src' => '/index.html'
117
127
  frame = Mechanize::Page::Frame.new frame, @mech, fake_page
@@ -433,10 +443,6 @@ but not <a href="/" rel="me nofollow">this</a>!
433
443
 
434
444
  def test_get_follow_meta_refresh_anywhere
435
445
  @mech.follow_meta_refresh = :anywhere
436
- requests = []
437
- @mech.pre_connect_hooks << lambda { |_, request|
438
- requests << request
439
- }
440
446
 
441
447
  @mech.get('http://localhost/tc_meta_in_body.html')
442
448
  assert_equal 2, requests.length
@@ -465,10 +471,6 @@ but not <a href="/" rel="me nofollow">this</a>!
465
471
 
466
472
  def test_get_follow_meta_refresh_in_body
467
473
  @mech.follow_meta_refresh = true
468
- requests = []
469
- @mech.pre_connect_hooks << lambda { |_, request|
470
- requests << request
471
- }
472
474
 
473
475
  @mech.get('http://localhost/tc_meta_in_body.html')
474
476
  assert_equal 1, requests.length
@@ -492,12 +494,6 @@ but not <a href="/" rel="me nofollow">this</a>!
492
494
  def test_get_follow_meta_refresh_referer_not_sent
493
495
  @mech.follow_meta_refresh = true
494
496
 
495
- requests = []
496
-
497
- @mech.pre_connect_hooks << lambda { |_, request|
498
- requests << request
499
- }
500
-
501
497
  @mech.get('http://localhost/tc_follow_meta.html')
502
498
 
503
499
  assert_equal 2, @mech.history.length
@@ -578,12 +574,6 @@ but not <a href="/" rel="me nofollow">this</a>!
578
574
  def test_get_http_refresh
579
575
  @mech.follow_meta_refresh = true
580
576
 
581
- requests = []
582
-
583
- @mech.pre_connect_hooks << lambda { |_, request|
584
- requests << request
585
- }
586
-
587
577
  page = @mech.get('http://example/http_refresh?refresh_time=0')
588
578
 
589
579
  assert_equal('http://example/', page.uri.to_s)
@@ -609,18 +599,15 @@ but not <a href="/" rel="me nofollow">this</a>!
609
599
  assert_equal('http://localhost/http_refresh?refresh_time=0', page.uri.to_s)
610
600
  end
611
601
 
612
- def test_get_kcode
613
- $KCODE = 'u'
614
- page = @mech.get("http://localhost/?a=#{[0xd6].pack('U')}")
615
-
616
- assert_equal('http://localhost/?a=%D6', page.uri.to_s)
617
-
618
- $KCODE = 'NONE'
619
- end unless RUBY_VERSION >= '1.9.0'
620
-
621
602
  def test_get_query
622
603
  page = @mech.get('http://localhost/', { :q => 'hello' })
623
604
  assert_equal('http://localhost/?q=hello', page.uri.to_s)
605
+
606
+ page = @mech.get('http://localhost/', { :q => %w[hello world]})
607
+ assert_equal('http://localhost/?q=hello&q=world', page.uri.to_s)
608
+
609
+ page = @mech.get('http://localhost/', { :paging => { start: 1, limit: 25 } })
610
+ assert_equal('http://localhost/?paging%5Bstart%5D=1&paging%5Blimit%5D=25', page.uri.to_s)
624
611
  end
625
612
 
626
613
  def test_get_redirect
@@ -665,11 +652,6 @@ but not <a href="/" rel="me nofollow">this</a>!
665
652
  end
666
653
 
667
654
  def test_get_referer_none
668
- requests = []
669
- @mech.pre_connect_hooks << lambda { |_, request|
670
- requests << request
671
- }
672
-
673
655
  @mech.get('http://localhost/')
674
656
  @mech.get('http://localhost/')
675
657
  assert_equal(2, requests.length)
@@ -680,7 +662,13 @@ but not <a href="/" rel="me nofollow">this</a>!
680
662
 
681
663
  def test_get_scheme_unsupported
682
664
  assert_raises Mechanize::UnsupportedSchemeError do
683
- @mech.get('ftp://server.com/foo.html')
665
+ begin
666
+ @mech.get('ftp://server.com/foo.html')
667
+ rescue Mechanize::UnsupportedSchemeError => error
668
+ assert_equal 'ftp', error.scheme
669
+ assert_equal 'ftp://server.com/foo.html', error.uri.to_s
670
+ raise
671
+ end
684
672
  end
685
673
  end
686
674
 
@@ -942,6 +930,25 @@ but not <a href="/" rel="me nofollow">this</a>!
942
930
  assert page.body.length > File.read(__FILE__).length
943
931
  end
944
932
 
933
+ def test_post_file_upload
934
+ name = File.basename(__FILE__)
935
+ file_upload = Mechanize::Form::FileUpload.new({'name' => 'userfile1'}, name)
936
+ file_upload.file_data = File.read(__FILE__)
937
+ file_upload.mime_type = 'application/zip'
938
+
939
+ page = @mech.post('http://localhost/file_upload', {
940
+ :name => 'Some file',
941
+ :userfile1 => file_upload
942
+ })
943
+
944
+ assert_match(
945
+ "Content-Disposition: form-data; name=\"userfile1\"; filename=\"#{name}\"",
946
+ page.body
947
+ )
948
+ assert_match("Content-Type: application/zip", page.body)
949
+ assert page.body.length > File.read(__FILE__).length
950
+ end
951
+
945
952
  def test_post_redirect
946
953
  page = @mech.post('http://localhost/redirect')
947
954
 
@@ -970,6 +977,13 @@ but not <a href="/" rel="me nofollow">this</a>!
970
977
  assert_equal 5, @mech.read_timeout
971
978
  end
972
979
 
980
+ def test_timeouts_for_file_connection
981
+ uri = URI.parse "file://#{File.expand_path __FILE__}"
982
+ @mech.read_timeout = 5
983
+ @mech.open_timeout = 5
984
+ assert @mech.get(uri)
985
+ end
986
+
973
987
  def test_referer
974
988
  host_path = "localhost/tc_referer.html?t=1"
975
989
  ['http', 'https'].each { |proto|