mechanize 1.0.1.beta.20110107104205 → 2.0.pre.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (89)
  1. data.tar.gz.sig +2 -0
  2. data/{lib/mechanize/chain/post_connect_hook.rb → .gemtest} +0 -0
  3. data/CHANGELOG.rdoc +51 -6
  4. data/EXAMPLES.rdoc +5 -3
  5. data/GUIDE.rdoc +72 -32
  6. data/LICENSE.rdoc +20 -340
  7. data/Manifest.txt +20 -27
  8. data/README.rdoc +12 -9
  9. data/Rakefile +5 -2
  10. data/examples/spider.rb +13 -2
  11. data/lib/mechanize.rb +545 -267
  12. data/lib/mechanize/content_type_error.rb +1 -1
  13. data/lib/mechanize/cookie.rb +72 -65
  14. data/lib/mechanize/cookie_jar.rb +197 -148
  15. data/lib/mechanize/element_matcher.rb +35 -0
  16. data/lib/mechanize/file.rb +3 -1
  17. data/lib/mechanize/file_connection.rb +17 -0
  18. data/lib/mechanize/file_request.rb +26 -0
  19. data/lib/mechanize/file_response.rb +61 -47
  20. data/lib/mechanize/form.rb +57 -58
  21. data/lib/mechanize/form/image_button.rb +2 -3
  22. data/lib/mechanize/form/multi_select_list.rb +71 -55
  23. data/lib/mechanize/form/select_list.rb +34 -62
  24. data/lib/mechanize/monkey_patch.rb +13 -11
  25. data/lib/mechanize/page.rb +277 -270
  26. data/lib/mechanize/page/image.rb +6 -2
  27. data/lib/mechanize/redirect_limit_reached_error.rb +1 -1
  28. data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -1
  29. data/lib/mechanize/response_code_error.rb +3 -3
  30. data/lib/mechanize/unsupported_scheme_error.rb +1 -1
  31. data/lib/mechanize/uri_resolver.rb +82 -0
  32. data/lib/mechanize/util.rb +76 -60
  33. data/test/helper.rb +35 -5
  34. data/test/htdocs/dir with spaces/foo.html +1 -0
  35. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  36. data/test/htdocs/tc_base_images.html +10 -0
  37. data/test/htdocs/tc_images.html +8 -0
  38. data/test/htdocs/test_click.html +11 -0
  39. data/test/servlets.rb +3 -2
  40. data/test/test_authenticate.rb +5 -5
  41. data/test/test_errors.rb +8 -8
  42. data/test/test_follow_meta.rb +4 -4
  43. data/test/test_form_as_hash.rb +4 -4
  44. data/test/test_forms.rb +3 -7
  45. data/test/test_hash_api.rb +2 -2
  46. data/test/test_headers.rb +1 -1
  47. data/test/test_images.rb +19 -0
  48. data/test/test_mech.rb +6 -6
  49. data/test/test_mechanize.rb +687 -0
  50. data/test/{test_cookie_class.rb → test_mechanize_cookie.rb} +52 -45
  51. data/test/test_mechanize_cookie_jar.rb +400 -0
  52. data/test/test_mechanize_file.rb +7 -1
  53. data/test/test_mechanize_file_request.rb +19 -0
  54. data/test/test_mechanize_file_response.rb +21 -0
  55. data/test/test_mechanize_form_image_button.rb +12 -0
  56. data/test/test_mechanize_page.rb +165 -0
  57. data/test/test_mechanize_uri_resolver.rb +29 -0
  58. data/test/{test_util.rb → test_mechanize_util.rb} +1 -1
  59. data/test/test_multi_select.rb +12 -0
  60. data/test/test_post_form.rb +7 -0
  61. data/test/test_redirect_verb_handling.rb +6 -6
  62. data/test/test_scheme.rb +0 -7
  63. data/test/test_verbs.rb +3 -3
  64. metadata +106 -72
  65. metadata.gz.sig +0 -0
  66. data/lib/mechanize/chain.rb +0 -36
  67. data/lib/mechanize/chain/auth_headers.rb +0 -78
  68. data/lib/mechanize/chain/body_decoding_handler.rb +0 -50
  69. data/lib/mechanize/chain/connection_resolver.rb +0 -28
  70. data/lib/mechanize/chain/custom_headers.rb +0 -21
  71. data/lib/mechanize/chain/handler.rb +0 -9
  72. data/lib/mechanize/chain/header_resolver.rb +0 -48
  73. data/lib/mechanize/chain/parameter_resolver.rb +0 -22
  74. data/lib/mechanize/chain/pre_connect_hook.rb +0 -20
  75. data/lib/mechanize/chain/request_resolver.rb +0 -31
  76. data/lib/mechanize/chain/response_body_parser.rb +0 -36
  77. data/lib/mechanize/chain/response_header_handler.rb +0 -34
  78. data/lib/mechanize/chain/response_reader.rb +0 -39
  79. data/lib/mechanize/chain/ssl_resolver.rb +0 -40
  80. data/lib/mechanize/chain/uri_resolver.rb +0 -75
  81. data/test/chain/test_argument_validator.rb +0 -14
  82. data/test/chain/test_auth_headers.rb +0 -25
  83. data/test/chain/test_custom_headers.rb +0 -18
  84. data/test/chain/test_header_resolver.rb +0 -27
  85. data/test/chain/test_parameter_resolver.rb +0 -35
  86. data/test/chain/test_request_resolver.rb +0 -29
  87. data/test/chain/test_response_reader.rb +0 -24
  88. data/test/test_cookie_jar.rb +0 -324
  89. data/test/test_page.rb +0 -124
@@ -17,8 +17,12 @@ class Mechanize
17
17
  case src
18
18
  when %r{^https?://}
19
19
  src
20
- else
21
- (page.uri + src).to_s
20
+ else
21
+ if page.bases[0]
22
+ (page.bases[0].href + src).to_s
23
+ else
24
+ (page.uri + src).to_s
25
+ end
22
26
  end
23
27
  end
24
28
  end
@@ -1,6 +1,6 @@
1
1
  class Mechanize
2
2
  # Thrown when too many redirects are sent
3
- class RedirectLimitReachedError < RuntimeError
3
+ class RedirectLimitReachedError < Mechanize::Error
4
4
  attr_reader :page, :response_code, :redirects
5
5
  def initialize(page, redirects)
6
6
  @page = page
@@ -1,7 +1,7 @@
1
1
  class Mechanize
2
2
  # Thrown when a POST, PUT, or DELETE request results in a redirect
3
3
  # see RFC 2616 10.3.2, 10.3.3 http://www.ietf.org/rfc/rfc2616.txt
4
- class RedirectNotGetOrHeadError < RuntimeError
4
+ class RedirectNotGetOrHeadError < Mechanize::Error
5
5
  attr_reader :page, :response_code, :verb, :uri
6
6
  def initialize(page, verb)
7
7
  @page = page
@@ -4,17 +4,17 @@ class Mechanize
4
4
  # not know how to handle. Currently, this exception will be thrown
5
5
  # if Mechanize encounters response codes other than 200, 301, or 302.
6
6
  # Any other response code is up to the user to handle.
7
- class ResponseCodeError < RuntimeError
7
+ class ResponseCodeError < Mechanize::Error
8
8
  attr_reader :response_code
9
9
  attr_reader :page
10
10
 
11
11
  def initialize(page)
12
12
  @page = page
13
- @response_code = page.code
13
+ @response_code = page.code.to_s
14
14
  end
15
15
 
16
16
  def to_s
17
- "#{response_code} => #{Net::HTTPResponse::CODE_TO_OBJ[response_code]}"
17
+ "#{@response_code} => #{Net::HTTPResponse::CODE_TO_OBJ[@response_code]}"
18
18
  end
19
19
 
20
20
  def inspect; to_s; end
@@ -1,5 +1,5 @@
1
1
  class Mechanize
2
- class UnsupportedSchemeError < RuntimeError
2
+ class UnsupportedSchemeError < Mechanize::Error
3
3
  attr_accessor :scheme
4
4
  def initialize(scheme)
5
5
  @scheme = scheme
@@ -0,0 +1,82 @@
1
+ class Mechanize::URIResolver
2
+
3
+ attr_reader :scheme_handlers
4
+
5
+ def initialize
6
+ @scheme_handlers = Hash.new { |h, scheme|
7
+ h[scheme] = lambda { |link, page|
8
+ raise Mechanize::UnsupportedSchemeError, scheme
9
+ }
10
+ }
11
+
12
+ @scheme_handlers['http'] = lambda { |link, page| link }
13
+ @scheme_handlers['https'] = @scheme_handlers['http']
14
+ @scheme_handlers['relative'] = @scheme_handlers['http']
15
+ @scheme_handlers['file'] = @scheme_handlers['http']
16
+ end
17
+
18
+ def resolve uri, referer = nil
19
+ uri = uri.dup if uri.is_a?(URI)
20
+
21
+ unless uri.is_a?(URI)
22
+ uri = uri.to_s.strip.gsub(/[^#{0.chr}-#{126.chr}]/o) { |match|
23
+ if RUBY_VERSION >= "1.9.0"
24
+ Mechanize::Util.uri_escape(match)
25
+ else
26
+ sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'C')[0])
27
+ end
28
+ }
29
+
30
+ unescaped = uri.split(/(?:%[0-9A-Fa-f]{2})+|#/)
31
+ escaped = uri.scan(/(?:%[0-9A-Fa-f]{2})+|#/)
32
+
33
+ escaped_uri = Mechanize::Util.html_unescape(
34
+ unescaped.zip(escaped).map { |x,y|
35
+ "#{WEBrick::HTTPUtils.escape(x)}#{y}"
36
+ }.join('')
37
+ )
38
+
39
+ begin
40
+ uri = URI.parse(escaped_uri)
41
+ rescue
42
+ uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_uri))
43
+ end
44
+ end
45
+
46
+ scheme = uri.relative? ? 'relative' : uri.scheme.downcase
47
+ uri = @scheme_handlers[scheme].call(uri, referer)
48
+
49
+ if referer && referer.uri
50
+ if uri.path.length == 0 && uri.relative?
51
+ uri.path = referer.uri.path
52
+ end
53
+ end
54
+
55
+ uri.path = '/' if uri.path.length == 0
56
+
57
+ if uri.relative?
58
+ raise ArgumentError, "absolute URL needed (not #{uri})" unless
59
+ referer && referer.uri
60
+
61
+ base = nil
62
+ if referer.respond_to?(:bases) && referer.parser
63
+ base = referer.bases.last
64
+ end
65
+
66
+ uri = ((base && base.uri && base.uri.absolute?) ?
67
+ base.uri :
68
+ referer.uri) + uri
69
+ uri = referer.uri + uri
70
+ # Strip initial "/.." bits from the path
71
+ uri.path.sub!(/^(\/\.\.)+(?=\/)/, '')
72
+ end
73
+
74
+ unless ['http', 'https', 'file'].include?(uri.scheme.downcase)
75
+ raise ArgumentError, "unsupported scheme: #{uri.scheme}"
76
+ end
77
+
78
+ uri
79
+ end
80
+
81
+ end
82
+
@@ -1,73 +1,89 @@
1
1
  require 'cgi'
2
2
 
3
- class Mechanize
4
- class Util
5
- CODE_DIC = {
6
- :JIS => "ISO-2022-JP",
7
- :EUC => "EUC-JP",
8
- :SJIS => "SHIFT_JIS",
9
- :UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"}
3
+ class Mechanize::Util
4
+ CODE_DIC = {
5
+ :JIS => "ISO-2022-JP",
6
+ :EUC => "EUC-JP",
7
+ :SJIS => "SHIFT_JIS",
8
+ :UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"}
10
9
 
11
- class << self
12
- def build_query_string(parameters, enc=nil)
13
- parameters.map { |k,v|
14
- # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
15
- [CGI.escape(k.to_s), CGI.escape(v.to_s)].join("=") if k
16
- }.compact.join('&')
17
- end
10
+ def self.build_query_string(parameters, enc=nil)
11
+ parameters.map { |k,v|
12
+ # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
13
+ [CGI.escape(k.to_s), CGI.escape(v.to_s)].join("=") if k
14
+ }.compact.join('&')
15
+ end
18
16
 
19
- def to_native_charset(s, code=nil)
20
- if Mechanize.html_parser == Nokogiri::HTML
21
- return unless s
22
- code ||= detect_charset(s)
23
- Iconv.iconv("UTF-8", code, s).join("")
24
- else
25
- s
26
- end
27
- end
17
+ def self.to_native_charset(s, code=nil)
18
+ if Mechanize.html_parser == Nokogiri::HTML
19
+ return unless s
20
+ code ||= detect_charset(s)
21
+ Iconv.iconv("UTF-8", code, s).join("")
22
+ else
23
+ s
24
+ end
25
+ end
28
26
 
29
- def from_native_charset(s, code)
30
- return s unless s && code
31
- return s unless Mechanize.html_parser == Nokogiri::HTML
27
+ def self.from_native_charset(s, code)
28
+ return s unless s && code
29
+ return s unless Mechanize.html_parser == Nokogiri::HTML
32
30
 
33
- if RUBY_VERSION < '1.9.2'
34
- begin
35
- Iconv.iconv(code.to_s, "UTF-8", s).join("")
36
- rescue Iconv::InvalidEncoding, Iconv::IllegalSequence
37
- s
38
- end
39
- else
40
- s.encode("UTF-8") rescue s
41
- end
31
+ if RUBY_VERSION < '1.9.2'
32
+ begin
33
+ Iconv.iconv(code.to_s, "UTF-8", s).join("")
34
+ rescue Iconv::InvalidEncoding, Iconv::IllegalSequence
35
+ s
42
36
  end
37
+ else
38
+ s.encode("UTF-8") rescue s
39
+ end
40
+ end
43
41
 
44
- def html_unescape(s)
45
- return s unless s
46
- s.gsub(/&(\w+|#[0-9]+);/) { |match|
47
- number = case match
48
- when /&(\w+);/
49
- Mechanize.html_parser::NamedCharacters[$1]
50
- when /&#([0-9]+);/
51
- $1.to_i
52
- end
53
-
54
- number ? ([number].pack('U') rescue match) : match
55
- }
56
- end
42
+ def self.html_unescape(s)
43
+ return s unless s
44
+ s.gsub(/&(\w+|#[0-9]+);/) { |match|
45
+ number = case match
46
+ when /&(\w+);/
47
+ Mechanize.html_parser::NamedCharacters[$1]
48
+ when /&#([0-9]+);/
49
+ $1.to_i
50
+ end
57
51
 
58
- def detect_charset(src)
59
- tmp = NKF.guess(src || "<html></html>")
60
- if RUBY_VERSION >= "1.9.0"
61
- enc = tmp.to_s.upcase
62
- else
63
- enc = NKF.constants.find{|c|
64
- NKF.const_get(c) == tmp
65
- }
66
- enc = CODE_DIC[enc.intern]
67
- end
68
- enc || "ISO-8859-1"
69
- end
52
+ number ? ([number].pack('U') rescue match) : match
53
+ }
54
+ end
70
55
 
56
+ def self.detect_charset(src)
57
+ tmp = NKF.guess(src || "<html></html>")
58
+ if RUBY_VERSION >= "1.9.0"
59
+ enc = tmp.to_s.upcase
60
+ else
61
+ enc = NKF.constants.find{|c|
62
+ NKF.const_get(c) == tmp
63
+ }
64
+ enc = CODE_DIC[enc.intern]
71
65
  end
66
+ enc || "ISO-8859-1"
67
+ end
68
+
69
+ def self.uri_escape str
70
+ @parser ||= begin
71
+ URI::Parser.new
72
+ rescue NameError
73
+ URI
74
+ end
75
+
76
+ @parser.escape str
72
77
  end
78
+
79
+ def self.uri_unescape str
80
+ @parser ||= begin
81
+ URI::Parser.new
82
+ rescue NameError
83
+ URI
84
+ end
85
+
86
+ @parser.unescape str
87
+ end
88
+
73
89
  end
@@ -3,6 +3,8 @@ require 'rubygems'
3
3
  require 'mechanize'
4
4
  require 'webrick/httputils'
5
5
  require 'servlets'
6
+ require 'tmpdir'
7
+ require 'tempfile'
6
8
 
7
9
  BASE_DIR = File.dirname(__FILE__)
8
10
 
@@ -65,7 +67,7 @@ class Net::HTTP
65
67
 
66
68
  path = '/index.html' if path == '/'
67
69
 
68
- res = Response.new
70
+ res = ::Response.new
69
71
  res.query_params = url.query
70
72
 
71
73
  request.query = if 'POST' != request.method && url.query then
@@ -95,14 +97,37 @@ class Net::HTTP
95
97
  end
96
98
 
97
99
  res['Content-Type'] ||= 'text/html'
98
- res['Content-Length'] ||= res.body.length.to_s
99
100
  res.code ||= "200"
100
101
 
102
+ response_klass = Net::HTTPResponse::CODE_TO_OBJ[res.code.to_s]
103
+ response = response_klass.new res.http_version, res.code, res.message
104
+
105
+ res.header.each do |k,v|
106
+ v = v.first if v.length == 1
107
+ response[k] = v
108
+ end
109
+
101
110
  res.cookies.each do |cookie|
102
- res.add_field('Set-Cookie', cookie.to_s)
111
+ response.add_field 'Set-Cookie', cookie.to_s
112
+ end
113
+
114
+ response['Content-Type'] ||= 'text/html'
115
+ response['Content-Length'] = res['Content-Length'] || res.body.length.to_s
116
+
117
+ io = StringIO.new(res.body)
118
+ response.instance_variable_set :@socket, io
119
+ def io.read clen, dest, _
120
+ dest << string[0, clen]
103
121
  end
104
- yield res if block_given?
105
- res
122
+
123
+ body_exist = request.response_body_permitted? &&
124
+ response_klass.body_permitted?
125
+
126
+ response.instance_variable_set :@body_exist, body_exist
127
+
128
+ yield response if block_given?
129
+
130
+ response
106
131
  end
107
132
  end
108
133
 
@@ -116,6 +141,7 @@ class Response
116
141
  attr_reader :code
117
142
  attr_accessor :body, :query, :cookies
118
143
  attr_accessor :query_params, :http_version
144
+ attr_accessor :header
119
145
 
120
146
  def code=(c)
121
147
  @code = c.to_s
@@ -136,4 +162,8 @@ class Response
136
162
  def read_body
137
163
  yield body
138
164
  end
165
+
166
+ def message
167
+ ''
168
+ end
139
169
  end
@@ -0,0 +1,27 @@
1
+ <html>
2
+ <head>
3
+ <meta name="csrf-param" content="authenticity_token"/>
4
+ <meta name="csrf-token" content="+6MKmkYpUcOC7ClPngk3FMTDL1Yc0cU1sS9800eeAPA="/>
5
+ </head>
6
+ <body>
7
+
8
+ <form accept-charset="UTF-8" action="/form_post" class="new_user_session" id="new_user_session" method="post">
9
+ <div style="margin:0;padding:0;display:inline">
10
+ <input name="utf8" type="hidden" value="&#x2713;" />
11
+ <input name="authenticity_token" type="hidden" value="+6MKmkYpUcOC7ClPngk3FMTDL1Yc0cU1sS9800eeAPA=" />
12
+ </div>
13
+ <div class='field'>
14
+ <label for="user_session_email">Email</label>
15
+ <input id="user_session_email" name="user_session[email]" size="30" type="text" />
16
+ </div>
17
+ <div class='field'>
18
+ <label for="user_session_password">Password</label>
19
+ <input id="user_session_password" name="user_session[password]" size="30" type="password" />
20
+ </div>
21
+ <div class='buttons'>
22
+ <input id="user_session_submit" name="commit" type="submit" value="Login" />
23
+ </div>
24
+ </form>
25
+
26
+ </body>
27
+ </html>
@@ -0,0 +1,10 @@
1
+ <html>
2
+ <head>
3
+ <base href="http://localhost/">
4
+ </head>
5
+ <body>
6
+ <img src="a.jpg">
7
+ <img src="b.gif">
8
+ </body>
9
+ </html>
10
+
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <head>
3
+ </head>
4
+ <body>
5
+ <img src="a.jpg">
6
+ <img src="b.gif">
7
+ </body>
8
+ </html>
@@ -0,0 +1,11 @@
1
+ <html>
2
+ <head><title>Page Title</title></head>
3
+ <body>
4
+ <a href="/frame_test.html">This link is not called "A Link"</a>
5
+ <form method="get" action="/frame_test.html">
6
+ <input type="text" name="words" value="nil">
7
+ <input type="submit" value="A Button">
8
+ </form>
9
+ <a href="/index.html">A Link</a>
10
+ </body>
11
+ </html>
@@ -9,7 +9,7 @@ class VerbServlet < WEBrick::HTTPServlet::AbstractServlet
9
9
  %w(HEAD GET POST PUT DELETE).each do |verb|
10
10
  eval(<<-eomethod)
11
11
  def do_#{verb}(req, res)
12
- res.body = "method: #{verb}"
12
+ res.header['X-Request-Method'] = #{verb.dump}
13
13
  end
14
14
  eomethod
15
15
  end
@@ -129,7 +129,8 @@ class GzipServlet < WEBrick::HTTPServlet::AbstractServlet
129
129
  res['Content-Encoding'] = 'gzip'
130
130
  res['Content-Type'] = "text/html"
131
131
  else
132
- raise 'no gzip'
132
+ res.code = 400
133
+ res.body = 'no gzip'
133
134
  end
134
135
  end
135
136
  end