mechanize 1.0.1.beta.20110107104205 → 2.0.pre.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data.tar.gz.sig +2 -0
- data/{lib/mechanize/chain/post_connect_hook.rb → .gemtest} +0 -0
- data/CHANGELOG.rdoc +51 -6
- data/EXAMPLES.rdoc +5 -3
- data/GUIDE.rdoc +72 -32
- data/LICENSE.rdoc +20 -340
- data/Manifest.txt +20 -27
- data/README.rdoc +12 -9
- data/Rakefile +5 -2
- data/examples/spider.rb +13 -2
- data/lib/mechanize.rb +545 -267
- data/lib/mechanize/content_type_error.rb +1 -1
- data/lib/mechanize/cookie.rb +72 -65
- data/lib/mechanize/cookie_jar.rb +197 -148
- data/lib/mechanize/element_matcher.rb +35 -0
- data/lib/mechanize/file.rb +3 -1
- data/lib/mechanize/file_connection.rb +17 -0
- data/lib/mechanize/file_request.rb +26 -0
- data/lib/mechanize/file_response.rb +61 -47
- data/lib/mechanize/form.rb +57 -58
- data/lib/mechanize/form/image_button.rb +2 -3
- data/lib/mechanize/form/multi_select_list.rb +71 -55
- data/lib/mechanize/form/select_list.rb +34 -62
- data/lib/mechanize/monkey_patch.rb +13 -11
- data/lib/mechanize/page.rb +277 -270
- data/lib/mechanize/page/image.rb +6 -2
- data/lib/mechanize/redirect_limit_reached_error.rb +1 -1
- data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -1
- data/lib/mechanize/response_code_error.rb +3 -3
- data/lib/mechanize/unsupported_scheme_error.rb +1 -1
- data/lib/mechanize/uri_resolver.rb +82 -0
- data/lib/mechanize/util.rb +76 -60
- data/test/helper.rb +35 -5
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
- data/test/htdocs/tc_base_images.html +10 -0
- data/test/htdocs/tc_images.html +8 -0
- data/test/htdocs/test_click.html +11 -0
- data/test/servlets.rb +3 -2
- data/test/test_authenticate.rb +5 -5
- data/test/test_errors.rb +8 -8
- data/test/test_follow_meta.rb +4 -4
- data/test/test_form_as_hash.rb +4 -4
- data/test/test_forms.rb +3 -7
- data/test/test_hash_api.rb +2 -2
- data/test/test_headers.rb +1 -1
- data/test/test_images.rb +19 -0
- data/test/test_mech.rb +6 -6
- data/test/test_mechanize.rb +687 -0
- data/test/{test_cookie_class.rb → test_mechanize_cookie.rb} +52 -45
- data/test/test_mechanize_cookie_jar.rb +400 -0
- data/test/test_mechanize_file.rb +7 -1
- data/test/test_mechanize_file_request.rb +19 -0
- data/test/test_mechanize_file_response.rb +21 -0
- data/test/test_mechanize_form_image_button.rb +12 -0
- data/test/test_mechanize_page.rb +165 -0
- data/test/test_mechanize_uri_resolver.rb +29 -0
- data/test/{test_util.rb → test_mechanize_util.rb} +1 -1
- data/test/test_multi_select.rb +12 -0
- data/test/test_post_form.rb +7 -0
- data/test/test_redirect_verb_handling.rb +6 -6
- data/test/test_scheme.rb +0 -7
- data/test/test_verbs.rb +3 -3
- metadata +106 -72
- metadata.gz.sig +0 -0
- data/lib/mechanize/chain.rb +0 -36
- data/lib/mechanize/chain/auth_headers.rb +0 -78
- data/lib/mechanize/chain/body_decoding_handler.rb +0 -50
- data/lib/mechanize/chain/connection_resolver.rb +0 -28
- data/lib/mechanize/chain/custom_headers.rb +0 -21
- data/lib/mechanize/chain/handler.rb +0 -9
- data/lib/mechanize/chain/header_resolver.rb +0 -48
- data/lib/mechanize/chain/parameter_resolver.rb +0 -22
- data/lib/mechanize/chain/pre_connect_hook.rb +0 -20
- data/lib/mechanize/chain/request_resolver.rb +0 -31
- data/lib/mechanize/chain/response_body_parser.rb +0 -36
- data/lib/mechanize/chain/response_header_handler.rb +0 -34
- data/lib/mechanize/chain/response_reader.rb +0 -39
- data/lib/mechanize/chain/ssl_resolver.rb +0 -40
- data/lib/mechanize/chain/uri_resolver.rb +0 -75
- data/test/chain/test_argument_validator.rb +0 -14
- data/test/chain/test_auth_headers.rb +0 -25
- data/test/chain/test_custom_headers.rb +0 -18
- data/test/chain/test_header_resolver.rb +0 -27
- data/test/chain/test_parameter_resolver.rb +0 -35
- data/test/chain/test_request_resolver.rb +0 -29
- data/test/chain/test_response_reader.rb +0 -24
- data/test/test_cookie_jar.rb +0 -324
- data/test/test_page.rb +0 -124
data/lib/mechanize/redirect_not_get_or_head_error.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
class Mechanize
|
2
2
|
# Thrown when a POST, PUT, or DELETE request results in a redirect
|
3
3
|
# see RFC 2616 10.3.2, 10.3.3 http://www.ietf.org/rfc/rfc2616.txt
|
4
|
-
class RedirectNotGetOrHeadError <
|
4
|
+
class RedirectNotGetOrHeadError < Mechanize::Error
|
5
5
|
attr_reader :page, :response_code, :verb, :uri
|
6
6
|
def initialize(page, verb)
|
7
7
|
@page = page
|
data/lib/mechanize/response_code_error.rb
CHANGED
@@ -4,17 +4,17 @@ class Mechanize
|
|
4
4
|
# not know how to handle. Currently, this exception will be thrown
|
5
5
|
# if Mechanize encounters response codes other than 200, 301, or 302.
|
6
6
|
# Any other response code is up to the user to handle.
|
7
|
-
class ResponseCodeError <
|
7
|
+
class ResponseCodeError < Mechanize::Error
|
8
8
|
attr_reader :response_code
|
9
9
|
attr_reader :page
|
10
10
|
|
11
11
|
def initialize(page)
|
12
12
|
@page = page
|
13
|
-
@response_code = page.code
|
13
|
+
@response_code = page.code.to_s
|
14
14
|
end
|
15
15
|
|
16
16
|
def to_s
|
17
|
-
"#{response_code} => #{Net::HTTPResponse::CODE_TO_OBJ[response_code]}"
|
17
|
+
"#{@response_code} => #{Net::HTTPResponse::CODE_TO_OBJ[@response_code]}"
|
18
18
|
end
|
19
19
|
|
20
20
|
def inspect; to_s; end
|
data/lib/mechanize/uri_resolver.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
class Mechanize::URIResolver
|
2
|
+
|
3
|
+
attr_reader :scheme_handlers
|
4
|
+
|
5
|
+
def initialize
|
6
|
+
@scheme_handlers = Hash.new { |h, scheme|
|
7
|
+
h[scheme] = lambda { |link, page|
|
8
|
+
raise Mechanize::UnsupportedSchemeError, scheme
|
9
|
+
}
|
10
|
+
}
|
11
|
+
|
12
|
+
@scheme_handlers['http'] = lambda { |link, page| link }
|
13
|
+
@scheme_handlers['https'] = @scheme_handlers['http']
|
14
|
+
@scheme_handlers['relative'] = @scheme_handlers['http']
|
15
|
+
@scheme_handlers['file'] = @scheme_handlers['http']
|
16
|
+
end
|
17
|
+
|
18
|
+
def resolve uri, referer = nil
|
19
|
+
uri = uri.dup if uri.is_a?(URI)
|
20
|
+
|
21
|
+
unless uri.is_a?(URI)
|
22
|
+
uri = uri.to_s.strip.gsub(/[^#{0.chr}-#{126.chr}]/o) { |match|
|
23
|
+
if RUBY_VERSION >= "1.9.0"
|
24
|
+
Mechanize::Util.uri_escape(match)
|
25
|
+
else
|
26
|
+
sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'C')[0])
|
27
|
+
end
|
28
|
+
}
|
29
|
+
|
30
|
+
unescaped = uri.split(/(?:%[0-9A-Fa-f]{2})+|#/)
|
31
|
+
escaped = uri.scan(/(?:%[0-9A-Fa-f]{2})+|#/)
|
32
|
+
|
33
|
+
escaped_uri = Mechanize::Util.html_unescape(
|
34
|
+
unescaped.zip(escaped).map { |x,y|
|
35
|
+
"#{WEBrick::HTTPUtils.escape(x)}#{y}"
|
36
|
+
}.join('')
|
37
|
+
)
|
38
|
+
|
39
|
+
begin
|
40
|
+
uri = URI.parse(escaped_uri)
|
41
|
+
rescue
|
42
|
+
uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_uri))
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
scheme = uri.relative? ? 'relative' : uri.scheme.downcase
|
47
|
+
uri = @scheme_handlers[scheme].call(uri, referer)
|
48
|
+
|
49
|
+
if referer && referer.uri
|
50
|
+
if uri.path.length == 0 && uri.relative?
|
51
|
+
uri.path = referer.uri.path
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
uri.path = '/' if uri.path.length == 0
|
56
|
+
|
57
|
+
if uri.relative?
|
58
|
+
raise ArgumentError, "absolute URL needed (not #{uri})" unless
|
59
|
+
referer && referer.uri
|
60
|
+
|
61
|
+
base = nil
|
62
|
+
if referer.respond_to?(:bases) && referer.parser
|
63
|
+
base = referer.bases.last
|
64
|
+
end
|
65
|
+
|
66
|
+
uri = ((base && base.uri && base.uri.absolute?) ?
|
67
|
+
base.uri :
|
68
|
+
referer.uri) + uri
|
69
|
+
uri = referer.uri + uri
|
70
|
+
# Strip initial "/.." bits from the path
|
71
|
+
uri.path.sub!(/^(\/\.\.)+(?=\/)/, '')
|
72
|
+
end
|
73
|
+
|
74
|
+
unless ['http', 'https', 'file'].include?(uri.scheme.downcase)
|
75
|
+
raise ArgumentError, "unsupported scheme: #{uri.scheme}"
|
76
|
+
end
|
77
|
+
|
78
|
+
uri
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
82
|
+
|
data/lib/mechanize/util.rb
CHANGED
@@ -1,73 +1,89 @@
|
|
1
1
|
require 'cgi'
|
2
2
|
|
3
|
-
class Mechanize
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
:UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"}
|
3
|
+
class Mechanize::Util
|
4
|
+
CODE_DIC = {
|
5
|
+
:JIS => "ISO-2022-JP",
|
6
|
+
:EUC => "EUC-JP",
|
7
|
+
:SJIS => "SHIFT_JIS",
|
8
|
+
:UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"}
|
10
9
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
end
|
10
|
+
def self.build_query_string(parameters, enc=nil)
|
11
|
+
parameters.map { |k,v|
|
12
|
+
# WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
|
13
|
+
[CGI.escape(k.to_s), CGI.escape(v.to_s)].join("=") if k
|
14
|
+
}.compact.join('&')
|
15
|
+
end
|
18
16
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
17
|
+
def self.to_native_charset(s, code=nil)
|
18
|
+
if Mechanize.html_parser == Nokogiri::HTML
|
19
|
+
return unless s
|
20
|
+
code ||= detect_charset(s)
|
21
|
+
Iconv.iconv("UTF-8", code, s).join("")
|
22
|
+
else
|
23
|
+
s
|
24
|
+
end
|
25
|
+
end
|
28
26
|
|
29
|
-
|
30
|
-
|
31
|
-
|
27
|
+
def self.from_native_charset(s, code)
|
28
|
+
return s unless s && code
|
29
|
+
return s unless Mechanize.html_parser == Nokogiri::HTML
|
32
30
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
end
|
39
|
-
else
|
40
|
-
s.encode("UTF-8") rescue s
|
41
|
-
end
|
31
|
+
if RUBY_VERSION < '1.9.2'
|
32
|
+
begin
|
33
|
+
Iconv.iconv(code.to_s, "UTF-8", s).join("")
|
34
|
+
rescue Iconv::InvalidEncoding, Iconv::IllegalSequence
|
35
|
+
s
|
42
36
|
end
|
37
|
+
else
|
38
|
+
s.encode("UTF-8") rescue s
|
39
|
+
end
|
40
|
+
end
|
43
41
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
number ? ([number].pack('U') rescue match) : match
|
55
|
-
}
|
56
|
-
end
|
42
|
+
def self.html_unescape(s)
|
43
|
+
return s unless s
|
44
|
+
s.gsub(/&(\w+|#[0-9]+);/) { |match|
|
45
|
+
number = case match
|
46
|
+
when /&(\w+);/
|
47
|
+
Mechanize.html_parser::NamedCharacters[$1]
|
48
|
+
when /&#([0-9]+);/
|
49
|
+
$1.to_i
|
50
|
+
end
|
57
51
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
enc = tmp.to_s.upcase
|
62
|
-
else
|
63
|
-
enc = NKF.constants.find{|c|
|
64
|
-
NKF.const_get(c) == tmp
|
65
|
-
}
|
66
|
-
enc = CODE_DIC[enc.intern]
|
67
|
-
end
|
68
|
-
enc || "ISO-8859-1"
|
69
|
-
end
|
52
|
+
number ? ([number].pack('U') rescue match) : match
|
53
|
+
}
|
54
|
+
end
|
70
55
|
|
56
|
+
def self.detect_charset(src)
|
57
|
+
tmp = NKF.guess(src || "<html></html>")
|
58
|
+
if RUBY_VERSION >= "1.9.0"
|
59
|
+
enc = tmp.to_s.upcase
|
60
|
+
else
|
61
|
+
enc = NKF.constants.find{|c|
|
62
|
+
NKF.const_get(c) == tmp
|
63
|
+
}
|
64
|
+
enc = CODE_DIC[enc.intern]
|
71
65
|
end
|
66
|
+
enc || "ISO-8859-1"
|
67
|
+
end
|
68
|
+
|
69
|
+
def self.uri_escape str
|
70
|
+
@parser ||= begin
|
71
|
+
URI::Parser.new
|
72
|
+
rescue NameError
|
73
|
+
URI
|
74
|
+
end
|
75
|
+
|
76
|
+
@parser.escape str
|
72
77
|
end
|
78
|
+
|
79
|
+
def self.uri_unescape str
|
80
|
+
@parser ||= begin
|
81
|
+
URI::Parser.new
|
82
|
+
rescue NameError
|
83
|
+
URI
|
84
|
+
end
|
85
|
+
|
86
|
+
@parser.unescape str
|
87
|
+
end
|
88
|
+
|
73
89
|
end
|
data/test/helper.rb
CHANGED
@@ -3,6 +3,8 @@ require 'rubygems'
|
|
3
3
|
require 'mechanize'
|
4
4
|
require 'webrick/httputils'
|
5
5
|
require 'servlets'
|
6
|
+
require 'tmpdir'
|
7
|
+
require 'tempfile'
|
6
8
|
|
7
9
|
BASE_DIR = File.dirname(__FILE__)
|
8
10
|
|
@@ -65,7 +67,7 @@ class Net::HTTP
|
|
65
67
|
|
66
68
|
path = '/index.html' if path == '/'
|
67
69
|
|
68
|
-
res = Response.new
|
70
|
+
res = ::Response.new
|
69
71
|
res.query_params = url.query
|
70
72
|
|
71
73
|
request.query = if 'POST' != request.method && url.query then
|
@@ -95,14 +97,37 @@ class Net::HTTP
|
|
95
97
|
end
|
96
98
|
|
97
99
|
res['Content-Type'] ||= 'text/html'
|
98
|
-
res['Content-Length'] ||= res.body.length.to_s
|
99
100
|
res.code ||= "200"
|
100
101
|
|
102
|
+
response_klass = Net::HTTPResponse::CODE_TO_OBJ[res.code.to_s]
|
103
|
+
response = response_klass.new res.http_version, res.code, res.message
|
104
|
+
|
105
|
+
res.header.each do |k,v|
|
106
|
+
v = v.first if v.length == 1
|
107
|
+
response[k] = v
|
108
|
+
end
|
109
|
+
|
101
110
|
res.cookies.each do |cookie|
|
102
|
-
|
111
|
+
response.add_field 'Set-Cookie', cookie.to_s
|
112
|
+
end
|
113
|
+
|
114
|
+
response['Content-Type'] ||= 'text/html'
|
115
|
+
response['Content-Length'] = res['Content-Length'] || res.body.length.to_s
|
116
|
+
|
117
|
+
io = StringIO.new(res.body)
|
118
|
+
response.instance_variable_set :@socket, io
|
119
|
+
def io.read clen, dest, _
|
120
|
+
dest << string[0, clen]
|
103
121
|
end
|
104
|
-
|
105
|
-
|
122
|
+
|
123
|
+
body_exist = request.response_body_permitted? &&
|
124
|
+
response_klass.body_permitted?
|
125
|
+
|
126
|
+
response.instance_variable_set :@body_exist, body_exist
|
127
|
+
|
128
|
+
yield response if block_given?
|
129
|
+
|
130
|
+
response
|
106
131
|
end
|
107
132
|
end
|
108
133
|
|
@@ -116,6 +141,7 @@ class Response
|
|
116
141
|
attr_reader :code
|
117
142
|
attr_accessor :body, :query, :cookies
|
118
143
|
attr_accessor :query_params, :http_version
|
144
|
+
attr_accessor :header
|
119
145
|
|
120
146
|
def code=(c)
|
121
147
|
@code = c.to_s
|
@@ -136,4 +162,8 @@ class Response
|
|
136
162
|
def read_body
|
137
163
|
yield body
|
138
164
|
end
|
165
|
+
|
166
|
+
def message
|
167
|
+
''
|
168
|
+
end
|
139
169
|
end
|
data/test/htdocs/dir with spaces/foo.html
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
HELLO
|
data/test/htdocs/rails_3_encoding_hack_form_test.html
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<meta name="csrf-param" content="authenticity_token"/>
|
4
|
+
<meta name="csrf-token" content="+6MKmkYpUcOC7ClPngk3FMTDL1Yc0cU1sS9800eeAPA="/>
|
5
|
+
</head>
|
6
|
+
<body>
|
7
|
+
|
8
|
+
<form accept-charset="UTF-8" action="/form_post" class="new_user_session" id="new_user_session" method="post">
|
9
|
+
<div style="margin:0;padding:0;display:inline">
|
10
|
+
<input name="utf8" type="hidden" value="✓" />
|
11
|
+
<input name="authenticity_token" type="hidden" value="+6MKmkYpUcOC7ClPngk3FMTDL1Yc0cU1sS9800eeAPA=" />
|
12
|
+
</div>
|
13
|
+
<div class='field'>
|
14
|
+
<label for="user_session_email">Email</label>
|
15
|
+
<input id="user_session_email" name="user_session[email]" size="30" type="text" />
|
16
|
+
</div>
|
17
|
+
<div class='field'>
|
18
|
+
<label for="user_session_password">Password</label>
|
19
|
+
<input id="user_session_password" name="user_session[password]" size="30" type="password" />
|
20
|
+
</div>
|
21
|
+
<div class='buttons'>
|
22
|
+
<input id="user_session_submit" name="commit" type="submit" value="Login" />
|
23
|
+
</div>
|
24
|
+
</form>
|
25
|
+
|
26
|
+
</body>
|
27
|
+
</html>
|
data/test/htdocs/test_click.html
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
<html>
|
2
|
+
<head><title>Page Title</title></head>
|
3
|
+
<body>
|
4
|
+
<a href="/frame_test.html">This link is not called "A Link"</a>
|
5
|
+
<form method="get" action="/frame_test.html">
|
6
|
+
<input type="text" name="words" value="nil">
|
7
|
+
<input type="submit" value="A Button">
|
8
|
+
</form>
|
9
|
+
<a href="/index.html">A Link</a>
|
10
|
+
</body>
|
11
|
+
</html>
|
data/test/servlets.rb
CHANGED
@@ -9,7 +9,7 @@ class VerbServlet < WEBrick::HTTPServlet::AbstractServlet
|
|
9
9
|
%w(HEAD GET POST PUT DELETE).each do |verb|
|
10
10
|
eval(<<-eomethod)
|
11
11
|
def do_#{verb}(req, res)
|
12
|
-
res.
|
12
|
+
res.header['X-Request-Method'] = #{verb.dump}
|
13
13
|
end
|
14
14
|
eomethod
|
15
15
|
end
|
@@ -129,7 +129,8 @@ class GzipServlet < WEBrick::HTTPServlet::AbstractServlet
|
|
129
129
|
res['Content-Encoding'] = 'gzip'
|
130
130
|
res['Content-Type'] = "text/html"
|
131
131
|
else
|
132
|
-
|
132
|
+
res.code = 400
|
133
|
+
res.body = 'no gzip'
|
133
134
|
end
|
134
135
|
end
|
135
136
|
end
|