mechanize 1.0.0 → 1.0.1.beta.20110107104205
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data/CHANGELOG.rdoc +19 -0
- data/EXAMPLES.rdoc +13 -13
- data/Manifest.txt +0 -1
- data/Rakefile +4 -9
- data/lib/mechanize/chain/body_decoding_handler.rb +2 -0
- data/lib/mechanize/chain/connection_resolver.rb +6 -55
- data/lib/mechanize/chain/header_resolver.rb +3 -12
- data/lib/mechanize/chain/parameter_resolver.rb +2 -2
- data/lib/mechanize/chain/request_resolver.rb +1 -0
- data/lib/mechanize/chain/response_body_parser.rb +4 -6
- data/lib/mechanize/chain/response_header_handler.rb +1 -15
- data/lib/mechanize/chain/uri_resolver.rb +2 -2
- data/lib/mechanize/chain.rb +4 -1
- data/lib/mechanize/cookie.rb +1 -1
- data/lib/mechanize/file.rb +7 -0
- data/lib/mechanize/form/field.rb +6 -0
- data/lib/mechanize/form.rb +16 -4
- data/lib/mechanize/headers.rb +14 -0
- data/lib/mechanize/page/link.rb +7 -1
- data/lib/mechanize/page/meta.rb +3 -5
- data/lib/mechanize/page.rb +17 -4
- data/lib/mechanize/util.rb +8 -4
- data/lib/mechanize.rb +82 -56
- data/test/chain/test_header_resolver.rb +0 -2
- data/test/chain/test_parameter_resolver.rb +1 -1
- data/test/helper.rb +20 -13
- data/test/htdocs/form_multival.html +2 -2
- data/test/htdocs/form_test.html +2 -2
- data/test/htdocs/tc_links.html +1 -1
- data/test/htdocs/tc_referer.html +2 -0
- data/test/servlets.rb +41 -5
- data/test/test_authenticate.rb +3 -3
- data/test/test_cookies.rb +10 -4
- data/test/test_errors.rb +1 -1
- data/test/test_field_precedence.rb +4 -1
- data/test/test_follow_meta.rb +3 -3
- data/test/test_form_button.rb +8 -0
- data/test/test_forms.rb +63 -42
- data/test/test_gzipping.rb +1 -1
- data/test/test_headers.rb +33 -0
- data/test/test_history.rb +2 -2
- data/test/test_history_added.rb +1 -1
- data/test/test_links.rb +9 -0
- data/test/test_mech.rb +22 -7
- data/test/test_mech_proxy.rb +4 -4
- data/test/test_meta.rb +5 -3
- data/test/test_no_attributes.rb +1 -1
- data/test/test_referer.rb +21 -3
- data/test/test_relative_links.rb +4 -4
- data/test/test_response_code.rb +1 -1
- data/test/test_scheme.rb +7 -0
- metadata +61 -18
- data/test/test_keep_alive.rb +0 -31
data/CHANGELOG.rdoc
CHANGED
@@ -1,5 +1,24 @@
|
|
1
1
|
= Mechanize CHANGELOG
|
2
2
|
|
3
|
+
=== HEAD
|
4
|
+
|
5
|
+
* New Features
|
6
|
+
|
7
|
+
* Add header reference methods to Mechanize::File so that a response
|
8
|
+
object becomes compatible with Net::HTTPResponse.
|
9
|
+
* Mechanize#click accepts a regexp or string to click a button/link
|
10
|
+
in the current page. It works as expected when not passed a
|
11
|
+
string or regexp.
|
12
|
+
|
13
|
+
* Provide a way to only follow permanent redirects (301)
|
14
|
+
automatically: agent.redirect_ok = :permanent GH #73
|
15
|
+
|
16
|
+
* Bug Fixes:
|
17
|
+
|
18
|
+
* Fixed a bug where Referer is not sent when accessing a relative
|
19
|
+
URI starting with "http".
|
20
|
+
* Fix handling of Meta Refresh with relative paths. GH #39
|
21
|
+
|
3
22
|
=== 1.0.0
|
4
23
|
|
5
24
|
* New Features:
|
data/EXAMPLES.rdoc
CHANGED
@@ -8,16 +8,16 @@ is the same as { ... }.submit.
|
|
8
8
|
|
9
9
|
require 'rubygems'
|
10
10
|
require 'mechanize'
|
11
|
-
|
11
|
+
|
12
12
|
a = Mechanize.new { |agent|
|
13
13
|
agent.user_agent_alias = 'Mac Safari'
|
14
14
|
}
|
15
|
-
|
15
|
+
|
16
16
|
a.get('http://google.com/') do |page|
|
17
17
|
search_result = page.form_with(:name => 'f') do |search|
|
18
18
|
search.q = 'Hello world'
|
19
19
|
end.submit
|
20
|
-
|
20
|
+
|
21
21
|
search_result.links.each do |link|
|
22
22
|
puts link.text
|
23
23
|
end
|
@@ -32,13 +32,13 @@ is the same as { ... }.submit.
|
|
32
32
|
a.get('http://rubyforge.org/') do |page|
|
33
33
|
# Click the login link
|
34
34
|
login_page = a.click(page.link_with(:text => /Log In/))
|
35
|
-
|
35
|
+
|
36
36
|
# Submit the login form
|
37
37
|
my_page = login_page.form_with(:action => '/account/login.php') do |f|
|
38
38
|
f.form_loginname = ARGV[0]
|
39
39
|
f.form_pw = ARGV[1]
|
40
40
|
end.click_button
|
41
|
-
|
41
|
+
|
42
42
|
my_page.links.each do |link|
|
43
43
|
text = link.text.strip
|
44
44
|
next unless text.length > 0
|
@@ -48,7 +48,7 @@ is the same as { ... }.submit.
|
|
48
48
|
|
49
49
|
== File Upload
|
50
50
|
Upload a file to flickr.
|
51
|
-
|
51
|
+
|
52
52
|
require 'rubygems'
|
53
53
|
require 'mechanize'
|
54
54
|
|
@@ -106,7 +106,7 @@ Beautiful Soup for that page.
|
|
106
106
|
|
107
107
|
require 'rubygems'
|
108
108
|
require 'mechanize'
|
109
|
-
|
109
|
+
|
110
110
|
agent = Mechanize.new
|
111
111
|
agent.set_proxy('localhost', '8000')
|
112
112
|
page = agent.get(ARGV[0])
|
@@ -155,8 +155,8 @@ This example also demonstrates subclassing Mechanize.
|
|
155
155
|
|
156
156
|
In most cases a client certificate is created as an additional layer of security
|
157
157
|
for certain websites. The specific case that this was initially tested on was
|
158
|
-
for automating the download of archived images from a banks (Wachovia) lockbox
|
159
|
-
system. Once the certificate is installed into your browser you will have to
|
158
|
+
for automating the download of archived images from a bank's (Wachovia) lockbox
|
159
|
+
system. Once the certificate is installed into your browser you will have to
|
160
160
|
export it and split the certificate and private key into separate files. Exported
|
161
161
|
files are usually in .p12 format (IE 7 & Firefox 2.0) which stands for PKCS #12.
|
162
162
|
You can convert them from p12 to pem format by using the following commands:
|
@@ -169,17 +169,17 @@ openssl.exe pkcs12 -in input_file.p12 -clcerts -out example.cer -nokeys
|
|
169
169
|
|
170
170
|
# create Mechanize instance
|
171
171
|
agent = Mechanize.new
|
172
|
-
|
172
|
+
|
173
173
|
# set the path of the certificate file
|
174
174
|
agent.cert = 'example.cer'
|
175
|
-
|
175
|
+
|
176
176
|
# set the path of the private key file
|
177
177
|
agent.key = 'example.key'
|
178
178
|
|
179
179
|
# get the login form & fill it out with the username/password
|
180
|
-
login_form =
|
180
|
+
login_form = agent.get("http://example.com/login_page").form('Login')
|
181
181
|
login_form.Userid = 'TestUser'
|
182
182
|
login_form.Password = 'TestPassword'
|
183
|
-
|
183
|
+
|
184
184
|
# submit login form
|
185
185
|
agent.submit(login_form, login_form.buttons.first)
|
data/Manifest.txt
CHANGED
data/Rakefile
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'hoe'
|
3
|
+
Hoe.plugin :bundler
|
4
|
+
Hoe.plugin :gemspec
|
5
|
+
Hoe.plugin :git
|
3
6
|
|
4
7
|
Hoe.spec 'mechanize' do
|
5
8
|
developer 'Aaron Patterson', 'aaronp@rubyforge.org'
|
@@ -9,6 +12,7 @@ Hoe.spec 'mechanize' do
|
|
9
12
|
self.history_file = 'CHANGELOG.rdoc'
|
10
13
|
self.extra_rdoc_files += Dir['*.rdoc']
|
11
14
|
self.extra_deps << ['nokogiri', '>= 1.2.1']
|
15
|
+
self.extra_deps << ['net-http-persistent', '~> 1.1']
|
12
16
|
end
|
13
17
|
|
14
18
|
desc "Update SSL Certificate"
|
@@ -22,12 +26,3 @@ task('ssl_cert') do |p|
|
|
22
26
|
sh "mv server.key server.csr server.crt server.pem test/data/"
|
23
27
|
sh "rm server.key.org"
|
24
28
|
end
|
25
|
-
|
26
|
-
desc 'Generate a gem spec'
|
27
|
-
task "gem:spec" do
|
28
|
-
File.open("mechanize.gemspec", 'w') do |f|
|
29
|
-
now = Time.now.strftime("%Y%m%d%H%M%S")
|
30
|
-
f.write `rake debug_gem`.sub(/(s.version = ".*)(")/) { "#{$1}.#{now}#{$2}" }
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
@@ -3,73 +3,24 @@ class Mechanize
|
|
3
3
|
class ConnectionResolver
|
4
4
|
include Mechanize::Handler
|
5
5
|
|
6
|
-
def initialize( connection_cache,
|
7
|
-
keep_alive,
|
8
|
-
proxy_addr,
|
9
|
-
proxy_port,
|
10
|
-
proxy_user,
|
11
|
-
proxy_pass )
|
12
|
-
|
13
|
-
@connection_cache = connection_cache
|
14
|
-
@keep_alive = keep_alive
|
15
|
-
@proxy_addr = proxy_addr
|
16
|
-
@proxy_port = proxy_port
|
17
|
-
@proxy_user = proxy_user
|
18
|
-
@proxy_pass = proxy_pass
|
19
|
-
end
|
20
|
-
|
21
6
|
def handle(ctx, params)
|
22
7
|
uri = params[:uri]
|
23
8
|
http_obj = nil
|
24
9
|
|
25
10
|
case uri.scheme.downcase
|
26
|
-
when 'http', 'https'
|
27
|
-
|
28
|
-
|
29
|
-
:keep_alive_options => {},
|
30
|
-
})
|
31
|
-
http_obj = cache_obj[:connection]
|
32
|
-
if http_obj.nil? || ! http_obj.started?
|
33
|
-
http_obj = cache_obj[:connection] =
|
34
|
-
Net::HTTP.new( uri.host,
|
35
|
-
uri.port,
|
36
|
-
@proxy_addr,
|
37
|
-
@proxy_port,
|
38
|
-
@proxy_user,
|
39
|
-
@proxy_pass
|
40
|
-
)
|
41
|
-
cache_obj[:keep_alive_options] = {}
|
42
|
-
end
|
43
|
-
|
44
|
-
# If we're keeping connections alive and the last request time is too
|
45
|
-
# long ago, stop the connection. Or, if the max requests left is 1,
|
46
|
-
# reset the connection.
|
47
|
-
if @keep_alive && http_obj.started?
|
48
|
-
opts = cache_obj[:keep_alive_options]
|
49
|
-
if((opts[:timeout] &&
|
50
|
-
Time.now.to_i - cache_obj[:last_request_time] > opts[:timeout].to_i) ||
|
51
|
-
opts[:max] && opts[:max].to_i == 1)
|
52
|
-
|
53
|
-
Mechanize.log.debug('Finishing stale connection') if Mechanize.log
|
54
|
-
http_obj.finish
|
55
|
-
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
cache_obj[:last_request_time] = Time.now.to_i
|
60
|
-
when 'file'
|
11
|
+
when 'http', 'https' then
|
12
|
+
http_obj = ctx.http
|
13
|
+
when 'file' then
|
61
14
|
http_obj = Object.new
|
62
15
|
class << http_obj
|
63
|
-
def
|
64
|
-
|
65
|
-
response = FileResponse.new(request.uri.path)
|
66
|
-
yield response
|
16
|
+
def request(uri, request)
|
17
|
+
yield FileResponse.new(CGI.unescape(uri.path))
|
67
18
|
end
|
68
19
|
end
|
69
20
|
end
|
70
21
|
|
71
|
-
http_obj.extend(Mutex_m)
|
72
22
|
params[:connection] = http_obj
|
23
|
+
|
73
24
|
super
|
74
25
|
end
|
75
26
|
end
|
@@ -2,10 +2,7 @@ class Mechanize
|
|
2
2
|
class Chain
|
3
3
|
class HeaderResolver
|
4
4
|
include Mechanize::Handler
|
5
|
-
def initialize(
|
6
|
-
gzip_enabled, headers)
|
7
|
-
@keep_alive = keep_alive
|
8
|
-
@keep_alive_time = keep_alive_time
|
5
|
+
def initialize(cookie_jar, user_agent, gzip_enabled, headers)
|
9
6
|
@cookie_jar = cookie_jar
|
10
7
|
@user_agent = user_agent
|
11
8
|
@gzip_enabled = gzip_enabled
|
@@ -17,12 +14,6 @@ class Mechanize
|
|
17
14
|
referer = params[:referer]
|
18
15
|
request = params[:request]
|
19
16
|
|
20
|
-
if @keep_alive
|
21
|
-
request['Connection'] = 'keep-alive'
|
22
|
-
request['Keep-Alive'] = @keep_alive_time.to_s
|
23
|
-
else
|
24
|
-
request['Connection'] = 'close'
|
25
|
-
end
|
26
17
|
if @gzip_enabled
|
27
18
|
request['Accept-Encoding'] = 'gzip,identity'
|
28
19
|
else
|
@@ -39,8 +30,8 @@ class Mechanize
|
|
39
30
|
request.add_field('Cookie', cookie)
|
40
31
|
end
|
41
32
|
|
42
|
-
# Add Referer header to request
|
43
|
-
if referer && referer.uri
|
33
|
+
# Add Referer header to request except https => http
|
34
|
+
if referer && referer.uri && (!(URI::HTTPS === referer.uri) or URI::HTTPS === uri)
|
44
35
|
request['Referer'] = referer.uri.to_s
|
45
36
|
end
|
46
37
|
|
@@ -8,12 +8,12 @@ class Mechanize
|
|
8
8
|
uri = params[:uri]
|
9
9
|
case params[:verb]
|
10
10
|
when :head, :get, :delete, :trace
|
11
|
-
if parameters.length > 0
|
11
|
+
if parameters and parameters.length > 0
|
12
12
|
uri.query ||= ''
|
13
13
|
uri.query << '&' if uri.query.length > 0
|
14
14
|
uri.query << Util.build_query_string(parameters)
|
15
15
|
end
|
16
|
-
params[:params] =
|
16
|
+
params[:params] = nil
|
17
17
|
end
|
18
18
|
super
|
19
19
|
end
|
@@ -8,6 +8,7 @@ class Mechanize
|
|
8
8
|
if %w{ http https }.include?(uri.scheme.downcase)
|
9
9
|
klass = Net::HTTP.const_get(params[:verb].to_s.capitalize)
|
10
10
|
params[:request] ||= klass.new(uri.request_uri)
|
11
|
+
params[:request].body = params[:params].first if params[:params]
|
11
12
|
end
|
12
13
|
|
13
14
|
if %w{ file }.include?(uri.scheme.downcase)
|
@@ -20,17 +20,15 @@ class Mechanize
|
|
20
20
|
end
|
21
21
|
|
22
22
|
# Find our pluggable parser
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
response_body,
|
27
|
-
response.code
|
28
|
-
) { |parser|
|
23
|
+
parser_klass = @pluggable_parser.parser(content_type)
|
24
|
+
params[:page] = parser_klass.new(uri, response, response_body,
|
25
|
+
response.code) { |parser|
|
29
26
|
parser.mech = params[:agent] if parser.respond_to? :mech=
|
30
27
|
if parser.respond_to?(:watch_for_set=) && @watch_for_set
|
31
28
|
parser.watch_for_set = @watch_for_set
|
32
29
|
end
|
33
30
|
}
|
31
|
+
|
34
32
|
super
|
35
33
|
end
|
36
34
|
end
|
@@ -3,28 +3,14 @@ class Mechanize
|
|
3
3
|
class ResponseHeaderHandler
|
4
4
|
include Mechanize::Handler
|
5
5
|
|
6
|
-
def initialize(cookie_jar
|
6
|
+
def initialize(cookie_jar)
|
7
7
|
@cookie_jar = cookie_jar
|
8
|
-
@connection_cache = connection_cache
|
9
8
|
end
|
10
9
|
|
11
10
|
def handle(ctx, params)
|
12
11
|
response = params[:response]
|
13
12
|
uri = params[:uri]
|
14
13
|
page = params[:page]
|
15
|
-
cache_obj = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
|
16
|
-
:connection => nil,
|
17
|
-
:keep_alive_options => {},
|
18
|
-
})
|
19
|
-
|
20
|
-
# If the server sends back keep alive options, save them
|
21
|
-
if keep_alive_info = response['keep-alive']
|
22
|
-
keep_alive_info.split(/,\s*/).each do |option|
|
23
|
-
k, v = option.split(/\=/)
|
24
|
-
cache_obj[:keep_alive_options] ||= {}
|
25
|
-
cache_obj[:keep_alive_options][k.intern] = v
|
26
|
-
end
|
27
|
-
end
|
28
14
|
|
29
15
|
if page.is_a?(Page) && page.body =~ /Set-Cookie/n
|
30
16
|
page.search('//head/meta[@http-equiv="Set-Cookie"]').each do |meta|
|
@@ -25,14 +25,14 @@ class Mechanize
|
|
25
25
|
uri.split(/(?:%[0-9A-Fa-f]{2})+|#/).zip(
|
26
26
|
uri.scan(/(?:%[0-9A-Fa-f]{2})+|#/)
|
27
27
|
).map { |x,y|
|
28
|
-
"#{
|
28
|
+
"#{WEBrick::HTTPUtils.escape(x)}#{y}"
|
29
29
|
}.join('')
|
30
30
|
)
|
31
31
|
|
32
32
|
begin
|
33
33
|
uri = URI.parse(escaped_uri)
|
34
34
|
rescue
|
35
|
-
uri = URI.parse(
|
35
|
+
uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_uri))
|
36
36
|
end
|
37
37
|
|
38
38
|
end
|
data/lib/mechanize/chain.rb
CHANGED
data/lib/mechanize/cookie.rb
CHANGED
data/lib/mechanize/file.rb
CHANGED
@@ -16,8 +16,15 @@ class Mechanize
|
|
16
16
|
# agent.get('http://example.com/foo.jpg').class #=> Mechanize::File
|
17
17
|
#
|
18
18
|
class File
|
19
|
+
extend Forwardable
|
20
|
+
|
19
21
|
attr_accessor :uri, :response, :body, :code, :filename
|
20
22
|
alias :header :response
|
23
|
+
def_delegator :header, :[], :[]
|
24
|
+
def_delegator :header, :[]=, :[]=
|
25
|
+
def_delegator :header, :key?, :key?
|
26
|
+
def_delegator :header, :each, :each
|
27
|
+
def_delegator :header, :canonical_each, :canonical_each
|
21
28
|
|
22
29
|
alias :content :body
|
23
30
|
|
data/lib/mechanize/form/field.rb
CHANGED
@@ -29,6 +29,12 @@ class Mechanize
|
|
29
29
|
return -1 if Hash === other.node
|
30
30
|
node <=> other.node
|
31
31
|
end
|
32
|
+
|
33
|
+
# This method is a shortcut to get field's DOM id.
|
34
|
+
# Common usage: form.field_with(:dom_id => "foo")
|
35
|
+
def dom_id
|
36
|
+
node['id']
|
37
|
+
end
|
32
38
|
end
|
33
39
|
|
34
40
|
class Text < Field; end
|
data/lib/mechanize/form.rb
CHANGED
@@ -72,6 +72,15 @@ class Mechanize
|
|
72
72
|
def hidden_field?(field_name) !! hiddens.find{|f| f.name == field_name}; end
|
73
73
|
def textarea_field?(field_name) !!textareas.find{|f| f.name == field_name}; end
|
74
74
|
|
75
|
+
# This method is a shortcut to get form's DOM id.
|
76
|
+
# Common usage:
|
77
|
+
# page.form_with(:dom_id => "foorm")
|
78
|
+
# Note that you can also use +:id+ to get to this method:
|
79
|
+
# page.form_with(:id => "foorm")
|
80
|
+
def dom_id
|
81
|
+
form_node['id']
|
82
|
+
end
|
83
|
+
|
75
84
|
# Add a field with +field_name+ and +value+
|
76
85
|
def add_field!(field_name, value = nil)
|
77
86
|
fields << Field.new({'name' => field_name}, value)
|
@@ -226,7 +235,7 @@ class Mechanize
|
|
226
235
|
params = []
|
227
236
|
query_params.each { |k,v| params << param_to_multipart(k, v) unless k.nil? }
|
228
237
|
@file_uploads.each { |f| params << file_to_multipart(f) }
|
229
|
-
params.collect { |p| "--#{boundary}\r\n#{p}" }.join('') +
|
238
|
+
params.collect { |p| "--#{boundary}\r\n#{p.respond_to?(:force_encoding) ? p.force_encoding('ASCII-8BIT') : p}" }.join('') +
|
230
239
|
"--#{boundary}--\r\n"
|
231
240
|
else
|
232
241
|
Mechanize::Util.build_query_string(query_params)
|
@@ -243,7 +252,7 @@ class Mechanize
|
|
243
252
|
#
|
244
253
|
# Find one field that matches +criteria+
|
245
254
|
# Example:
|
246
|
-
# form.field_with(:
|
255
|
+
# form.field_with(:id => "exact_field_id").value = 'hello'
|
247
256
|
|
248
257
|
##
|
249
258
|
# :method: fields_with(criteria)
|
@@ -329,7 +338,10 @@ class Mechanize
|
|
329
338
|
def #{plural}_with criteria = {}
|
330
339
|
criteria = {:name => criteria} if String === criteria
|
331
340
|
f = #{plural}.find_all do |thing|
|
332
|
-
criteria.all?
|
341
|
+
criteria.all? do |k,v|
|
342
|
+
k = :dom_id if(k.to_s == "id")
|
343
|
+
v === thing.send(k)
|
344
|
+
end
|
333
345
|
end
|
334
346
|
yield f if block_given?
|
335
347
|
f
|
@@ -356,7 +368,7 @@ class Mechanize
|
|
356
368
|
form_node.search('input').each do |node|
|
357
369
|
type = (node['type'] || 'text').downcase
|
358
370
|
name = node['name']
|
359
|
-
next if name.nil? && !(type == 'submit' || type =='button')
|
371
|
+
next if name.nil? && !(type == 'submit' || type =='button' || type == 'image')
|
360
372
|
case type
|
361
373
|
when 'radio'
|
362
374
|
@radiobuttons << RadioButton.new(node, self)
|
data/lib/mechanize/headers.rb
CHANGED
@@ -3,9 +3,23 @@ class Mechanize
|
|
3
3
|
def [](key)
|
4
4
|
super(key.downcase)
|
5
5
|
end
|
6
|
+
|
6
7
|
def []=(key, value)
|
7
8
|
super(key.downcase, value)
|
8
9
|
end
|
10
|
+
|
11
|
+
def key?(key)
|
12
|
+
super(key.downcase)
|
13
|
+
end
|
14
|
+
|
15
|
+
def canonical_each
|
16
|
+
block_given? or return enum_for(__method__)
|
17
|
+
each { |key, value|
|
18
|
+
key = key.capitalize
|
19
|
+
key.gsub!(/-([a-z])/) { "-#{$1.upcase}" }
|
20
|
+
yield [key, value]
|
21
|
+
}
|
22
|
+
end
|
9
23
|
end
|
10
24
|
end
|
11
25
|
|
data/lib/mechanize/page/link.rb
CHANGED
@@ -36,13 +36,19 @@ class Mechanize
|
|
36
36
|
end
|
37
37
|
|
38
38
|
def uri
|
39
|
-
@href && URI.parse(
|
39
|
+
@href && URI.parse(WEBrick::HTTPUtils.escape(@href))
|
40
40
|
end
|
41
41
|
|
42
42
|
# Click on this link
|
43
43
|
def click
|
44
44
|
@mech.click self
|
45
45
|
end
|
46
|
+
|
47
|
+
# This method is a shorthand to get link's DOM id.
|
48
|
+
# Common usage: page.link_with(:dom_id => "links_exact_id")
|
49
|
+
def dom_id
|
50
|
+
node['id']
|
51
|
+
end
|
46
52
|
end
|
47
53
|
end
|
48
54
|
end
|
data/lib/mechanize/page/meta.rb
CHANGED
@@ -32,11 +32,9 @@ class Mechanize
|
|
32
32
|
if content && content =~ CONTENT_REGEXP
|
33
33
|
delay, url = $1, $3
|
34
34
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
else "http://#{uri.host}#{url}"
|
39
|
-
end
|
35
|
+
dest = uri
|
36
|
+
dest += url if url
|
37
|
+
url = dest.to_s
|
40
38
|
|
41
39
|
block_given? ? yield(delay, url) : [delay, url]
|
42
40
|
else
|
data/lib/mechanize/page.rb
CHANGED
@@ -50,9 +50,19 @@ class Mechanize
|
|
50
50
|
end
|
51
51
|
|
52
52
|
def title
|
53
|
-
@title ||=
|
54
|
-
|
55
|
-
|
53
|
+
@title ||=
|
54
|
+
if doc = parser
|
55
|
+
title = if doc.respond_to?(:title)
|
56
|
+
doc.title
|
57
|
+
else
|
58
|
+
doc.search('title').inner_text
|
59
|
+
end
|
60
|
+
if title && !title.empty?
|
61
|
+
title
|
62
|
+
else
|
63
|
+
nil
|
64
|
+
end
|
65
|
+
end
|
56
66
|
end
|
57
67
|
|
58
68
|
def encoding=(encoding)
|
@@ -186,7 +196,10 @@ class Mechanize
|
|
186
196
|
def #{type}s_with(criteria)
|
187
197
|
criteria = {:name => criteria} if String === criteria
|
188
198
|
f = #{type}s.find_all do |thing|
|
189
|
-
criteria.all?
|
199
|
+
criteria.all? do |k,v|
|
200
|
+
k = :dom_id if(k.to_s == "id")
|
201
|
+
v === thing.send(k)
|
202
|
+
end
|
190
203
|
end
|
191
204
|
yield f if block_given?
|
192
205
|
f
|
data/lib/mechanize/util.rb
CHANGED
@@ -30,10 +30,14 @@ class Mechanize
|
|
30
30
|
return s unless s && code
|
31
31
|
return s unless Mechanize.html_parser == Nokogiri::HTML
|
32
32
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
33
|
+
if RUBY_VERSION < '1.9.2'
|
34
|
+
begin
|
35
|
+
Iconv.iconv(code.to_s, "UTF-8", s).join("")
|
36
|
+
rescue Iconv::InvalidEncoding, Iconv::IllegalSequence
|
37
|
+
s
|
38
|
+
end
|
39
|
+
else
|
40
|
+
s.encode("UTF-8") rescue s
|
37
41
|
end
|
38
42
|
end
|
39
43
|
|