tenderlove-mechanize 0.9.3.20090623142847 → 0.9.3.20090911221705
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +55 -48
- data/Rakefile +12 -22
- data/lib/mechanize.rb +618 -4
- data/lib/mechanize/chain.rb +33 -0
- data/lib/mechanize/chain/auth_headers.rb +78 -0
- data/lib/mechanize/chain/body_decoding_handler.rb +46 -0
- data/lib/mechanize/chain/connection_resolver.rb +76 -0
- data/lib/mechanize/chain/custom_headers.rb +21 -0
- data/lib/{www/mechanize → mechanize}/chain/handler.rb +1 -1
- data/lib/mechanize/chain/header_resolver.rb +51 -0
- data/lib/mechanize/chain/parameter_resolver.rb +22 -0
- data/lib/{www/mechanize → mechanize}/chain/post_connect_hook.rb +0 -0
- data/lib/mechanize/chain/pre_connect_hook.rb +20 -0
- data/lib/mechanize/chain/request_resolver.rb +30 -0
- data/lib/mechanize/chain/response_body_parser.rb +38 -0
- data/lib/mechanize/chain/response_header_handler.rb +48 -0
- data/lib/mechanize/chain/response_reader.rb +39 -0
- data/lib/mechanize/chain/ssl_resolver.rb +40 -0
- data/lib/mechanize/chain/uri_resolver.rb +75 -0
- data/lib/mechanize/content_type_error.rb +14 -0
- data/lib/mechanize/cookie.rb +70 -0
- data/lib/mechanize/cookie_jar.rb +188 -0
- data/lib/mechanize/file.rb +71 -0
- data/lib/mechanize/file_response.rb +60 -0
- data/lib/mechanize/file_saver.rb +37 -0
- data/lib/mechanize/form.rb +378 -0
- data/lib/mechanize/form/button.rb +9 -0
- data/lib/mechanize/form/check_box.rb +11 -0
- data/lib/mechanize/form/field.rb +30 -0
- data/lib/mechanize/form/file_upload.rb +22 -0
- data/lib/mechanize/form/image_button.rb +21 -0
- data/lib/mechanize/form/multi_select_list.rb +67 -0
- data/lib/mechanize/form/option.rb +49 -0
- data/lib/mechanize/form/radio_button.rb +49 -0
- data/lib/mechanize/form/select_list.rb +43 -0
- data/lib/mechanize/headers.rb +11 -0
- data/lib/mechanize/history.rb +65 -0
- data/lib/mechanize/inspect.rb +88 -0
- data/lib/{www/mechanize → mechanize}/monkey_patch.rb +4 -6
- data/lib/mechanize/page.rb +206 -0
- data/lib/mechanize/page/base.rb +8 -0
- data/lib/mechanize/page/frame.rb +20 -0
- data/lib/mechanize/page/image.rb +26 -0
- data/lib/mechanize/page/label.rb +20 -0
- data/lib/mechanize/page/link.rb +48 -0
- data/lib/mechanize/page/meta.rb +50 -0
- data/lib/mechanize/pluggable_parsers.rb +101 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -0
- data/lib/mechanize/response_code_error.rb +22 -0
- data/lib/mechanize/unsupported_scheme_error.rb +8 -0
- data/lib/mechanize/util.rb +67 -0
- data/mechanize.gemspec +8 -8
- data/test/chain/test_argument_validator.rb +2 -2
- data/test/chain/test_auth_headers.rb +2 -2
- data/test/chain/test_custom_headers.rb +2 -2
- data/test/chain/test_header_resolver.rb +3 -3
- data/test/chain/test_parameter_resolver.rb +4 -4
- data/test/chain/test_request_resolver.rb +4 -4
- data/test/chain/test_response_reader.rb +3 -3
- data/test/helper.rb +1 -1
- data/test/htdocs/tc_bad_charset.html +9 -0
- data/test/htdocs/tc_charset.html +6 -0
- data/test/htdocs/test_bad_encoding.html +52 -0
- data/test/test_authenticate.rb +3 -3
- data/test/test_bad_links.rb +1 -1
- data/test/test_blank_form.rb +1 -1
- data/test/test_checkboxes.rb +1 -1
- data/test/test_content_type.rb +2 -2
- data/test/test_cookie_class.rb +12 -12
- data/test/test_cookie_jar.rb +13 -13
- data/test/test_cookies.rb +1 -1
- data/test/test_encoded_links.rb +1 -1
- data/test/test_errors.rb +2 -2
- data/test/test_follow_meta.rb +3 -3
- data/test/test_form_action.rb +1 -1
- data/test/test_form_as_hash.rb +1 -1
- data/test/test_form_button.rb +2 -2
- data/test/test_form_no_inputname.rb +1 -1
- data/test/test_forms.rb +1 -1
- data/test/test_frames.rb +1 -1
- data/test/test_get_headers.rb +1 -1
- data/test/test_gzipping.rb +2 -2
- data/test/test_hash_api.rb +1 -1
- data/test/test_history.rb +7 -7
- data/test/test_history_added.rb +1 -1
- data/test/test_html_unscape_forms.rb +7 -7
- data/test/test_if_modified_since.rb +1 -1
- data/test/test_keep_alive.rb +1 -1
- data/test/test_links.rb +2 -2
- data/test/test_mech.rb +2 -2
- data/test/test_mechanize_file.rb +7 -7
- data/test/test_meta.rb +2 -2
- data/test/test_multi_select.rb +1 -1
- data/test/test_no_attributes.rb +1 -1
- data/test/test_option.rb +1 -1
- data/test/test_page.rb +3 -3
- data/test/test_pluggable_parser.rb +14 -14
- data/test/test_post_form.rb +1 -1
- data/test/test_pretty_print.rb +2 -2
- data/test/test_radiobutton.rb +1 -1
- data/test/test_redirect_limit_reached.rb +1 -3
- data/test/test_redirect_verb_handling.rb +1 -3
- data/test/test_referer.rb +1 -1
- data/test/test_relative_links.rb +1 -1
- data/test/test_request.rb +1 -1
- data/test/test_response_code.rb +3 -3
- data/test/test_save_file.rb +3 -3
- data/test/test_scheme.rb +3 -3
- data/test/test_select.rb +2 -2
- data/test/test_select_all.rb +1 -1
- data/test/test_select_none.rb +1 -1
- data/test/test_select_noopts.rb +1 -1
- data/test/test_set_fields.rb +1 -1
- data/test/test_ssl_server.rb +1 -1
- data/test/test_subclass.rb +1 -1
- data/test/test_textarea.rb +1 -1
- data/test/test_upload.rb +1 -1
- data/test/test_verbs.rb +1 -1
- metadata +61 -56
- data/lib/www/mechanize.rb +0 -619
- data/lib/www/mechanize/chain.rb +0 -34
- data/lib/www/mechanize/chain/auth_headers.rb +0 -80
- data/lib/www/mechanize/chain/body_decoding_handler.rb +0 -48
- data/lib/www/mechanize/chain/connection_resolver.rb +0 -78
- data/lib/www/mechanize/chain/custom_headers.rb +0 -23
- data/lib/www/mechanize/chain/header_resolver.rb +0 -53
- data/lib/www/mechanize/chain/parameter_resolver.rb +0 -24
- data/lib/www/mechanize/chain/pre_connect_hook.rb +0 -22
- data/lib/www/mechanize/chain/request_resolver.rb +0 -32
- data/lib/www/mechanize/chain/response_body_parser.rb +0 -40
- data/lib/www/mechanize/chain/response_header_handler.rb +0 -50
- data/lib/www/mechanize/chain/response_reader.rb +0 -41
- data/lib/www/mechanize/chain/ssl_resolver.rb +0 -42
- data/lib/www/mechanize/chain/uri_resolver.rb +0 -77
- data/lib/www/mechanize/content_type_error.rb +0 -16
- data/lib/www/mechanize/cookie.rb +0 -72
- data/lib/www/mechanize/cookie_jar.rb +0 -191
- data/lib/www/mechanize/file.rb +0 -73
- data/lib/www/mechanize/file_response.rb +0 -62
- data/lib/www/mechanize/file_saver.rb +0 -39
- data/lib/www/mechanize/form.rb +0 -360
- data/lib/www/mechanize/form/button.rb +0 -8
- data/lib/www/mechanize/form/check_box.rb +0 -13
- data/lib/www/mechanize/form/field.rb +0 -28
- data/lib/www/mechanize/form/file_upload.rb +0 -24
- data/lib/www/mechanize/form/image_button.rb +0 -23
- data/lib/www/mechanize/form/multi_select_list.rb +0 -69
- data/lib/www/mechanize/form/option.rb +0 -51
- data/lib/www/mechanize/form/radio_button.rb +0 -38
- data/lib/www/mechanize/form/select_list.rb +0 -45
- data/lib/www/mechanize/headers.rb +0 -12
- data/lib/www/mechanize/history.rb +0 -67
- data/lib/www/mechanize/inspect.rb +0 -90
- data/lib/www/mechanize/page.rb +0 -181
- data/lib/www/mechanize/page/base.rb +0 -10
- data/lib/www/mechanize/page/frame.rb +0 -22
- data/lib/www/mechanize/page/link.rb +0 -50
- data/lib/www/mechanize/page/meta.rb +0 -51
- data/lib/www/mechanize/pluggable_parsers.rb +0 -103
- data/lib/www/mechanize/redirect_limit_reached_error.rb +0 -18
- data/lib/www/mechanize/redirect_not_get_or_head_error.rb +0 -20
- data/lib/www/mechanize/response_code_error.rb +0 -25
- data/lib/www/mechanize/unsupported_scheme_error.rb +0 -10
- data/lib/www/mechanize/util.rb +0 -76
data/Manifest.txt
CHANGED
@@ -12,57 +12,60 @@ examples/proxy_req.rb
|
|
12
12
|
examples/rubyforge.rb
|
13
13
|
examples/spider.rb
|
14
14
|
lib/mechanize.rb
|
15
|
-
lib/
|
16
|
-
lib/
|
17
|
-
lib/
|
18
|
-
lib/
|
19
|
-
lib/
|
20
|
-
lib/
|
21
|
-
lib/
|
22
|
-
lib/
|
23
|
-
lib/
|
24
|
-
lib/
|
25
|
-
lib/
|
26
|
-
lib/
|
27
|
-
lib/
|
28
|
-
lib/
|
29
|
-
lib/
|
30
|
-
lib/
|
31
|
-
lib/
|
32
|
-
lib/
|
33
|
-
lib/
|
34
|
-
lib/
|
35
|
-
lib/
|
36
|
-
lib/
|
37
|
-
lib/
|
38
|
-
lib/
|
39
|
-
lib/
|
40
|
-
lib/
|
41
|
-
lib/
|
42
|
-
lib/
|
43
|
-
lib/
|
44
|
-
lib/
|
45
|
-
lib/
|
46
|
-
lib/
|
47
|
-
lib/
|
48
|
-
lib/
|
49
|
-
lib/
|
50
|
-
lib/
|
51
|
-
lib/
|
52
|
-
lib/
|
53
|
-
lib/
|
54
|
-
lib/
|
55
|
-
lib/
|
56
|
-
lib/
|
57
|
-
lib/
|
58
|
-
lib/
|
59
|
-
lib/
|
60
|
-
lib/
|
61
|
-
lib/
|
62
|
-
lib/
|
15
|
+
lib/mechanize/chain.rb
|
16
|
+
lib/mechanize/chain/auth_headers.rb
|
17
|
+
lib/mechanize/chain/body_decoding_handler.rb
|
18
|
+
lib/mechanize/chain/connection_resolver.rb
|
19
|
+
lib/mechanize/chain/custom_headers.rb
|
20
|
+
lib/mechanize/chain/handler.rb
|
21
|
+
lib/mechanize/chain/header_resolver.rb
|
22
|
+
lib/mechanize/chain/parameter_resolver.rb
|
23
|
+
lib/mechanize/chain/post_connect_hook.rb
|
24
|
+
lib/mechanize/chain/pre_connect_hook.rb
|
25
|
+
lib/mechanize/chain/request_resolver.rb
|
26
|
+
lib/mechanize/chain/response_body_parser.rb
|
27
|
+
lib/mechanize/chain/response_header_handler.rb
|
28
|
+
lib/mechanize/chain/response_reader.rb
|
29
|
+
lib/mechanize/chain/ssl_resolver.rb
|
30
|
+
lib/mechanize/chain/uri_resolver.rb
|
31
|
+
lib/mechanize/content_type_error.rb
|
32
|
+
lib/mechanize/cookie.rb
|
33
|
+
lib/mechanize/cookie_jar.rb
|
34
|
+
lib/mechanize/file.rb
|
35
|
+
lib/mechanize/file_response.rb
|
36
|
+
lib/mechanize/file_saver.rb
|
37
|
+
lib/mechanize/form.rb
|
38
|
+
lib/mechanize/form/button.rb
|
39
|
+
lib/mechanize/form/check_box.rb
|
40
|
+
lib/mechanize/form/field.rb
|
41
|
+
lib/mechanize/form/file_upload.rb
|
42
|
+
lib/mechanize/form/image_button.rb
|
43
|
+
lib/mechanize/form/multi_select_list.rb
|
44
|
+
lib/mechanize/form/option.rb
|
45
|
+
lib/mechanize/form/radio_button.rb
|
46
|
+
lib/mechanize/form/select_list.rb
|
47
|
+
lib/mechanize/headers.rb
|
48
|
+
lib/mechanize/history.rb
|
49
|
+
lib/mechanize/inspect.rb
|
50
|
+
lib/mechanize/monkey_patch.rb
|
51
|
+
lib/mechanize/page.rb
|
52
|
+
lib/mechanize/page/base.rb
|
53
|
+
lib/mechanize/page/frame.rb
|
54
|
+
lib/mechanize/page/image.rb
|
55
|
+
lib/mechanize/page/label.rb
|
56
|
+
lib/mechanize/page/link.rb
|
57
|
+
lib/mechanize/page/meta.rb
|
58
|
+
lib/mechanize/pluggable_parsers.rb
|
59
|
+
lib/mechanize/redirect_limit_reached_error.rb
|
60
|
+
lib/mechanize/redirect_not_get_or_head_error.rb
|
61
|
+
lib/mechanize/response_code_error.rb
|
62
|
+
lib/mechanize/unsupported_scheme_error.rb
|
63
|
+
lib/mechanize/util.rb
|
63
64
|
mechanize.gemspec
|
64
65
|
test/chain/test_argument_validator.rb
|
66
|
+
test/chain/test_auth_headers.rb
|
65
67
|
test/chain/test_custom_headers.rb
|
68
|
+
test/chain/test_header_resolver.rb
|
66
69
|
test/chain/test_parameter_resolver.rb
|
67
70
|
test/chain/test_request_resolver.rb
|
68
71
|
test/chain/test_response_reader.rb
|
@@ -96,9 +99,11 @@ test/htdocs/link with space.html
|
|
96
99
|
test/htdocs/meta_cookie.html
|
97
100
|
test/htdocs/no_title_test.html
|
98
101
|
test/htdocs/relative/tc_relative_links.html
|
102
|
+
test/htdocs/tc_bad_charset.html
|
99
103
|
test/htdocs/tc_bad_links.html
|
100
104
|
test/htdocs/tc_base_link.html
|
101
105
|
test/htdocs/tc_blank_form.html
|
106
|
+
test/htdocs/tc_charset.html
|
102
107
|
test/htdocs/tc_checkboxes.html
|
103
108
|
test/htdocs/tc_encoded_links.html
|
104
109
|
test/htdocs/tc_follow_meta.html
|
@@ -110,6 +115,7 @@ test/htdocs/tc_radiobuttons.html
|
|
110
115
|
test/htdocs/tc_referer.html
|
111
116
|
test/htdocs/tc_relative_links.html
|
112
117
|
test/htdocs/tc_textarea.html
|
118
|
+
test/htdocs/test_bad_encoding.html
|
113
119
|
test/htdocs/unusual______.html
|
114
120
|
test/servlets.rb
|
115
121
|
test/ssl_server.rb
|
@@ -141,6 +147,7 @@ test/test_keep_alive.rb
|
|
141
147
|
test/test_links.rb
|
142
148
|
test/test_mech.rb
|
143
149
|
test/test_mechanize_file.rb
|
150
|
+
test/test_meta.rb
|
144
151
|
test/test_multi_select.rb
|
145
152
|
test/test_no_attributes.rb
|
146
153
|
test/test_option.rb
|
data/Rakefile
CHANGED
@@ -1,17 +1,14 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'hoe'
|
3
3
|
|
4
|
-
|
5
|
-
|
4
|
+
Hoe.spec 'mechanize' do
|
5
|
+
developer 'Aaron Patterson', 'aaronp@rubyforge.org'
|
6
|
+
developer 'Mike Dalessio', 'mike.dalessio@gmail.com'
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
p.history_file = 'CHANGELOG.rdoc'
|
12
|
-
p.extra_rdoc_files = FileList['*.rdoc']
|
13
|
-
p.summary = "Mechanize provides automated web-browsing"
|
14
|
-
p.extra_deps = [['nokogiri', '>= 1.2.1']]
|
8
|
+
self.readme_file = 'README.rdoc'
|
9
|
+
self.history_file = 'CHANGELOG.rdoc'
|
10
|
+
self.extra_rdoc_files += Dir['*.rdoc']
|
11
|
+
self.extra_deps << ['nokogiri', '>= 1.2.1']
|
15
12
|
end
|
16
13
|
|
17
14
|
desc "Update SSL Certificate"
|
@@ -26,18 +23,11 @@ task('ssl_cert') do |p|
|
|
26
23
|
sh "rm server.key.org"
|
27
24
|
end
|
28
25
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
f.write(HOE.spec.to_ruby)
|
35
|
-
end
|
26
|
+
desc 'Generate a gem spec'
|
27
|
+
task "gem:spec" do
|
28
|
+
File.open("mechanize.gemspec", 'w') do |f|
|
29
|
+
now = Time.now.strftime("%Y%m%d%H%M%S")
|
30
|
+
f.write `rake debug_gem`.sub(/(s.version = ".*)(")/) { "#{$1}.#{now}#{$2}" }
|
36
31
|
end
|
37
32
|
end
|
38
33
|
|
39
|
-
desc "Run code-coverage analysis"
|
40
|
-
task :coverage do
|
41
|
-
rm_rf "coverage"
|
42
|
-
sh "rcov -x Library -I lib:test #{Dir[*HOE.test_globs].join(' ')}"
|
43
|
-
end
|
data/lib/mechanize.rb
CHANGED
@@ -1,7 +1,621 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require 'net/http'
|
2
|
+
require 'net/https'
|
3
|
+
require 'uri'
|
4
|
+
require 'webrick/httputils'
|
5
|
+
require 'zlib'
|
6
|
+
require 'stringio'
|
7
|
+
require 'digest/md5'
|
8
|
+
require 'fileutils'
|
9
|
+
require 'nokogiri'
|
10
|
+
require 'forwardable'
|
11
|
+
require 'iconv'
|
12
|
+
require 'nkf'
|
13
|
+
|
14
|
+
require 'mechanize/util'
|
15
|
+
require 'mechanize/content_type_error'
|
16
|
+
require 'mechanize/response_code_error'
|
17
|
+
require 'mechanize/unsupported_scheme_error'
|
18
|
+
require 'mechanize/redirect_limit_reached_error'
|
19
|
+
require 'mechanize/redirect_not_get_or_head_error'
|
20
|
+
require 'mechanize/cookie'
|
21
|
+
require 'mechanize/cookie_jar'
|
22
|
+
require 'mechanize/history'
|
23
|
+
require 'mechanize/form'
|
24
|
+
require 'mechanize/pluggable_parsers'
|
25
|
+
require 'mechanize/file_response'
|
26
|
+
require 'mechanize/inspect'
|
27
|
+
require 'mechanize/chain'
|
28
|
+
require 'mechanize/monkey_patch'
|
29
|
+
|
30
|
+
# = Synopsis
|
31
|
+
# The Mechanize library is used for automating interaction with a website. It
|
32
|
+
# can follow links, and submit forms. Form fields can be populated and
|
33
|
+
# submitted. A history of URL's is maintained and can be queried.
|
34
|
+
#
|
35
|
+
# == Example
|
36
|
+
# require 'rubygems'
|
37
|
+
# require 'mechanize'
|
38
|
+
# require 'logger'
|
3
39
|
#
|
4
|
-
#
|
40
|
+
# agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
|
41
|
+
# agent.user_agent_alias = 'Mac Safari'
|
42
|
+
# page = agent.get("http://www.google.com/")
|
43
|
+
# search_form = page.form_with(:name => "f")
|
44
|
+
# search_form.field_with(:name => "q").value = "Hello"
|
45
|
+
# search_results = agent.submit(search_form)
|
46
|
+
# puts search_results.body
|
47
|
+
class Mechanize
|
48
|
+
##
|
49
|
+
# The version of Mechanize you are using.
|
50
|
+
VERSION = '0.9.3'
|
51
|
+
|
52
|
+
##
|
53
|
+
# User Agent aliases
|
54
|
+
AGENT_ALIASES = {
|
55
|
+
'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
|
56
|
+
'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
|
57
|
+
'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
|
58
|
+
'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
|
59
|
+
'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
|
60
|
+
'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
|
61
|
+
'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
|
62
|
+
'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
|
63
|
+
'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
|
64
|
+
'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
|
65
|
+
}
|
66
|
+
|
67
|
+
attr_accessor :cookie_jar
|
68
|
+
attr_accessor :open_timeout, :read_timeout
|
69
|
+
attr_accessor :user_agent
|
70
|
+
attr_accessor :watch_for_set
|
71
|
+
attr_accessor :ca_file
|
72
|
+
attr_accessor :key
|
73
|
+
attr_accessor :cert
|
74
|
+
attr_accessor :pass
|
75
|
+
attr_accessor :redirect_ok
|
76
|
+
attr_accessor :keep_alive_time
|
77
|
+
attr_accessor :keep_alive
|
78
|
+
attr_accessor :conditional_requests
|
79
|
+
attr_accessor :follow_meta_refresh
|
80
|
+
attr_accessor :verify_callback
|
81
|
+
attr_accessor :history_added
|
82
|
+
attr_accessor :scheme_handlers
|
83
|
+
attr_accessor :redirection_limit
|
84
|
+
|
85
|
+
# A hash of custom request headers
|
86
|
+
attr_accessor :request_headers
|
87
|
+
|
88
|
+
# The HTML parser to be used when parsing documents
|
89
|
+
attr_accessor :html_parser
|
90
|
+
|
91
|
+
attr_reader :history
|
92
|
+
attr_reader :pluggable_parser
|
93
|
+
|
94
|
+
alias :follow_redirect? :redirect_ok
|
95
|
+
|
96
|
+
@html_parser = Nokogiri::HTML
|
97
|
+
class << self; attr_accessor :html_parser, :log end
|
98
|
+
|
99
|
+
def initialize
|
100
|
+
# attr_accessors
|
101
|
+
@cookie_jar = CookieJar.new
|
102
|
+
@log = nil
|
103
|
+
@open_timeout = nil
|
104
|
+
@read_timeout = nil
|
105
|
+
@user_agent = AGENT_ALIASES['Mechanize']
|
106
|
+
@watch_for_set = nil
|
107
|
+
@history_added = nil
|
108
|
+
@ca_file = nil # OpenSSL server certificate file
|
109
|
+
|
110
|
+
# callback for OpenSSL errors while verifying the server certificate
|
111
|
+
# chain, can be used for debugging or to ignore errors by always
|
112
|
+
# returning _true_
|
113
|
+
@verify_callback = nil
|
114
|
+
@cert = nil # OpenSSL Certificate
|
115
|
+
@key = nil # OpenSSL Private Key
|
116
|
+
@pass = nil # OpenSSL Password
|
117
|
+
@redirect_ok = true # Should we follow redirects?
|
118
|
+
|
119
|
+
# attr_readers
|
120
|
+
@history = Mechanize::History.new
|
121
|
+
@pluggable_parser = PluggableParser.new
|
122
|
+
|
123
|
+
# Auth variables
|
124
|
+
@user = nil # Auth User
|
125
|
+
@password = nil # Auth Password
|
126
|
+
@digest = nil # DigestAuth Digest
|
127
|
+
@auth_hash = {} # Keep track of urls for sending auth
|
128
|
+
@request_headers= {} # A hash of request headers to be used
|
129
|
+
|
130
|
+
# Proxy settings
|
131
|
+
@proxy_addr = nil
|
132
|
+
@proxy_pass = nil
|
133
|
+
@proxy_port = nil
|
134
|
+
@proxy_user = nil
|
135
|
+
|
136
|
+
@conditional_requests = true
|
137
|
+
|
138
|
+
@follow_meta_refresh = false
|
139
|
+
@redirection_limit = 20
|
140
|
+
|
141
|
+
# Connection Cache & Keep alive
|
142
|
+
@connection_cache = {}
|
143
|
+
@keep_alive_time = 300
|
144
|
+
@keep_alive = true
|
145
|
+
|
146
|
+
@scheme_handlers = Hash.new { |h,k|
|
147
|
+
h[k] = lambda { |link, page|
|
148
|
+
raise UnsupportedSchemeError.new(k)
|
149
|
+
}
|
150
|
+
}
|
151
|
+
@scheme_handlers['http'] = lambda { |link, page| link }
|
152
|
+
@scheme_handlers['https'] = @scheme_handlers['http']
|
153
|
+
@scheme_handlers['relative'] = @scheme_handlers['http']
|
154
|
+
@scheme_handlers['file'] = @scheme_handlers['http']
|
155
|
+
|
156
|
+
@pre_connect_hook = Chain::PreConnectHook.new
|
157
|
+
@post_connect_hook = Chain::PostConnectHook.new
|
158
|
+
|
159
|
+
@html_parser = self.class.html_parser
|
160
|
+
|
161
|
+
yield self if block_given?
|
162
|
+
end
|
163
|
+
|
164
|
+
def max_history=(length); @history.max_size = length end
|
165
|
+
def max_history; @history.max_size end
|
166
|
+
def log=(l); self.class.log = l end
|
167
|
+
def log; self.class.log end
|
168
|
+
|
169
|
+
def pre_connect_hooks
|
170
|
+
@pre_connect_hook.hooks
|
171
|
+
end
|
172
|
+
|
173
|
+
def post_connect_hooks
|
174
|
+
@post_connect_hook.hooks
|
175
|
+
end
|
176
|
+
|
177
|
+
# Sets the proxy address, port, user, and password
|
178
|
+
# +addr+ should be a host, with no "http://"
|
179
|
+
def set_proxy(addr, port, user = nil, pass = nil)
|
180
|
+
@proxy_addr, @proxy_port, @proxy_user, @proxy_pass = addr, port, user, pass
|
181
|
+
end
|
182
|
+
|
183
|
+
# Set the user agent for the Mechanize object.
|
184
|
+
# See AGENT_ALIASES
|
185
|
+
def user_agent_alias=(al)
|
186
|
+
self.user_agent = AGENT_ALIASES[al] || raise("unknown agent alias")
|
187
|
+
end
|
188
|
+
|
189
|
+
# Returns a list of cookies stored in the cookie jar.
|
190
|
+
def cookies
|
191
|
+
@cookie_jar.to_a
|
192
|
+
end
|
193
|
+
|
194
|
+
# Sets the user and password to be used for authentication.
|
195
|
+
def auth(user, password)
|
196
|
+
@user = user
|
197
|
+
@password = password
|
198
|
+
end
|
199
|
+
alias :basic_auth :auth
|
200
|
+
|
201
|
+
# Fetches the URL passed in and returns a page.
|
202
|
+
def get(options, parameters = [], referer = nil)
|
203
|
+
unless options.is_a? Hash
|
204
|
+
url = options
|
205
|
+
unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
|
206
|
+
referer = parameters
|
207
|
+
parameters = []
|
208
|
+
end
|
209
|
+
else
|
210
|
+
raise ArgumentError.new("url must be specified") unless url = options[:url]
|
211
|
+
parameters = options[:params] || []
|
212
|
+
referer = options[:referer]
|
213
|
+
headers = options[:headers]
|
214
|
+
end
|
215
|
+
|
216
|
+
unless referer
|
217
|
+
if url.to_s =~ /^http/
|
218
|
+
referer = Page.new(nil, {'content-type'=>'text/html'})
|
219
|
+
else
|
220
|
+
referer = current_page || Page.new(nil, {'content-type'=>'text/html'})
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
# FIXME: Huge hack so that using a URI as a referer works. I need to
|
225
|
+
# refactor everything to pass around URIs but still support
|
226
|
+
# Mechanize::Page#base
|
227
|
+
unless referer.is_a?(Mechanize::File)
|
228
|
+
referer = referer.is_a?(String) ?
|
229
|
+
Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
|
230
|
+
Page.new(referer, {'content-type' => 'text/html'})
|
231
|
+
end
|
232
|
+
|
233
|
+
# fetch the page
|
234
|
+
page = fetch_page( :uri => url,
|
235
|
+
:referer => referer,
|
236
|
+
:headers => headers || {},
|
237
|
+
:params => parameters
|
238
|
+
)
|
239
|
+
add_to_history(page)
|
240
|
+
yield page if block_given?
|
241
|
+
page
|
242
|
+
end
|
243
|
+
|
244
|
+
####
|
245
|
+
# PUT to +url+ with +entity+, and setting +options+:
|
246
|
+
#
|
247
|
+
# put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})
|
248
|
+
#
|
249
|
+
def put(url, entity, options = {})
|
250
|
+
request_with_entity(:put, url, entity, options)
|
251
|
+
end
|
252
|
+
|
253
|
+
####
|
254
|
+
# DELETE to +url+ with +query_params+, and setting +options+:
|
255
|
+
#
|
256
|
+
# delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
|
257
|
+
#
|
258
|
+
def delete(url, query_params = {}, options = {})
|
259
|
+
page = head(url, query_params, options.merge({:verb => :delete}))
|
260
|
+
add_to_history(page)
|
261
|
+
page
|
262
|
+
end
|
263
|
+
|
264
|
+
####
|
265
|
+
# HEAD to +url+ with +query_params+, and setting +options+:
|
266
|
+
#
|
267
|
+
# head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
|
268
|
+
#
|
269
|
+
def head(url, query_params = {}, options = {})
|
270
|
+
options = {
|
271
|
+
:uri => url,
|
272
|
+
:headers => {},
|
273
|
+
:params => query_params,
|
274
|
+
:verb => :head
|
275
|
+
}.merge(options)
|
276
|
+
# fetch the page
|
277
|
+
page = fetch_page(options)
|
278
|
+
yield page if block_given?
|
279
|
+
page
|
280
|
+
end
|
281
|
+
|
282
|
+
# Fetch a file and return the contents of the file.
|
283
|
+
def get_file(url)
|
284
|
+
get(url).body
|
285
|
+
end
|
286
|
+
|
287
|
+
# Clicks the Mechanize::Link object passed in and returns the
|
288
|
+
# page fetched.
|
289
|
+
def click(link)
|
290
|
+
referer = link.page rescue referer = nil
|
291
|
+
href = link.respond_to?(:href) ? link.href :
|
292
|
+
(link['href'] || link['src'])
|
293
|
+
get(:url => href, :referer => (referer || current_page()))
|
294
|
+
end
|
295
|
+
|
296
|
+
# Equivalent to the browser back button. Returns the most recent page
|
297
|
+
# visited.
|
298
|
+
def back
|
299
|
+
@history.pop
|
300
|
+
end
|
301
|
+
|
302
|
+
# Posts to the given URL with the request entity. The request
|
303
|
+
# entity is specified by either a string, or a list of key-value
|
304
|
+
# pairs represented by a hash or an array of arrays.
|
305
|
+
#
|
306
|
+
# Examples:
|
307
|
+
# agent.post('http://example.com/', "foo" => "bar")
|
308
|
+
#
|
309
|
+
# agent.post('http://example.com/', [ ["foo", "bar"] ])
|
310
|
+
#
|
311
|
+
# agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
|
312
|
+
def post(url, query={}, headers={})
|
313
|
+
if query.is_a?(String)
|
314
|
+
return request_with_entity(:post, url, query, :headers => headers)
|
315
|
+
end
|
316
|
+
node = {}
|
317
|
+
# Create a fake form
|
318
|
+
class << node
|
319
|
+
def search(*args); []; end
|
320
|
+
end
|
321
|
+
node['method'] = 'POST'
|
322
|
+
node['enctype'] = 'application/x-www-form-urlencoded'
|
323
|
+
|
324
|
+
form = Form.new(node)
|
325
|
+
query.each { |k,v|
|
326
|
+
if v.is_a?(IO)
|
327
|
+
form.enctype = 'multipart/form-data'
|
328
|
+
ul = Form::FileUpload.new(k.to_s,::File.basename(v.path))
|
329
|
+
ul.file_data = v.read
|
330
|
+
form.file_uploads << ul
|
331
|
+
else
|
332
|
+
form.fields << Form::Field.new(k.to_s,v)
|
333
|
+
end
|
334
|
+
}
|
335
|
+
post_form(url, form, headers)
|
336
|
+
end
|
337
|
+
|
338
|
+
# Submit a form with an optional button.
|
339
|
+
# Without a button:
|
340
|
+
# page = agent.get('http://example.com')
|
341
|
+
# agent.submit(page.forms.first)
|
342
|
+
# With a button
|
343
|
+
# agent.submit(page.forms.first, page.forms.first.buttons.first)
|
344
|
+
def submit(form, button=nil, headers={})
|
345
|
+
form.add_button_to_query(button) if button
|
346
|
+
case form.method.upcase
|
347
|
+
when 'POST'
|
348
|
+
post_form(form.action, form, headers)
|
349
|
+
when 'GET'
|
350
|
+
get( :url => form.action.gsub(/\?[^\?]*$/, ''),
|
351
|
+
:params => form.build_query,
|
352
|
+
:headers => headers,
|
353
|
+
:referer => form.page
|
354
|
+
)
|
355
|
+
else
|
356
|
+
raise "unsupported method: #{form.method.upcase}"
|
357
|
+
end
|
358
|
+
end
|
359
|
+
|
360
|
+
def request_with_entity(verb, url, entity, options={})
|
361
|
+
cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})
|
362
|
+
|
363
|
+
options = {
|
364
|
+
:uri => url,
|
365
|
+
:referer => cur_page,
|
366
|
+
:headers => {},
|
367
|
+
}.update(options)
|
368
|
+
|
369
|
+
headers = {
|
370
|
+
'Content-Type' => 'application/octet-stream',
|
371
|
+
'Content-Length' => entity.size.to_s,
|
372
|
+
}.update(options[:headers])
|
373
|
+
|
374
|
+
options.update({
|
375
|
+
:verb => verb,
|
376
|
+
:params => [entity],
|
377
|
+
:headers => headers,
|
378
|
+
})
|
379
|
+
|
380
|
+
page = fetch_page(options)
|
381
|
+
add_to_history(page)
|
382
|
+
page
|
383
|
+
end
|
384
|
+
|
385
|
+
# Returns the current page loaded by Mechanize
|
386
|
+
def current_page
|
387
|
+
@history.last
|
388
|
+
end
|
389
|
+
|
390
|
+
# Returns whether or not a url has been visited
|
391
|
+
def visited?(url)
|
392
|
+
! visited_page(url).nil?
|
393
|
+
end
|
394
|
+
|
395
|
+
# Returns a visited page for the url passed in, otherwise nil
|
396
|
+
def visited_page(url)
|
397
|
+
if url.respond_to? :href
|
398
|
+
url = url.href
|
399
|
+
end
|
400
|
+
@history.visited_page(resolve(url))
|
401
|
+
end
|
402
|
+
|
403
|
+
# Runs given block, then resets the page history as it was before. self is
|
404
|
+
# given as a parameter to the block. Returns the value of the block.
|
405
|
+
def transact
|
406
|
+
history_backup = @history.dup
|
407
|
+
begin
|
408
|
+
yield self
|
409
|
+
ensure
|
410
|
+
@history = history_backup
|
411
|
+
end
|
412
|
+
end
|
413
|
+
|
414
|
+
alias :page :current_page
|
415
|
+
|
416
|
+
private
|
417
|
+
|
418
|
+
def resolve(url, referer = current_page())
|
419
|
+
hash = { :uri => url, :referer => referer }
|
420
|
+
chain = Chain.new([
|
421
|
+
Chain::URIResolver.new(@scheme_handlers)
|
422
|
+
]).handle(hash)
|
423
|
+
hash[:uri].to_s
|
424
|
+
end
|
425
|
+
|
426
|
+
def post_form(url, form, headers = {})
|
427
|
+
cur_page = form.page || current_page ||
|
428
|
+
Page.new( nil, {'content-type'=>'text/html'})
|
429
|
+
|
430
|
+
request_data = form.request_data
|
431
|
+
|
432
|
+
log.debug("query: #{ request_data.inspect }") if log
|
433
|
+
|
434
|
+
# fetch the page
|
435
|
+
page = fetch_page( :uri => url,
|
436
|
+
:referer => cur_page,
|
437
|
+
:verb => :post,
|
438
|
+
:params => [request_data],
|
439
|
+
:headers => {
|
440
|
+
'Content-Type' => form.enctype,
|
441
|
+
'Content-Length' => request_data.size.to_s,
|
442
|
+
}.merge(headers))
|
443
|
+
add_to_history(page)
|
444
|
+
page
|
445
|
+
end
|
446
|
+
|
447
|
+
# uri is an absolute URI
|
448
|
+
def fetch_page(params)
|
449
|
+
options = {
|
450
|
+
:request => nil,
|
451
|
+
:response => nil,
|
452
|
+
:connection => nil,
|
453
|
+
:referer => current_page(),
|
454
|
+
:uri => nil,
|
455
|
+
:verb => :get,
|
456
|
+
:agent => self,
|
457
|
+
:redirects => 0,
|
458
|
+
:params => [],
|
459
|
+
:headers => {},
|
460
|
+
}.merge(params)
|
461
|
+
|
462
|
+
before_connect = Chain.new([
|
463
|
+
Chain::URIResolver.new(@scheme_handlers),
|
464
|
+
Chain::ParameterResolver.new,
|
465
|
+
Chain::RequestResolver.new,
|
466
|
+
Chain::ConnectionResolver.new(
|
467
|
+
@connection_cache,
|
468
|
+
@keep_alive,
|
469
|
+
@proxy_addr,
|
470
|
+
@proxy_port,
|
471
|
+
@proxy_user,
|
472
|
+
@proxy_pass
|
473
|
+
),
|
474
|
+
Chain::SSLResolver.new(@ca_file, @verify_callback, @cert, @key, @pass),
|
475
|
+
Chain::AuthHeaders.new(@auth_hash, @user, @password, @digest),
|
476
|
+
Chain::HeaderResolver.new(
|
477
|
+
@keep_alive,
|
478
|
+
@keep_alive_time,
|
479
|
+
@cookie_jar,
|
480
|
+
@user_agent,
|
481
|
+
@request_headers
|
482
|
+
),
|
483
|
+
Chain::CustomHeaders.new,
|
484
|
+
@pre_connect_hook,
|
485
|
+
])
|
486
|
+
before_connect.handle(options)
|
487
|
+
|
488
|
+
uri = options[:uri]
|
489
|
+
request = options[:request]
|
490
|
+
cur_page = options[:referer]
|
491
|
+
request_data = options[:params]
|
492
|
+
redirects = options[:redirects]
|
493
|
+
http_obj = options[:connection]
|
494
|
+
|
495
|
+
# Add If-Modified-Since if page is in history
|
496
|
+
if( (page = visited_page(uri)) && page.response['Last-Modified'] )
|
497
|
+
request['If-Modified-Since'] = page.response['Last-Modified']
|
498
|
+
end if(@conditional_requests)
|
499
|
+
|
500
|
+
# Specify timeouts if given
|
501
|
+
http_obj.open_timeout = @open_timeout if @open_timeout
|
502
|
+
http_obj.read_timeout = @read_timeout if @read_timeout
|
503
|
+
http_obj.start unless http_obj.started?
|
504
|
+
|
505
|
+
# Log specified headers for the request
|
506
|
+
log.info("#{ request.class }: #{ request.path }") if log
|
507
|
+
request.each_header do |k, v|
|
508
|
+
log.debug("request-header: #{ k } => #{ v }")
|
509
|
+
end if log
|
510
|
+
|
511
|
+
# Send the request
|
512
|
+
attempts = 0
|
513
|
+
begin
|
514
|
+
response = http_obj.request(request, *request_data) { |r|
|
515
|
+
connection_chain = Chain.new([
|
516
|
+
Chain::ResponseReader.new(r),
|
517
|
+
Chain::BodyDecodingHandler.new,
|
518
|
+
])
|
519
|
+
connection_chain.handle(options)
|
520
|
+
}
|
521
|
+
rescue EOFError, Errno::ECONNRESET, Errno::EPIPE => x
|
522
|
+
log.error("Rescuing EOF error") if log
|
523
|
+
http_obj.finish
|
524
|
+
raise x if attempts >= 2
|
525
|
+
request.body = nil
|
526
|
+
http_obj.start
|
527
|
+
attempts += 1
|
528
|
+
retry
|
529
|
+
end
|
530
|
+
|
531
|
+
after_connect = Chain.new([
|
532
|
+
@post_connect_hook,
|
533
|
+
Chain::ResponseBodyParser.new(@pluggable_parser, @watch_for_set),
|
534
|
+
Chain::ResponseHeaderHandler.new(@cookie_jar, @connection_cache),
|
535
|
+
])
|
536
|
+
after_connect.handle(options)
|
537
|
+
|
538
|
+
res_klass = options[:res_klass]
|
539
|
+
response_body = options[:response_body]
|
540
|
+
page = options[:page]
|
541
|
+
|
542
|
+
log.info("status: #{ page.code }") if log
|
543
|
+
|
544
|
+
if follow_meta_refresh
|
545
|
+
redirect_uri = nil
|
546
|
+
referer = page
|
547
|
+
if (page.respond_to?(:meta) && (redirect = page.meta.first))
|
548
|
+
redirect_uri = redirect.uri.to_s
|
549
|
+
sleep redirect.node['delay'].to_f
|
550
|
+
referer = Page.new(nil, {'content-type'=>'text/html'})
|
551
|
+
elsif refresh = response['refresh']
|
552
|
+
delay, redirect_uri = Page::Meta.parse(refresh, uri)
|
553
|
+
raise StandardError, "Invalid refresh http header" unless delay
|
554
|
+
if redirects + 1 > redirection_limit
|
555
|
+
raise RedirectLimitReachedError.new(page, redirects)
|
556
|
+
end
|
557
|
+
sleep delay.to_f
|
558
|
+
end
|
559
|
+
if redirect_uri
|
560
|
+
@history.push(page, page.uri)
|
561
|
+
return fetch_page(
|
562
|
+
:uri => redirect_uri,
|
563
|
+
:referer => referer,
|
564
|
+
:params => [],
|
565
|
+
:verb => :get,
|
566
|
+
:redirects => redirects + 1
|
567
|
+
)
|
568
|
+
end
|
569
|
+
end
|
570
|
+
|
571
|
+
return page if res_klass <= Net::HTTPSuccess
|
572
|
+
|
573
|
+
if res_klass == Net::HTTPNotModified
|
574
|
+
log.debug("Got cached page") if log
|
575
|
+
return visited_page(uri) || page
|
576
|
+
elsif res_klass <= Net::HTTPRedirection
|
577
|
+
return page unless follow_redirect?
|
578
|
+
log.info("follow redirect to: #{ response['Location'] }") if log
|
579
|
+
from_uri = page.uri
|
580
|
+
raise RedirectLimitReachedError.new(page, redirects) if redirects + 1 > redirection_limit
|
581
|
+
redirect_verb = options[:verb] == :head ? :head : :get
|
582
|
+
page = fetch_page( :uri => response['Location'].to_s,
|
583
|
+
:referer => page,
|
584
|
+
:params => [],
|
585
|
+
:verb => redirect_verb,
|
586
|
+
:redirects => redirects + 1
|
587
|
+
)
|
588
|
+
@history.push(page, from_uri)
|
589
|
+
return page
|
590
|
+
elsif res_klass <= Net::HTTPUnauthorized
|
591
|
+
raise ResponseCodeError.new(page) unless @user || @password
|
592
|
+
raise ResponseCodeError.new(page) if @auth_hash.has_key?(uri.host)
|
593
|
+
if response['www-authenticate'] =~ /Digest/i
|
594
|
+
@auth_hash[uri.host] = :digest
|
595
|
+
if response['server'] =~ /Microsoft-IIS/
|
596
|
+
@auth_hash[uri.host] = :iis_digest
|
597
|
+
end
|
598
|
+
@digest = response['www-authenticate']
|
599
|
+
else
|
600
|
+
@auth_hash[uri.host] = :basic
|
601
|
+
end
|
602
|
+
return fetch_page( :uri => uri,
|
603
|
+
:referer => cur_page,
|
604
|
+
:verb => request.method.downcase.to_sym,
|
605
|
+
:params => request_data,
|
606
|
+
:headers => options[:headers]
|
607
|
+
)
|
608
|
+
end
|
609
|
+
|
610
|
+
raise ResponseCodeError.new(page), "Unhandled response", caller
|
611
|
+
end
|
612
|
+
|
613
|
+
def add_to_history(page)
|
614
|
+
@history.push(page, resolve(page.uri))
|
615
|
+
history_added.call(page) if history_added
|
616
|
+
end
|
617
|
+
end
|
5
618
|
|
619
|
+
module WWW; end
|
620
|
+
WWW::Mechanize = ::Mechanize
|
6
621
|
|
7
|
-
require 'www/mechanize'
|