diamond-mechanize 2.1 → 2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. metadata +222 -167
  2. data/Rakefile +0 -49
  3. data/lib/mechanize/content_type_error.rb +0 -13
  4. data/lib/mechanize/cookie.rb +0 -232
  5. data/lib/mechanize/cookie_jar.rb +0 -194
  6. data/lib/mechanize/download.rb +0 -59
  7. data/lib/mechanize/element_matcher.rb +0 -36
  8. data/lib/mechanize/file.rb +0 -65
  9. data/lib/mechanize/file_connection.rb +0 -17
  10. data/lib/mechanize/file_request.rb +0 -26
  11. data/lib/mechanize/file_response.rb +0 -74
  12. data/lib/mechanize/file_saver.rb +0 -39
  13. data/lib/mechanize/form/button.rb +0 -6
  14. data/lib/mechanize/form/check_box.rb +0 -12
  15. data/lib/mechanize/form/field.rb +0 -54
  16. data/lib/mechanize/form/file_upload.rb +0 -21
  17. data/lib/mechanize/form/hidden.rb +0 -3
  18. data/lib/mechanize/form/image_button.rb +0 -19
  19. data/lib/mechanize/form/keygen.rb +0 -34
  20. data/lib/mechanize/form/multi_select_list.rb +0 -94
  21. data/lib/mechanize/form/option.rb +0 -50
  22. data/lib/mechanize/form/radio_button.rb +0 -55
  23. data/lib/mechanize/form/reset.rb +0 -3
  24. data/lib/mechanize/form/select_list.rb +0 -44
  25. data/lib/mechanize/form/submit.rb +0 -3
  26. data/lib/mechanize/form/text.rb +0 -3
  27. data/lib/mechanize/form/textarea.rb +0 -3
  28. data/lib/mechanize/form.rb +0 -543
  29. data/lib/mechanize/headers.rb +0 -23
  30. data/lib/mechanize/history.rb +0 -82
  31. data/lib/mechanize/http/agent.rb +0 -1004
  32. data/lib/mechanize/http/auth_challenge.rb +0 -59
  33. data/lib/mechanize/http/auth_realm.rb +0 -31
  34. data/lib/mechanize/http/content_disposition_parser.rb +0 -188
  35. data/lib/mechanize/http/www_authenticate_parser.rb +0 -155
  36. data/lib/mechanize/http.rb +0 -8
  37. data/lib/mechanize/monkey_patch.rb +0 -16
  38. data/lib/mechanize/page/base.rb +0 -7
  39. data/lib/mechanize/page/frame.rb +0 -27
  40. data/lib/mechanize/page/image.rb +0 -30
  41. data/lib/mechanize/page/label.rb +0 -20
  42. data/lib/mechanize/page/link.rb +0 -98
  43. data/lib/mechanize/page/meta_refresh.rb +0 -68
  44. data/lib/mechanize/page.rb +0 -440
  45. data/lib/mechanize/parser.rb +0 -173
  46. data/lib/mechanize/pluggable_parsers.rb +0 -144
  47. data/lib/mechanize/redirect_limit_reached_error.rb +0 -19
  48. data/lib/mechanize/redirect_not_get_or_head_error.rb +0 -21
  49. data/lib/mechanize/response_code_error.rb +0 -21
  50. data/lib/mechanize/response_read_error.rb +0 -27
  51. data/lib/mechanize/robots_disallowed_error.rb +0 -28
  52. data/lib/mechanize/test_case.rb +0 -663
  53. data/lib/mechanize/unauthorized_error.rb +0 -3
  54. data/lib/mechanize/unsupported_scheme_error.rb +0 -6
  55. data/lib/mechanize/util.rb +0 -101
  56. data/lib/mechanize.rb +0 -1079
  57. data/test/data/htpasswd +0 -1
  58. data/test/data/server.crt +0 -16
  59. data/test/data/server.csr +0 -12
  60. data/test/data/server.key +0 -15
  61. data/test/data/server.pem +0 -15
  62. data/test/htdocs/alt_text.html +0 -10
  63. data/test/htdocs/bad_form_test.html +0 -9
  64. data/test/htdocs/button.jpg +0 -0
  65. data/test/htdocs/canonical_uri.html +0 -9
  66. data/test/htdocs/dir with spaces/foo.html +0 -1
  67. data/test/htdocs/empty_form.html +0 -6
  68. data/test/htdocs/file_upload.html +0 -26
  69. data/test/htdocs/find_link.html +0 -41
  70. data/test/htdocs/form_multi_select.html +0 -16
  71. data/test/htdocs/form_multival.html +0 -37
  72. data/test/htdocs/form_no_action.html +0 -18
  73. data/test/htdocs/form_no_input_name.html +0 -16
  74. data/test/htdocs/form_order_test.html +0 -11
  75. data/test/htdocs/form_select.html +0 -16
  76. data/test/htdocs/form_set_fields.html +0 -14
  77. data/test/htdocs/form_test.html +0 -188
  78. data/test/htdocs/frame_referer_test.html +0 -10
  79. data/test/htdocs/frame_test.html +0 -30
  80. data/test/htdocs/google.html +0 -13
  81. data/test/htdocs/index.html +0 -6
  82. data/test/htdocs/link with space.html +0 -5
  83. data/test/htdocs/meta_cookie.html +0 -11
  84. data/test/htdocs/no_title_test.html +0 -6
  85. data/test/htdocs/noindex.html +0 -9
  86. data/test/htdocs/rails_3_encoding_hack_form_test.html +0 -27
  87. data/test/htdocs/relative/tc_relative_links.html +0 -21
  88. data/test/htdocs/robots.html +0 -8
  89. data/test/htdocs/robots.txt +0 -2
  90. data/test/htdocs/tc_bad_charset.html +0 -9
  91. data/test/htdocs/tc_bad_links.html +0 -5
  92. data/test/htdocs/tc_base_link.html +0 -8
  93. data/test/htdocs/tc_blank_form.html +0 -11
  94. data/test/htdocs/tc_charset.html +0 -6
  95. data/test/htdocs/tc_checkboxes.html +0 -19
  96. data/test/htdocs/tc_encoded_links.html +0 -5
  97. data/test/htdocs/tc_field_precedence.html +0 -11
  98. data/test/htdocs/tc_follow_meta.html +0 -8
  99. data/test/htdocs/tc_form_action.html +0 -48
  100. data/test/htdocs/tc_links.html +0 -19
  101. data/test/htdocs/tc_meta_in_body.html +0 -9
  102. data/test/htdocs/tc_pretty_print.html +0 -17
  103. data/test/htdocs/tc_referer.html +0 -16
  104. data/test/htdocs/tc_relative_links.html +0 -19
  105. data/test/htdocs/tc_textarea.html +0 -23
  106. data/test/htdocs/test_click.html +0 -11
  107. data/test/htdocs/unusual______.html +0 -5
  108. data/test/test_mechanize.rb +0 -1164
  109. data/test/test_mechanize_cookie.rb +0 -451
  110. data/test/test_mechanize_cookie_jar.rb +0 -483
  111. data/test/test_mechanize_download.rb +0 -43
  112. data/test/test_mechanize_file.rb +0 -61
  113. data/test/test_mechanize_file_connection.rb +0 -21
  114. data/test/test_mechanize_file_request.rb +0 -19
  115. data/test/test_mechanize_file_saver.rb +0 -21
  116. data/test/test_mechanize_form.rb +0 -875
  117. data/test/test_mechanize_form_check_box.rb +0 -38
  118. data/test/test_mechanize_form_encoding.rb +0 -114
  119. data/test/test_mechanize_form_field.rb +0 -63
  120. data/test/test_mechanize_form_file_upload.rb +0 -20
  121. data/test/test_mechanize_form_image_button.rb +0 -12
  122. data/test/test_mechanize_form_keygen.rb +0 -32
  123. data/test/test_mechanize_form_multi_select_list.rb +0 -84
  124. data/test/test_mechanize_form_option.rb +0 -55
  125. data/test/test_mechanize_form_radio_button.rb +0 -78
  126. data/test/test_mechanize_form_select_list.rb +0 -76
  127. data/test/test_mechanize_form_textarea.rb +0 -52
  128. data/test/test_mechanize_headers.rb +0 -35
  129. data/test/test_mechanize_history.rb +0 -103
  130. data/test/test_mechanize_http_agent.rb +0 -1225
  131. data/test/test_mechanize_http_auth_challenge.rb +0 -39
  132. data/test/test_mechanize_http_auth_realm.rb +0 -49
  133. data/test/test_mechanize_http_content_disposition_parser.rb +0 -118
  134. data/test/test_mechanize_http_www_authenticate_parser.rb +0 -146
  135. data/test/test_mechanize_link.rb +0 -80
  136. data/test/test_mechanize_page.rb +0 -118
  137. data/test/test_mechanize_page_encoding.rb +0 -182
  138. data/test/test_mechanize_page_frame.rb +0 -16
  139. data/test/test_mechanize_page_link.rb +0 -390
  140. data/test/test_mechanize_page_meta_refresh.rb +0 -127
  141. data/test/test_mechanize_parser.rb +0 -289
  142. data/test/test_mechanize_pluggable_parser.rb +0 -52
  143. data/test/test_mechanize_redirect_limit_reached_error.rb +0 -24
  144. data/test/test_mechanize_redirect_not_get_or_head_error.rb +0 -14
  145. data/test/test_mechanize_subclass.rb +0 -22
  146. data/test/test_mechanize_util.rb +0 -103
  147. data/test/test_multi_select.rb +0 -119
@@ -1,3 +0,0 @@
1
- class Mechanize::UnauthorizedError < Mechanize::ResponseCodeError
2
- end
3
-
@@ -1,6 +0,0 @@
1
- class Mechanize::UnsupportedSchemeError < Mechanize::Error
2
- attr_accessor :scheme
3
- def initialize(scheme)
4
- @scheme = scheme
5
- end
6
- end
@@ -1,101 +0,0 @@
1
- require 'cgi'
2
-
3
- class Mechanize::Util
4
- CODE_DIC = {
5
- :JIS => "ISO-2022-JP",
6
- :EUC => "EUC-JP",
7
- :SJIS => "SHIFT_JIS",
8
- :UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"}
9
-
10
- # true if RUBY_VERSION is 1.9.0 or later
11
- NEW_RUBY_ENCODING = RUBY_VERSION >= '1.9.0'
12
-
13
- # contains encoding error classes to raise
14
- ENCODING_ERRORS = if NEW_RUBY_ENCODING
15
- [EncodingError]
16
- else
17
- [Iconv::InvalidEncoding, Iconv::IllegalSequence]
18
- end
19
-
20
- def self.build_query_string(parameters, enc=nil)
21
- parameters.map { |k,v|
22
- # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
23
- [CGI.escape(k.to_s), CGI.escape(v.to_s)].join("=") if k
24
- }.compact.join('&')
25
- end
26
-
27
- # Converts string +s+ from +code+ to UTF-8.
28
- def self.from_native_charset(s, code, ignore_encoding_error=false, log=nil)
29
- return s unless s && code
30
- return s unless Mechanize.html_parser == Nokogiri::HTML
31
-
32
- begin
33
- encode_to(code, s)
34
- rescue *ENCODING_ERRORS => ex
35
- log.debug("from_native_charset: #{ex.class}: form encoding: #{code.inspect} string: #{s}") if log
36
- if ignore_encoding_error
37
- s
38
- else
39
- raise
40
- end
41
- end
42
- end
43
-
44
- # inner convert method of Util.from_native_charset
45
- def self.encode_to(encoding, str)
46
- if NEW_RUBY_ENCODING
47
- str.encode(encoding)
48
- else
49
- Iconv.conv(encoding.to_s, "UTF-8", str)
50
- end
51
- end
52
- private_class_method :encode_to
53
-
54
- def self.html_unescape(s)
55
- return s unless s
56
- s.gsub(/&(\w+|#[0-9]+);/) { |match|
57
- number = case match
58
- when /&(\w+);/
59
- Mechanize.html_parser::NamedCharacters[$1]
60
- when /&#([0-9]+);/
61
- $1.to_i
62
- end
63
-
64
- number ? ([number].pack('U') rescue match) : match
65
- }
66
- end
67
-
68
- def self.detect_charset(src)
69
- tmp = NKF.guess(src || "<html></html>")
70
- if RUBY_VERSION >= "1.9.0"
71
- enc = tmp.to_s.upcase
72
- else
73
- enc = NKF.constants.find{|c|
74
- NKF.const_get(c) == tmp
75
- }
76
- enc = CODE_DIC[enc.intern]
77
- end
78
- enc || "ISO-8859-1"
79
- end
80
-
81
- def self.uri_escape str
82
- @parser ||= begin
83
- URI::Parser.new
84
- rescue NameError
85
- URI
86
- end
87
-
88
- @parser.escape str
89
- end
90
-
91
- def self.uri_unescape str
92
- @parser ||= begin
93
- URI::Parser.new
94
- rescue NameError
95
- URI
96
- end
97
-
98
- @parser.unescape str
99
- end
100
-
101
- end
data/lib/mechanize.rb DELETED
@@ -1,1079 +0,0 @@
1
- require 'fileutils'
2
- require 'forwardable'
3
- require 'iconv' if RUBY_VERSION < '1.9.2'
4
- require 'mutex_m'
5
- require 'net/http/digest_auth'
6
- require 'net/http/persistent'
7
- require 'nkf'
8
- require 'nokogiri'
9
- require 'openssl'
10
- require 'pp'
11
- require 'stringio'
12
- require 'uri'
13
- require 'webrick/httputils'
14
- require 'zlib'
15
-
16
- ##
17
- # The Mechanize library is used for automating interactions with a website. It
18
- # can follow links and submit forms. Form fields can be populated and
19
- # submitted. A history of URLs is maintained and can be queried.
20
- #
21
- # == Example
22
- #
23
- # require 'mechanize'
24
- # require 'logger'
25
- #
26
- # agent = Mechanize.new
27
- # agent.log = Logger.new "mech.log"
28
- # agent.user_agent_alias = 'Mac Safari'
29
- #
30
- # page = agent.get "http://www.google.com/"
31
- # search_form = page.form_with :name => "f"
32
- # search_form.field_with(:name => "q").value = "Hello"
33
- #
34
- # search_results = agent.submit search_form
35
- # puts search_results.body
36
-
37
- class Mechanize
38
-
39
- ##
40
- # The version of Mechanize you are using.
41
-
42
- VERSION = '2.1'
43
-
44
- ##
45
- # Base mechanize error class
46
-
47
- class Error < RuntimeError
48
- end
49
-
50
- ruby_version = if RUBY_PATCHLEVEL >= 0 then
51
- "#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}"
52
- else
53
- "#{RUBY_VERSION}dev#{RUBY_REVISION}"
54
- end
55
-
56
- ##
57
- # Supported User-Agent aliases for use with user_agent_alias=. The
58
- # description in parenthesis is for informative purposes and is not part of
59
- # the alias name.
60
- #
61
- # * Linux Firefox (3.6.1)
62
- # * Linux Konqueror (3)
63
- # * Linux Mozilla
64
- # * Mac FireFox (3.6)
65
- # * Mac Mozilla
66
- # * Mac Safari (5)
67
- # * Mac Safari 4
68
- # * Mechanize (default)
69
- # * Windows IE 6
70
- # * Windows IE 7
71
- # * Windows IE 8
72
- # * Windows IE 9
73
- # * Windows Mozilla
74
- # * iPhone (3.0)
75
- #
76
- # Example:
77
- #
78
- # agent = Mechanize.new
79
- # agent.user_agent_alias = 'Mac Safari'
80
-
81
- AGENT_ALIASES = {
82
- 'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)",
83
- 'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
84
- 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
85
- 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
86
- 'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
87
- 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
88
- 'Mac Safari 4' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
89
- 'Mac Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22',
90
- 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
91
- 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
92
- 'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
93
- 'Windows IE 9' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
94
- 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
95
- 'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
96
- }
97
-
98
- def self.inherited(child) # :nodoc:
99
- child.html_parser ||= html_parser
100
- child.log ||= log
101
- super
102
- end
103
-
104
- ##
105
- # Creates a new mechanize instance. If a block is given, the created
106
- # instance is yielded to the block for setting up pre-connection state such
107
- # as SSL parameters or proxies:
108
- #
109
- # agent = Mechanize.new do |a|
110
- # a.proxy_host = 'proxy.example'
111
- # a.proxy_port = 8080
112
- # end
113
-
114
- def initialize
115
- @agent = Mechanize::HTTP::Agent.new
116
- @agent.context = self
117
- @log = nil
118
-
119
- # attr_accessors
120
- @agent.user_agent = AGENT_ALIASES['Mechanize']
121
- @watch_for_set = nil
122
- @history_added = nil
123
-
124
- # attr_readers
125
- @pluggable_parser = PluggableParser.new
126
-
127
- @keep_alive_time = 0
128
-
129
- # Proxy
130
- @proxy_addr = nil
131
- @proxy_port = nil
132
- @proxy_user = nil
133
- @proxy_pass = nil
134
-
135
- @html_parser = self.class.html_parser
136
-
137
- @default_encoding = nil
138
- @force_default_encoding = false
139
-
140
- yield self if block_given?
141
-
142
- @agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
143
- @agent.set_http
144
- end
145
-
146
- # :section: History
147
- #
148
- # Methods for navigating and controlling history
149
-
150
- ##
151
- # Equivalent to the browser back button. Returns the previous page visited.
152
-
153
- def back
154
- @agent.history.pop
155
- end
156
-
157
- ##
158
- # Returns the latest page loaded by Mechanize
159
-
160
- def current_page
161
- @agent.current_page
162
- end
163
-
164
- alias page current_page
165
-
166
- ##
167
- # The history of this mechanize run
168
-
169
- def history
170
- @agent.history
171
- end
172
-
173
- ##
174
- # Maximum number of items allowed in the history.
175
-
176
- def max_history
177
- @agent.history.max_size
178
- end
179
-
180
- ##
181
- # Sets the maximum number of items allowed in the history to +length+.
182
-
183
- def max_history= length
184
- @agent.history.max_size = length
185
- end
186
-
187
- ##
188
- # Returns a visited page for the +url+ passed in, otherwise nil
189
-
190
- def visited? url
191
- url = url.href if url.respond_to? :href
192
-
193
- @agent.visited_page url
194
- end
195
-
196
- ##
197
- # Returns whether or not a url has been visited
198
-
199
- alias visited_page visited?
200
-
201
- # :section: Hooks
202
- #
203
- # Hooks into the operation of mechanize
204
-
205
- ##
206
- # A list of hooks to call before reading response header 'content-encoding'.
207
- #
208
- # The hook is called with the agent making the request, the URI of the
209
- # request, the response and an IO containing the response body.
210
-
211
- def content_encoding_hooks
212
- @agent.content_encoding_hooks
213
- end
214
-
215
- ##
216
- # Callback which is invoked with the page that was added to history.
217
-
218
- attr_accessor :history_added
219
-
220
- ##
221
- # A list of hooks to call after retrieving a response. Hooks are called with
222
- # the agent and the response returned.
223
-
224
- def post_connect_hooks
225
- @agent.post_connect_hooks
226
- end
227
-
228
- ##
229
- # A list of hooks to call before making a request. Hooks are called with
230
- # the agent and the request to be performed.
231
-
232
- def pre_connect_hooks
233
- @agent.pre_connect_hooks
234
- end
235
-
236
- # :section: Requests
237
- #
238
- # Methods for making HTTP requests
239
-
240
- ##
241
- # If the parameter is a string, finds the button or link with the
242
- # value of the string on the current page and clicks it. Otherwise, clicks
243
- # the Mechanize::Page::Link object passed in. Returns the page fetched.
244
-
245
- def click link
246
- case link
247
- when Page::Link then
248
- referer = link.page || current_page()
249
- if @agent.robots
250
- if (referer.is_a?(Page) and referer.parser.nofollow?) or
251
- link.rel?('nofollow') then
252
- raise RobotsDisallowedError.new(link.href)
253
- end
254
- end
255
- if link.rel?('noreferrer')
256
- href = @agent.resolve(link.href, link.page || current_page)
257
- referer = Page.new(nil, {'content-type'=>'text/html'})
258
- else
259
- href = link.href
260
- end
261
- get href, [], referer
262
- when String, Regexp then
263
- if real_link = page.link_with(:text => link)
264
- click real_link
265
- else
266
- button = nil
267
- form = page.forms.find do |f|
268
- button = f.button_with(:value => link)
269
- button.is_a? Form::Submit
270
- end
271
- submit form, button if form
272
- end
273
- else
274
- referer = current_page()
275
- href = link.respond_to?(:href) ? link.href :
276
- (link['href'] || link['src'])
277
- get href, [], referer
278
- end
279
- end
280
-
281
- ##
282
- # DELETE +uri+ with +query_params+, and setting +headers+:
283
- #
284
- # delete('http://example/', {'q' => 'foo'}, {})
285
-
286
- def delete(uri, query_params = {}, headers = {})
287
- page = @agent.fetch(uri, :delete, headers, query_params)
288
- add_to_history(page)
289
- page
290
- end
291
-
292
- ##
293
- # GET the +uri+ with the given request +parameters+, +referer+ and
294
- # +headers+.
295
- #
296
- # The +referer+ may be a URI or a page.
297
-
298
- def get(uri, parameters = [], referer = nil, headers = {})
299
- method = :get
300
-
301
- referer ||=
302
- if uri.to_s =~ %r{\Ahttps?://}
303
- Page.new(nil, {'content-type'=>'text/html'})
304
- else
305
- current_page || Page.new(nil, {'content-type'=>'text/html'})
306
- end
307
-
308
- # FIXME: Huge hack so that using a URI as a referer works. I need to
309
- # refactor everything to pass around URIs but still support
310
- # Mechanize::Page#base
311
- unless Mechanize::Parser === referer then
312
- referer = referer.is_a?(String) ?
313
- Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
314
- Page.new(referer, {'content-type' => 'text/html'})
315
- end
316
-
317
- # fetch the page
318
- headers ||= {}
319
- page = @agent.fetch uri, method, headers, parameters, referer
320
- add_to_history(page)
321
- yield page if block_given?
322
- page
323
- end
324
-
325
- ##
326
- # GET +url+ and return only its contents
327
-
328
- def get_file(url)
329
- get(url).body
330
- end
331
-
332
- ##
333
- # HEAD +uri+ with +query_params+, and setting +headers+:
334
- #
335
- # head('http://example/', {'q' => 'foo'}, {})
336
-
337
- def head(uri, query_params = {}, headers = {})
338
- # fetch the page
339
- page = @agent.fetch(uri, :head, headers, query_params)
340
- yield page if block_given?
341
- page
342
- end
343
-
344
- ##
345
- # POST to the given +uri+ with the given +query+. The query is specified by
346
- # either a string, or a list of key-value pairs represented by a hash or an
347
- # array of arrays.
348
- #
349
- # Examples:
350
- # agent.post 'http://example.com/', "foo" => "bar"
351
- #
352
- # agent.post 'http://example.com/', [%w[foo bar]]
353
- #
354
- # agent.post('http://example.com/', "<message>hello</message>",
355
- # 'Content-Type' => 'application/xml')
356
-
357
- def post(uri, query={}, headers={})
358
- return request_with_entity(:post, uri, query, headers) if String === query
359
-
360
- node = {}
361
- # Create a fake form
362
- class << node
363
- def search(*args); []; end
364
- end
365
- node['method'] = 'POST'
366
- node['enctype'] = 'application/x-www-form-urlencoded'
367
-
368
- form = Form.new(node)
369
-
370
- query.each { |k, v|
371
- if v.is_a?(IO)
372
- form.enctype = 'multipart/form-data'
373
- ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
374
- ul.file_data = v.read
375
- form.file_uploads << ul
376
- else
377
- form.fields << Form::Field.new({'name' => k.to_s},v)
378
- end
379
- }
380
- post_form(uri, form, headers)
381
- end
382
-
383
- ##
384
- # PUT to +uri+ with +entity+, and setting +headers+:
385
- #
386
- # put('http://example/', 'new content', {'Content-Type' => 'text/plain'})
387
-
388
- def put(uri, entity, headers = {})
389
- request_with_entity(:put, uri, entity, headers)
390
- end
391
-
392
- ##
393
- # Makes an HTTP request to +uri+ using HTTP method +verb+. +entity+ is used
394
- # as the request body, if allowed.
395
-
396
- def request_with_entity(verb, uri, entity, headers = {})
397
- cur_page = current_page || Page.new(nil, {'content-type'=>'text/html'})
398
-
399
- headers = {
400
- 'Content-Type' => 'application/octet-stream',
401
- 'Content-Length' => entity.size.to_s,
402
- }.update headers
403
-
404
- page = @agent.fetch uri, verb, headers, [entity], cur_page
405
- add_to_history(page)
406
- page
407
- end
408
-
409
- ##
410
- # Submits +form+ with an optional +button+.
411
- #
412
- # Without a button:
413
- #
414
- # page = agent.get('http://example.com')
415
- # agent.submit(page.forms.first)
416
- #
417
- # With a button:
418
- #
419
- # agent.submit(page.forms.first, page.forms.first.buttons.first)
420
-
421
- def submit(form, button=nil, headers={})
422
- form.add_button_to_query(button) if button
423
-
424
- case form.method.upcase
425
- when 'POST'
426
- post_form(form.action, form, headers)
427
- when 'GET'
428
- get(form.action.gsub(/\?[^\?]*$/, ''),
429
- form.build_query,
430
- form.page,
431
- headers)
432
- else
433
- raise ArgumentError, "unsupported method: #{form.method.upcase}"
434
- end
435
- end
436
-
437
- ##
438
- # Runs given block, then resets the page history as it was before. self is
439
- # given as a parameter to the block. Returns the value of the block.
440
-
441
- def transact
442
- history_backup = @agent.history.dup
443
- begin
444
- yield self
445
- ensure
446
- @agent.history = history_backup
447
- end
448
- end
449
-
450
- # :section: Settings
451
- #
452
- # Settings that adjust how mechanize makes HTTP requests including timeouts,
453
- # keep-alives, compression, redirects and headers.
454
-
455
- @html_parser = Nokogiri::HTML
456
-
457
- class << self
458
-
459
- ##
460
- # Default HTML parser for all mechanize instances
461
- #
462
- # Mechanize.html_parser = Nokogiri::XML
463
-
464
- attr_accessor :html_parser
465
-
466
- ##
467
- # Default logger for all mechanize instances
468
- #
469
- # Mechanize.log = Logger.new $stderr
470
-
471
- attr_accessor :log
472
-
473
- end
474
-
475
- ##
476
- # A default encoding name used when parsing HTML. When set it is
477
- # used after any other encoding. The default is nil.
478
-
479
- attr_accessor :default_encoding
480
-
481
- ##
482
- # Overrides the encodings given by the HTTP server and the HTML page with
483
- # the default_encoding when set to true.
484
-
485
- attr_accessor :force_default_encoding
486
-
487
- ##
488
- # The HTML parser to be used when parsing documents
489
-
490
- attr_accessor :html_parser
491
-
492
- ##
493
- # HTTP/1.0 keep-alive time. This is no longer supported by mechanize as it
494
- # now uses net-http-persistent which only supports HTTP/1.1 persistent
495
- # connections
496
-
497
- attr_accessor :keep_alive_time
498
-
499
- ##
500
- # The HTTP proxy address
501
-
502
- attr_reader :proxy_addr
503
-
504
- ##
505
- # The HTTP proxy password
506
-
507
- attr_reader :proxy_pass
508
-
509
- ##
510
- # The HTTP proxy port
511
-
512
- attr_reader :proxy_port
513
-
514
- ##
515
- # The HTTP proxy username
516
-
517
- attr_reader :proxy_user
518
-
519
- ##
520
- # Sets the user and password to be used for HTTP authentication.
521
-
522
- def auth(user, password)
523
- @agent.user = user
524
- @agent.password = password
525
- end
526
-
527
- alias basic_auth auth
528
-
529
- ##
530
- # Are If-Modified-Since conditional requests enabled?
531
-
532
- def conditional_requests
533
- @agent.conditional_requests
534
- end
535
-
536
- ##
537
- # Disables If-Modified-Since conditional requests (enabled by default)
538
-
539
- def conditional_requests= enabled
540
- @agent.conditional_requests = enabled
541
- end
542
-
543
- ##
544
- # A Mechanize::CookieJar which stores cookies
545
-
546
- def cookie_jar
547
- @agent.cookie_jar
548
- end
549
-
550
- ##
551
- # Replaces the cookie jar with +cookie_jar+
552
-
553
- def cookie_jar= cookie_jar
554
- @agent.cookie_jar = cookie_jar
555
- end
556
-
557
- ##
558
- # Returns a list of cookies stored in the cookie jar.
559
-
560
- def cookies
561
- @agent.cookie_jar.to_a
562
- end
563
-
564
- ##
565
- # Follow HTML meta refresh and HTTP Refresh headers. If set to +:anywhere+
566
- # meta refresh tags outside of the head element will be followed.
567
-
568
- def follow_meta_refresh
569
- @agent.follow_meta_refresh
570
- end
571
-
572
- ##
573
- # Controls following of HTML meta refresh and HTTP Refresh headers in
574
- # responses.
575
-
576
- def follow_meta_refresh= follow
577
- @agent.follow_meta_refresh = follow
578
- end
579
-
580
- ##
581
- # Follow an HTML meta refresh and HTTP Refresh headers that have no "url="
582
- # in the content attribute.
583
- #
584
- # Defaults to false to prevent infinite refresh loops.
585
-
586
- def follow_meta_refresh_self
587
- @agent.follow_meta_refresh_self
588
- end
589
-
590
- ##
591
- # Alters the following of HTML meta refresh and HTTP Refresh headers that
592
- # point to the same page.
593
-
594
- def follow_meta_refresh_self= follow
595
- @agent.follow_meta_refresh_self = follow
596
- end
597
-
598
- ##
599
- # Is gzip compression of responses enabled?
600
-
601
- def gzip_enabled
602
- @agent.gzip_enabled
603
- end
604
-
605
- ##
606
- # Disables HTTP/1.1 gzip compression (enabled by default)
607
-
608
- def gzip_enabled=enabled
609
- @agent.gzip_enabled = enabled
610
- end
611
-
612
- ##
613
- # Connections that have not been used in this many seconds will be reset.
614
-
615
- def idle_timeout
616
- @agent.idle_timeout
617
- end
618
-
619
- # Sets the idle timeout to +idle_timeout+. The default timeout is 5
620
- # seconds. If you experience "too many connection resets", reducing this
621
- # value may help.
622
-
623
- def idle_timeout= idle_timeout
624
- @agent.idle_timeout = idle_timeout
625
- end
626
-
627
- ##
628
- # Are HTTP/1.1 keep-alive connections enabled?
629
-
630
- def keep_alive
631
- @agent.keep_alive
632
- end
633
-
634
- ##
635
- # Disable HTTP/1.1 keep-alive connections if +enable+ is set to false. If
636
- # you are experiencing "too many connection resets" errors setting this to
637
- # false will eliminate them.
638
- #
639
- # You should first investigate reducing idle_timeout.
640
-
641
- def keep_alive= enable
642
- @agent.keep_alive = enable
643
- end
644
-
645
- ##
646
- # The current logger. If no logger has been set Mechanize.log is used.
647
-
648
- def log
649
- @log || Mechanize.log
650
- end
651
-
652
- ##
653
- # Sets the +logger+ used by this instance of mechanize
654
-
655
- def log= logger
656
- @log = logger
657
- end
658
-
659
- ##
660
- # Responses larger than this will be written to a Tempfile instead of stored
661
- # in memory. The default is 10240 bytes
662
-
663
- def max_file_buffer
664
- @agent.max_file_buffer
665
- end
666
-
667
- ##
668
- # Sets the maximum size of a response body that will be stored in memory to
669
- # +bytes+
670
-
671
- def max_file_buffer= bytes
672
- @agent.max_file_buffer = bytes
673
- end
674
-
675
- ##
676
- # Length of time to wait until a connection is opened in seconds
677
-
678
- def open_timeout
679
- @agent.open_timeout
680
- end
681
-
682
- ##
683
- # Sets the connection open timeout to +open_timeout+
684
-
685
- def open_timeout= open_timeout
686
- @agent.open_timeout = open_timeout
687
- end
688
-
689
- ##
690
- # Length of time to wait for data from the server
691
-
692
- def read_timeout
693
- @agent.read_timeout
694
- end
695
-
696
- ##
697
- # Sets the timeout for each chunk of data read from the server to
698
- # +read_timeout+. A single request may read many chunks of data.
699
-
700
- def read_timeout= read_timeout
701
- @agent.read_timeout = read_timeout
702
- end
703
-
704
- ##
705
- # Controls how mechanize deals with redirects. The following values are
706
- # allowed:
707
- #
708
- # :all, true:: All 3xx redirects are followed (default)
709
- # :permanent:: Only 301 Moved Permanently redirects are followed
710
- # false:: No redirects are followed
711
-
712
- def redirect_ok
713
- @agent.redirect_ok
714
- end
715
-
716
- alias follow_redirect? redirect_ok
717
-
718
- ##
719
- # Sets the mechanize redirect handling policy. See redirect_ok for allowed
720
- # values
721
-
722
- def redirect_ok= follow
723
- @agent.redirect_ok = follow
724
- end
725
-
726
- ##
727
- # Maximum number of redirections to follow
728
-
729
- def redirection_limit
730
- @agent.redirection_limit
731
- end
732
-
733
- ##
734
- # Sets the maximum number of redirections to follow to +limit+
735
-
736
- def redirection_limit= limit
737
- @agent.redirection_limit = limit
738
- end
739
-
740
- ##
741
- # A hash of custom request headers that will be sent on every request
742
-
743
- def request_headers
744
- @agent.request_headers
745
- end
746
-
747
- ##
748
- # Replaces the custom request headers that will be sent on every request
749
- # with +request_headers+
750
-
751
- def request_headers= request_headers
752
- @agent.request_headers = request_headers
753
- end
754
-
755
- ##
756
- # Retry POST and other non-idempotent requests. See RFC 2616 9.1.2.
757
-
758
- def retry_change_requests
759
- @agent.retry_change_requests
760
- end
761
-
762
- ##
763
- # When setting +retry_change_requests+ to true you are stating that, for all
764
- # the URLs you access with mechanize, making POST and other non-idempotent
765
- # requests is safe and will not cause data duplication or other harmful
766
- # results.
767
- #
768
- # If you are experiencing "too many connection resets" errors you should
769
- # instead investigate reducing the idle_timeout or disabling keep_alive
770
- # connections.
771
-
772
- def retry_change_requests= retry_change_requests
773
- @agent.retry_change_requests = retry_change_requests
774
- end
775
-
776
- ##
777
- # Will <code>/robots.txt</code> files be obeyed?
778
-
779
- def robots
780
- @agent.robots
781
- end
782
-
783
- ##
784
- # When +enabled+ mechanize will retrieve and obey <code>robots.txt</code>
785
- # files
786
-
787
- def robots= enabled
788
- @agent.robots = enabled
789
- end
790
-
791
- ##
792
- # The handlers for HTTP and other URI protocols.
793
-
794
- def scheme_handlers
795
- @agent.scheme_handlers
796
- end
797
-
798
- ##
799
- # Replaces the URI scheme handler table with +scheme_handlers+
800
-
801
- def scheme_handlers= scheme_handlers
802
- @agent.scheme_handlers = scheme_handlers
803
- end
804
-
805
- ##
806
- # The identification string for the client initiating a web request
807
-
808
- def user_agent
809
- @agent.user_agent
810
- end
811
-
812
- ##
813
- # Sets the User-Agent used by mechanize to +user_agent+. See also
814
- # user_agent_alias
815
-
816
- def user_agent= user_agent
817
- @agent.user_agent = user_agent
818
- end
819
-
820
- ##
821
- # Set the user agent for the Mechanize object based on the given +name+.
822
- #
823
- # See also AGENT_ALIASES
824
-
825
- def user_agent_alias= name
826
- self.user_agent = AGENT_ALIASES[name] ||
827
- raise(ArgumentError, "unknown agent alias #{name.inspect}")
828
- end
829
-
830
- ##
831
- # The value of watch_for_set is passed to pluggable parsers for retrieved
832
- # content
833
-
834
- attr_accessor :watch_for_set
835
-
836
- # :section: SSL
837
- #
838
- # SSL settings for mechanize. These must be set in the block given to
839
- # Mechanize.new
840
-
841
- ##
842
- # Path to an OpenSSL server certificate file
843
-
844
- def ca_file
845
- @agent.ca_file
846
- end
847
-
848
- ##
849
- # Sets the certificate file used for SSL connections
850
-
851
- def ca_file= ca_file
852
- @agent.ca_file = ca_file
853
- end
854
-
855
- ##
856
- # An OpenSSL client certificate or the path to a certificate file.
857
-
858
- def cert
859
- @agent.cert
860
- end
861
-
862
- ##
863
- # Sets the OpenSSL client certificate +cert+ to the given path or
864
- # certificate instance
865
-
866
- def cert= cert
867
- @agent.cert = cert
868
- end
869
-
870
- ##
871
- # An OpenSSL certificate store for verifying server certificates. This
872
- # defaults to the default certificate store.
873
-
874
- def cert_store
875
- @agent.cert_store
876
- end
877
-
878
- ##
879
- # Sets the OpenSSL certificate store to +cert_store+.
880
-
881
- def cert_store= cert_store
882
- @agent.cert_store = cert_store
883
- end
884
-
885
- ##
886
- # What is this?
887
- #
888
- # Why is it different from #cert?
889
-
890
- def certificate # :nodoc:
891
- @agent.certificate
892
- end
893
-
894
- ##
895
- # An OpenSSL private key or the path to a private key
896
-
897
- def key
898
- @agent.key
899
- end
900
-
901
- ##
902
- # Sets the OpenSSL client +key+ to the given path or key instance
903
-
904
- def key= key
905
- @agent.key = key
906
- end
907
-
908
- ##
909
- # OpenSSL client key password
910
-
911
- def pass
912
- @agent.pass
913
- end
914
-
915
- ##
916
- # Sets the client key password to +pass+
917
-
918
- def pass= pass
919
- @agent.pass = pass
920
- end
921
-
922
- ##
923
- # A callback for additional certificate verification. See
924
- # OpenSSL::SSL::SSLContext#verify_callback
925
- #
926
- # The callback can be used for debugging or to ignore errors by always
927
- # returning +true+. Specifying nil uses the default method that was valid
928
- # when the SSLContext was created
929
-
930
- def verify_callback
931
- @agent.verify_callback
932
- end
933
-
934
- ##
935
- # Sets the OpenSSL certificate verification callback
936
-
937
- def verify_callback= verify_callback
938
- @agent.verify_callback = verify_callback
939
- end
940
-
941
- ##
942
- # The OpenSSL server certificate verification method. The default is
943
- # OpenSSL::SSL::VERIFY_PEER and certificate verification uses the default
944
- # system certificates. See also cert_store
945
-
946
- def verify_mode
947
- @agent.verify_mode
948
- end
949
-
950
- ##
951
- # Sets the OpenSSL server certificate verification method.
952
-
953
- def verify_mode= verify_mode
954
- @agent.verify_mode = verify_mode
955
- end
956
-
957
- # :section: Utilities
958
-
959
- attr_reader :agent # :nodoc:
960
-
961
- attr_reader :pluggable_parser # :nodoc:
962
-
963
- ##
964
- # Parses the +body+ of the +response+ from +uri+ using the pluggable parser
965
- # that matches its content type
966
-
967
- def parse uri, response, body
968
- content_type = nil
969
-
970
- unless response['Content-Type'].nil?
971
- data, = response['Content-Type'].split ';', 2
972
- content_type, = data.downcase.split ',', 2 unless data.nil?
973
- end
974
-
975
- # Find our pluggable parser
976
- parser_klass = @pluggable_parser.parser content_type
977
-
978
- unless parser_klass <= Mechanize::Download then
979
- body = case body
980
- when IO, Tempfile, StringIO then
981
- body.read
982
- else
983
- body
984
- end
985
- end
986
-
987
- parser_klass.new uri, response, body, response.code do |parser|
988
- parser.mech = self if parser.respond_to? :mech=
989
-
990
- parser.watch_for_set = @watch_for_set if
991
- @watch_for_set and parser.respond_to?(:watch_for_set=)
992
- end
993
- end
994
-
995
- def pretty_print(q) # :nodoc:
996
- q.object_group(self) {
997
- q.breakable
998
- q.pp cookie_jar
999
- q.breakable
1000
- q.pp current_page
1001
- }
1002
- end
1003
-
1004
- ##
1005
- # Sets the proxy +address+ at +port+ with an optional +user+ and +password+
1006
-
1007
- def set_proxy address, port, user = nil, password = nil
1008
- @proxy_addr = address
1009
- @proxy_port = port
1010
- @proxy_user = user
1011
- @proxy_pass = password
1012
-
1013
- @agent.set_proxy address, port, user, password
1014
- @agent.set_http
1015
- end
1016
-
1017
- private
1018
-
1019
- ##
1020
- # Posts +form+ to +uri+
1021
-
1022
- def post_form(uri, form, headers = {})
1023
- cur_page = form.page || current_page ||
1024
- Page.new(nil, {'content-type'=>'text/html'})
1025
-
1026
- request_data = form.request_data
1027
-
1028
- log.debug("query: #{ request_data.inspect }") if log
1029
-
1030
- headers = {
1031
- 'Content-Type' => form.enctype,
1032
- 'Content-Length' => request_data.size.to_s,
1033
- }.merge headers
1034
-
1035
- # fetch the page
1036
- page = @agent.fetch uri, :post, headers, [request_data], cur_page
1037
- add_to_history(page)
1038
- page
1039
- end
1040
-
1041
- ##
1042
- # Adds +page+ to the history
1043
-
1044
- def add_to_history(page)
1045
- @agent.history.push(page, @agent.resolve(page.uri))
1046
- @history_added.call(page) if @history_added
1047
- end
1048
-
1049
- end
1050
-
1051
- require 'mechanize/content_type_error'
1052
- require 'mechanize/cookie'
1053
- require 'mechanize/cookie_jar'
1054
- require 'mechanize/parser'
1055
- require 'mechanize/download'
1056
- require 'mechanize/file'
1057
- require 'mechanize/file_connection'
1058
- require 'mechanize/file_request'
1059
- require 'mechanize/file_response'
1060
- require 'mechanize/form'
1061
- require 'mechanize/history'
1062
- require 'mechanize/http'
1063
- require 'mechanize/http/agent'
1064
- require 'mechanize/http/auth_challenge'
1065
- require 'mechanize/http/auth_realm'
1066
- require 'mechanize/http/content_disposition_parser'
1067
- require 'mechanize/http/www_authenticate_parser'
1068
- require 'mechanize/page'
1069
- require 'mechanize/monkey_patch'
1070
- require 'mechanize/pluggable_parsers'
1071
- require 'mechanize/redirect_limit_reached_error'
1072
- require 'mechanize/redirect_not_get_or_head_error'
1073
- require 'mechanize/response_code_error'
1074
- require 'mechanize/unauthorized_error'
1075
- require 'mechanize/response_read_error'
1076
- require 'mechanize/robots_disallowed_error'
1077
- require 'mechanize/unsupported_scheme_error'
1078
- require 'mechanize/util'
1079
-