diamond-mechanize 2.1 → 2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (147) hide show
  1. metadata +222 -167
  2. data/Rakefile +0 -49
  3. data/lib/mechanize/content_type_error.rb +0 -13
  4. data/lib/mechanize/cookie.rb +0 -232
  5. data/lib/mechanize/cookie_jar.rb +0 -194
  6. data/lib/mechanize/download.rb +0 -59
  7. data/lib/mechanize/element_matcher.rb +0 -36
  8. data/lib/mechanize/file.rb +0 -65
  9. data/lib/mechanize/file_connection.rb +0 -17
  10. data/lib/mechanize/file_request.rb +0 -26
  11. data/lib/mechanize/file_response.rb +0 -74
  12. data/lib/mechanize/file_saver.rb +0 -39
  13. data/lib/mechanize/form/button.rb +0 -6
  14. data/lib/mechanize/form/check_box.rb +0 -12
  15. data/lib/mechanize/form/field.rb +0 -54
  16. data/lib/mechanize/form/file_upload.rb +0 -21
  17. data/lib/mechanize/form/hidden.rb +0 -3
  18. data/lib/mechanize/form/image_button.rb +0 -19
  19. data/lib/mechanize/form/keygen.rb +0 -34
  20. data/lib/mechanize/form/multi_select_list.rb +0 -94
  21. data/lib/mechanize/form/option.rb +0 -50
  22. data/lib/mechanize/form/radio_button.rb +0 -55
  23. data/lib/mechanize/form/reset.rb +0 -3
  24. data/lib/mechanize/form/select_list.rb +0 -44
  25. data/lib/mechanize/form/submit.rb +0 -3
  26. data/lib/mechanize/form/text.rb +0 -3
  27. data/lib/mechanize/form/textarea.rb +0 -3
  28. data/lib/mechanize/form.rb +0 -543
  29. data/lib/mechanize/headers.rb +0 -23
  30. data/lib/mechanize/history.rb +0 -82
  31. data/lib/mechanize/http/agent.rb +0 -1004
  32. data/lib/mechanize/http/auth_challenge.rb +0 -59
  33. data/lib/mechanize/http/auth_realm.rb +0 -31
  34. data/lib/mechanize/http/content_disposition_parser.rb +0 -188
  35. data/lib/mechanize/http/www_authenticate_parser.rb +0 -155
  36. data/lib/mechanize/http.rb +0 -8
  37. data/lib/mechanize/monkey_patch.rb +0 -16
  38. data/lib/mechanize/page/base.rb +0 -7
  39. data/lib/mechanize/page/frame.rb +0 -27
  40. data/lib/mechanize/page/image.rb +0 -30
  41. data/lib/mechanize/page/label.rb +0 -20
  42. data/lib/mechanize/page/link.rb +0 -98
  43. data/lib/mechanize/page/meta_refresh.rb +0 -68
  44. data/lib/mechanize/page.rb +0 -440
  45. data/lib/mechanize/parser.rb +0 -173
  46. data/lib/mechanize/pluggable_parsers.rb +0 -144
  47. data/lib/mechanize/redirect_limit_reached_error.rb +0 -19
  48. data/lib/mechanize/redirect_not_get_or_head_error.rb +0 -21
  49. data/lib/mechanize/response_code_error.rb +0 -21
  50. data/lib/mechanize/response_read_error.rb +0 -27
  51. data/lib/mechanize/robots_disallowed_error.rb +0 -28
  52. data/lib/mechanize/test_case.rb +0 -663
  53. data/lib/mechanize/unauthorized_error.rb +0 -3
  54. data/lib/mechanize/unsupported_scheme_error.rb +0 -6
  55. data/lib/mechanize/util.rb +0 -101
  56. data/lib/mechanize.rb +0 -1079
  57. data/test/data/htpasswd +0 -1
  58. data/test/data/server.crt +0 -16
  59. data/test/data/server.csr +0 -12
  60. data/test/data/server.key +0 -15
  61. data/test/data/server.pem +0 -15
  62. data/test/htdocs/alt_text.html +0 -10
  63. data/test/htdocs/bad_form_test.html +0 -9
  64. data/test/htdocs/button.jpg +0 -0
  65. data/test/htdocs/canonical_uri.html +0 -9
  66. data/test/htdocs/dir with spaces/foo.html +0 -1
  67. data/test/htdocs/empty_form.html +0 -6
  68. data/test/htdocs/file_upload.html +0 -26
  69. data/test/htdocs/find_link.html +0 -41
  70. data/test/htdocs/form_multi_select.html +0 -16
  71. data/test/htdocs/form_multival.html +0 -37
  72. data/test/htdocs/form_no_action.html +0 -18
  73. data/test/htdocs/form_no_input_name.html +0 -16
  74. data/test/htdocs/form_order_test.html +0 -11
  75. data/test/htdocs/form_select.html +0 -16
  76. data/test/htdocs/form_set_fields.html +0 -14
  77. data/test/htdocs/form_test.html +0 -188
  78. data/test/htdocs/frame_referer_test.html +0 -10
  79. data/test/htdocs/frame_test.html +0 -30
  80. data/test/htdocs/google.html +0 -13
  81. data/test/htdocs/index.html +0 -6
  82. data/test/htdocs/link with space.html +0 -5
  83. data/test/htdocs/meta_cookie.html +0 -11
  84. data/test/htdocs/no_title_test.html +0 -6
  85. data/test/htdocs/noindex.html +0 -9
  86. data/test/htdocs/rails_3_encoding_hack_form_test.html +0 -27
  87. data/test/htdocs/relative/tc_relative_links.html +0 -21
  88. data/test/htdocs/robots.html +0 -8
  89. data/test/htdocs/robots.txt +0 -2
  90. data/test/htdocs/tc_bad_charset.html +0 -9
  91. data/test/htdocs/tc_bad_links.html +0 -5
  92. data/test/htdocs/tc_base_link.html +0 -8
  93. data/test/htdocs/tc_blank_form.html +0 -11
  94. data/test/htdocs/tc_charset.html +0 -6
  95. data/test/htdocs/tc_checkboxes.html +0 -19
  96. data/test/htdocs/tc_encoded_links.html +0 -5
  97. data/test/htdocs/tc_field_precedence.html +0 -11
  98. data/test/htdocs/tc_follow_meta.html +0 -8
  99. data/test/htdocs/tc_form_action.html +0 -48
  100. data/test/htdocs/tc_links.html +0 -19
  101. data/test/htdocs/tc_meta_in_body.html +0 -9
  102. data/test/htdocs/tc_pretty_print.html +0 -17
  103. data/test/htdocs/tc_referer.html +0 -16
  104. data/test/htdocs/tc_relative_links.html +0 -19
  105. data/test/htdocs/tc_textarea.html +0 -23
  106. data/test/htdocs/test_click.html +0 -11
  107. data/test/htdocs/unusual______.html +0 -5
  108. data/test/test_mechanize.rb +0 -1164
  109. data/test/test_mechanize_cookie.rb +0 -451
  110. data/test/test_mechanize_cookie_jar.rb +0 -483
  111. data/test/test_mechanize_download.rb +0 -43
  112. data/test/test_mechanize_file.rb +0 -61
  113. data/test/test_mechanize_file_connection.rb +0 -21
  114. data/test/test_mechanize_file_request.rb +0 -19
  115. data/test/test_mechanize_file_saver.rb +0 -21
  116. data/test/test_mechanize_form.rb +0 -875
  117. data/test/test_mechanize_form_check_box.rb +0 -38
  118. data/test/test_mechanize_form_encoding.rb +0 -114
  119. data/test/test_mechanize_form_field.rb +0 -63
  120. data/test/test_mechanize_form_file_upload.rb +0 -20
  121. data/test/test_mechanize_form_image_button.rb +0 -12
  122. data/test/test_mechanize_form_keygen.rb +0 -32
  123. data/test/test_mechanize_form_multi_select_list.rb +0 -84
  124. data/test/test_mechanize_form_option.rb +0 -55
  125. data/test/test_mechanize_form_radio_button.rb +0 -78
  126. data/test/test_mechanize_form_select_list.rb +0 -76
  127. data/test/test_mechanize_form_textarea.rb +0 -52
  128. data/test/test_mechanize_headers.rb +0 -35
  129. data/test/test_mechanize_history.rb +0 -103
  130. data/test/test_mechanize_http_agent.rb +0 -1225
  131. data/test/test_mechanize_http_auth_challenge.rb +0 -39
  132. data/test/test_mechanize_http_auth_realm.rb +0 -49
  133. data/test/test_mechanize_http_content_disposition_parser.rb +0 -118
  134. data/test/test_mechanize_http_www_authenticate_parser.rb +0 -146
  135. data/test/test_mechanize_link.rb +0 -80
  136. data/test/test_mechanize_page.rb +0 -118
  137. data/test/test_mechanize_page_encoding.rb +0 -182
  138. data/test/test_mechanize_page_frame.rb +0 -16
  139. data/test/test_mechanize_page_link.rb +0 -390
  140. data/test/test_mechanize_page_meta_refresh.rb +0 -127
  141. data/test/test_mechanize_parser.rb +0 -289
  142. data/test/test_mechanize_pluggable_parser.rb +0 -52
  143. data/test/test_mechanize_redirect_limit_reached_error.rb +0 -24
  144. data/test/test_mechanize_redirect_not_get_or_head_error.rb +0 -14
  145. data/test/test_mechanize_subclass.rb +0 -22
  146. data/test/test_mechanize_util.rb +0 -103
  147. data/test/test_multi_select.rb +0 -119
@@ -1,3 +0,0 @@
1
- class Mechanize::UnauthorizedError < Mechanize::ResponseCodeError
2
- end
3
-
@@ -1,6 +0,0 @@
1
- class Mechanize::UnsupportedSchemeError < Mechanize::Error
2
- attr_accessor :scheme
3
- def initialize(scheme)
4
- @scheme = scheme
5
- end
6
- end
@@ -1,101 +0,0 @@
1
- require 'cgi'
2
-
3
- class Mechanize::Util
4
- CODE_DIC = {
5
- :JIS => "ISO-2022-JP",
6
- :EUC => "EUC-JP",
7
- :SJIS => "SHIFT_JIS",
8
- :UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"}
9
-
10
- # true if RUBY_VERSION is 1.9.0 or later
11
- NEW_RUBY_ENCODING = RUBY_VERSION >= '1.9.0'
12
-
13
- # contains encoding error classes to raise
14
- ENCODING_ERRORS = if NEW_RUBY_ENCODING
15
- [EncodingError]
16
- else
17
- [Iconv::InvalidEncoding, Iconv::IllegalSequence]
18
- end
19
-
20
- def self.build_query_string(parameters, enc=nil)
21
- parameters.map { |k,v|
22
- # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
23
- [CGI.escape(k.to_s), CGI.escape(v.to_s)].join("=") if k
24
- }.compact.join('&')
25
- end
26
-
27
- # Converts string +s+ from +code+ to UTF-8.
28
- def self.from_native_charset(s, code, ignore_encoding_error=false, log=nil)
29
- return s unless s && code
30
- return s unless Mechanize.html_parser == Nokogiri::HTML
31
-
32
- begin
33
- encode_to(code, s)
34
- rescue *ENCODING_ERRORS => ex
35
- log.debug("from_native_charset: #{ex.class}: form encoding: #{code.inspect} string: #{s}") if log
36
- if ignore_encoding_error
37
- s
38
- else
39
- raise
40
- end
41
- end
42
- end
43
-
44
- # inner convert method of Util.from_native_charset
45
- def self.encode_to(encoding, str)
46
- if NEW_RUBY_ENCODING
47
- str.encode(encoding)
48
- else
49
- Iconv.conv(encoding.to_s, "UTF-8", str)
50
- end
51
- end
52
- private_class_method :encode_to
53
-
54
- def self.html_unescape(s)
55
- return s unless s
56
- s.gsub(/&(\w+|#[0-9]+);/) { |match|
57
- number = case match
58
- when /&(\w+);/
59
- Mechanize.html_parser::NamedCharacters[$1]
60
- when /&#([0-9]+);/
61
- $1.to_i
62
- end
63
-
64
- number ? ([number].pack('U') rescue match) : match
65
- }
66
- end
67
-
68
- def self.detect_charset(src)
69
- tmp = NKF.guess(src || "<html></html>")
70
- if RUBY_VERSION >= "1.9.0"
71
- enc = tmp.to_s.upcase
72
- else
73
- enc = NKF.constants.find{|c|
74
- NKF.const_get(c) == tmp
75
- }
76
- enc = CODE_DIC[enc.intern]
77
- end
78
- enc || "ISO-8859-1"
79
- end
80
-
81
- def self.uri_escape str
82
- @parser ||= begin
83
- URI::Parser.new
84
- rescue NameError
85
- URI
86
- end
87
-
88
- @parser.escape str
89
- end
90
-
91
- def self.uri_unescape str
92
- @parser ||= begin
93
- URI::Parser.new
94
- rescue NameError
95
- URI
96
- end
97
-
98
- @parser.unescape str
99
- end
100
-
101
- end
data/lib/mechanize.rb DELETED
@@ -1,1079 +0,0 @@
1
- require 'fileutils'
2
- require 'forwardable'
3
- require 'iconv' if RUBY_VERSION < '1.9.2'
4
- require 'mutex_m'
5
- require 'net/http/digest_auth'
6
- require 'net/http/persistent'
7
- require 'nkf'
8
- require 'nokogiri'
9
- require 'openssl'
10
- require 'pp'
11
- require 'stringio'
12
- require 'uri'
13
- require 'webrick/httputils'
14
- require 'zlib'
15
-
16
- ##
17
- # The Mechanize library is used for automating interactions with a website. It
18
- # can follow links and submit forms. Form fields can be populated and
19
- # submitted. A history of URLs is maintained and can be queried.
20
- #
21
- # == Example
22
- #
23
- # require 'mechanize'
24
- # require 'logger'
25
- #
26
- # agent = Mechanize.new
27
- # agent.log = Logger.new "mech.log"
28
- # agent.user_agent_alias = 'Mac Safari'
29
- #
30
- # page = agent.get "http://www.google.com/"
31
- # search_form = page.form_with :name => "f"
32
- # search_form.field_with(:name => "q").value = "Hello"
33
- #
34
- # search_results = agent.submit search_form
35
- # puts search_results.body
36
-
37
- class Mechanize
38
-
39
- ##
40
- # The version of Mechanize you are using.
41
-
42
- VERSION = '2.1'
43
-
44
- ##
45
- # Base mechanize error class
46
-
47
- class Error < RuntimeError
48
- end
49
-
50
- ruby_version = if RUBY_PATCHLEVEL >= 0 then
51
- "#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}"
52
- else
53
- "#{RUBY_VERSION}dev#{RUBY_REVISION}"
54
- end
55
-
56
- ##
57
- # Supported User-Agent aliases for use with user_agent_alias=. The
58
- # description in parenthesis is for informative purposes and is not part of
59
- # the alias name.
60
- #
61
- # * Linux Firefox (3.6.1)
62
- # * Linux Konqueror (3)
63
- # * Linux Mozilla
64
- # * Mac FireFox (3.6)
65
- # * Mac Mozilla
66
- # * Mac Safari (5)
67
- # * Mac Safari 4
68
- # * Mechanize (default)
69
- # * Windows IE 6
70
- # * Windows IE 7
71
- # * Windows IE 8
72
- # * Windows IE 9
73
- # * Windows Mozilla
74
- # * iPhone (3.0)
75
- #
76
- # Example:
77
- #
78
- # agent = Mechanize.new
79
- # agent.user_agent_alias = 'Mac Safari'
80
-
81
- AGENT_ALIASES = {
82
- 'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)",
83
- 'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
84
- 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
85
- 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
86
- 'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
87
- 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
88
- 'Mac Safari 4' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
89
- 'Mac Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22',
90
- 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
91
- 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
92
- 'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
93
- 'Windows IE 9' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
94
- 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
95
- 'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
96
- }
97
-
98
- def self.inherited(child) # :nodoc:
99
- child.html_parser ||= html_parser
100
- child.log ||= log
101
- super
102
- end
103
-
104
- ##
105
- # Creates a new mechanize instance. If a block is given, the created
106
- # instance is yielded to the block for setting up pre-connection state such
107
- # as SSL parameters or proxies:
108
- #
109
- # agent = Mechanize.new do |a|
110
- # a.proxy_host = 'proxy.example'
111
- # a.proxy_port = 8080
112
- # end
113
-
114
- def initialize
115
- @agent = Mechanize::HTTP::Agent.new
116
- @agent.context = self
117
- @log = nil
118
-
119
- # attr_accessors
120
- @agent.user_agent = AGENT_ALIASES['Mechanize']
121
- @watch_for_set = nil
122
- @history_added = nil
123
-
124
- # attr_readers
125
- @pluggable_parser = PluggableParser.new
126
-
127
- @keep_alive_time = 0
128
-
129
- # Proxy
130
- @proxy_addr = nil
131
- @proxy_port = nil
132
- @proxy_user = nil
133
- @proxy_pass = nil
134
-
135
- @html_parser = self.class.html_parser
136
-
137
- @default_encoding = nil
138
- @force_default_encoding = false
139
-
140
- yield self if block_given?
141
-
142
- @agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
143
- @agent.set_http
144
- end
145
-
146
- # :section: History
147
- #
148
- # Methods for navigating and controlling history
149
-
150
- ##
151
- # Equivalent to the browser back button. Returns the previous page visited.
152
-
153
- def back
154
- @agent.history.pop
155
- end
156
-
157
- ##
158
- # Returns the latest page loaded by Mechanize
159
-
160
- def current_page
161
- @agent.current_page
162
- end
163
-
164
- alias page current_page
165
-
166
- ##
167
- # The history of this mechanize run
168
-
169
- def history
170
- @agent.history
171
- end
172
-
173
- ##
174
- # Maximum number of items allowed in the history.
175
-
176
- def max_history
177
- @agent.history.max_size
178
- end
179
-
180
- ##
181
- # Sets the maximum number of items allowed in the history to +length+.
182
-
183
- def max_history= length
184
- @agent.history.max_size = length
185
- end
186
-
187
- ##
188
- # Returns a visited page for the +url+ passed in, otherwise nil
189
-
190
- def visited? url
191
- url = url.href if url.respond_to? :href
192
-
193
- @agent.visited_page url
194
- end
195
-
196
- ##
197
- # Returns whether or not a url has been visited
198
-
199
- alias visited_page visited?
200
-
201
- # :section: Hooks
202
- #
203
- # Hooks into the operation of mechanize
204
-
205
- ##
206
- # A list of hooks to call before reading response header 'content-encoding'.
207
- #
208
- # The hook is called with the agent making the request, the URI of the
209
- # request, the response and an IO containing the response body.
210
-
211
- def content_encoding_hooks
212
- @agent.content_encoding_hooks
213
- end
214
-
215
- ##
216
- # Callback which is invoked with the page that was added to history.
217
-
218
- attr_accessor :history_added
219
-
220
- ##
221
- # A list of hooks to call after retrieving a response. Hooks are called with
222
- # the agent and the response returned.
223
-
224
- def post_connect_hooks
225
- @agent.post_connect_hooks
226
- end
227
-
228
- ##
229
- # A list of hooks to call before making a request. Hooks are called with
230
- # the agent and the request to be performed.
231
-
232
- def pre_connect_hooks
233
- @agent.pre_connect_hooks
234
- end
235
-
236
- # :section: Requests
237
- #
238
- # Methods for making HTTP requests
239
-
240
- ##
241
- # If the parameter is a string, finds the button or link with the
242
- # value of the string on the current page and clicks it. Otherwise, clicks
243
- # the Mechanize::Page::Link object passed in. Returns the page fetched.
244
-
245
- def click link
246
- case link
247
- when Page::Link then
248
- referer = link.page || current_page()
249
- if @agent.robots
250
- if (referer.is_a?(Page) and referer.parser.nofollow?) or
251
- link.rel?('nofollow') then
252
- raise RobotsDisallowedError.new(link.href)
253
- end
254
- end
255
- if link.rel?('noreferrer')
256
- href = @agent.resolve(link.href, link.page || current_page)
257
- referer = Page.new(nil, {'content-type'=>'text/html'})
258
- else
259
- href = link.href
260
- end
261
- get href, [], referer
262
- when String, Regexp then
263
- if real_link = page.link_with(:text => link)
264
- click real_link
265
- else
266
- button = nil
267
- form = page.forms.find do |f|
268
- button = f.button_with(:value => link)
269
- button.is_a? Form::Submit
270
- end
271
- submit form, button if form
272
- end
273
- else
274
- referer = current_page()
275
- href = link.respond_to?(:href) ? link.href :
276
- (link['href'] || link['src'])
277
- get href, [], referer
278
- end
279
- end
280
-
281
- ##
282
- # DELETE +uri+ with +query_params+, and setting +headers+:
283
- #
284
- # delete('http://example/', {'q' => 'foo'}, {})
285
-
286
- def delete(uri, query_params = {}, headers = {})
287
- page = @agent.fetch(uri, :delete, headers, query_params)
288
- add_to_history(page)
289
- page
290
- end
291
-
292
- ##
293
- # GET the +uri+ with the given request +parameters+, +referer+ and
294
- # +headers+.
295
- #
296
- # The +referer+ may be a URI or a page.
297
-
298
- def get(uri, parameters = [], referer = nil, headers = {})
299
- method = :get
300
-
301
- referer ||=
302
- if uri.to_s =~ %r{\Ahttps?://}
303
- Page.new(nil, {'content-type'=>'text/html'})
304
- else
305
- current_page || Page.new(nil, {'content-type'=>'text/html'})
306
- end
307
-
308
- # FIXME: Huge hack so that using a URI as a referer works. I need to
309
- # refactor everything to pass around URIs but still support
310
- # Mechanize::Page#base
311
- unless Mechanize::Parser === referer then
312
- referer = referer.is_a?(String) ?
313
- Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
314
- Page.new(referer, {'content-type' => 'text/html'})
315
- end
316
-
317
- # fetch the page
318
- headers ||= {}
319
- page = @agent.fetch uri, method, headers, parameters, referer
320
- add_to_history(page)
321
- yield page if block_given?
322
- page
323
- end
324
-
325
- ##
326
- # GET +url+ and return only its contents
327
-
328
- def get_file(url)
329
- get(url).body
330
- end
331
-
332
- ##
333
- # HEAD +uri+ with +query_params+, and setting +headers+:
334
- #
335
- # head('http://example/', {'q' => 'foo'}, {})
336
-
337
- def head(uri, query_params = {}, headers = {})
338
- # fetch the page
339
- page = @agent.fetch(uri, :head, headers, query_params)
340
- yield page if block_given?
341
- page
342
- end
343
-
344
- ##
345
- # POST to the given +uri+ with the given +query+. The query is specified by
346
- # either a string, or a list of key-value pairs represented by a hash or an
347
- # array of arrays.
348
- #
349
- # Examples:
350
- # agent.post 'http://example.com/', "foo" => "bar"
351
- #
352
- # agent.post 'http://example.com/', [%w[foo bar]]
353
- #
354
- # agent.post('http://example.com/', "<message>hello</message>",
355
- # 'Content-Type' => 'application/xml')
356
-
357
- def post(uri, query={}, headers={})
358
- return request_with_entity(:post, uri, query, headers) if String === query
359
-
360
- node = {}
361
- # Create a fake form
362
- class << node
363
- def search(*args); []; end
364
- end
365
- node['method'] = 'POST'
366
- node['enctype'] = 'application/x-www-form-urlencoded'
367
-
368
- form = Form.new(node)
369
-
370
- query.each { |k, v|
371
- if v.is_a?(IO)
372
- form.enctype = 'multipart/form-data'
373
- ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
374
- ul.file_data = v.read
375
- form.file_uploads << ul
376
- else
377
- form.fields << Form::Field.new({'name' => k.to_s},v)
378
- end
379
- }
380
- post_form(uri, form, headers)
381
- end
382
-
383
- ##
384
- # PUT to +uri+ with +entity+, and setting +headers+:
385
- #
386
- # put('http://example/', 'new content', {'Content-Type' => 'text/plain'})
387
-
388
- def put(uri, entity, headers = {})
389
- request_with_entity(:put, uri, entity, headers)
390
- end
391
-
392
- ##
393
- # Makes an HTTP request to +uri+ using HTTP method +verb+. +entity+ is used
394
- # as the request body, if allowed.
395
-
396
- def request_with_entity(verb, uri, entity, headers = {})
397
- cur_page = current_page || Page.new(nil, {'content-type'=>'text/html'})
398
-
399
- headers = {
400
- 'Content-Type' => 'application/octet-stream',
401
- 'Content-Length' => entity.size.to_s,
402
- }.update headers
403
-
404
- page = @agent.fetch uri, verb, headers, [entity], cur_page
405
- add_to_history(page)
406
- page
407
- end
408
-
409
- ##
410
- # Submits +form+ with an optional +button+.
411
- #
412
- # Without a button:
413
- #
414
- # page = agent.get('http://example.com')
415
- # agent.submit(page.forms.first)
416
- #
417
- # With a button:
418
- #
419
- # agent.submit(page.forms.first, page.forms.first.buttons.first)
420
-
421
- def submit(form, button=nil, headers={})
422
- form.add_button_to_query(button) if button
423
-
424
- case form.method.upcase
425
- when 'POST'
426
- post_form(form.action, form, headers)
427
- when 'GET'
428
- get(form.action.gsub(/\?[^\?]*$/, ''),
429
- form.build_query,
430
- form.page,
431
- headers)
432
- else
433
- raise ArgumentError, "unsupported method: #{form.method.upcase}"
434
- end
435
- end
436
-
437
- ##
438
- # Runs given block, then resets the page history as it was before. self is
439
- # given as a parameter to the block. Returns the value of the block.
440
-
441
- def transact
442
- history_backup = @agent.history.dup
443
- begin
444
- yield self
445
- ensure
446
- @agent.history = history_backup
447
- end
448
- end
449
-
450
- # :section: Settings
451
- #
452
- # Settings that adjust how mechanize makes HTTP requests including timeouts,
453
- # keep-alives, compression, redirects and headers.
454
-
455
- @html_parser = Nokogiri::HTML
456
-
457
- class << self
458
-
459
- ##
460
- # Default HTML parser for all mechanize instances
461
- #
462
- # Mechanize.html_parser = Nokogiri::XML
463
-
464
- attr_accessor :html_parser
465
-
466
- ##
467
- # Default logger for all mechanize instances
468
- #
469
- # Mechanize.log = Logger.new $stderr
470
-
471
- attr_accessor :log
472
-
473
- end
474
-
475
- ##
476
- # A default encoding name used when parsing HTML. When set it is
477
- # used after any other encoding. The default is nil.
478
-
479
- attr_accessor :default_encoding
480
-
481
- ##
482
- # Overrides the encodings given by the HTTP server and the HTML page with
483
- # the default_encoding when set to true.
484
-
485
- attr_accessor :force_default_encoding
486
-
487
- ##
488
- # The HTML parser to be used when parsing documents
489
-
490
- attr_accessor :html_parser
491
-
492
- ##
493
- # HTTP/1.0 keep-alive time. This is no longer supported by mechanize as it
494
- # now uses net-http-persistent which only supports HTTP/1.1 persistent
495
- # connections
496
-
497
- attr_accessor :keep_alive_time
498
-
499
- ##
500
- # The HTTP proxy address
501
-
502
- attr_reader :proxy_addr
503
-
504
- ##
505
- # The HTTP proxy password
506
-
507
- attr_reader :proxy_pass
508
-
509
- ##
510
- # The HTTP proxy port
511
-
512
- attr_reader :proxy_port
513
-
514
- ##
515
- # The HTTP proxy username
516
-
517
- attr_reader :proxy_user
518
-
519
- ##
520
- # Sets the user and password to be used for HTTP authentication.
521
-
522
- def auth(user, password)
523
- @agent.user = user
524
- @agent.password = password
525
- end
526
-
527
- alias basic_auth auth
528
-
529
- ##
530
- # Are If-Modified-Since conditional requests enabled?
531
-
532
- def conditional_requests
533
- @agent.conditional_requests
534
- end
535
-
536
- ##
537
- # Disables If-Modified-Since conditional requests (enabled by default)
538
-
539
- def conditional_requests= enabled
540
- @agent.conditional_requests = enabled
541
- end
542
-
543
- ##
544
- # A Mechanize::CookieJar which stores cookies
545
-
546
- def cookie_jar
547
- @agent.cookie_jar
548
- end
549
-
550
- ##
551
- # Replaces the cookie jar with +cookie_jar+
552
-
553
- def cookie_jar= cookie_jar
554
- @agent.cookie_jar = cookie_jar
555
- end
556
-
557
- ##
558
- # Returns a list of cookies stored in the cookie jar.
559
-
560
- def cookies
561
- @agent.cookie_jar.to_a
562
- end
563
-
564
- ##
565
- # Follow HTML meta refresh and HTTP Refresh headers. If set to +:anywhere+
566
- # meta refresh tags outside of the head element will be followed.
567
-
568
- def follow_meta_refresh
569
- @agent.follow_meta_refresh
570
- end
571
-
572
- ##
573
- # Controls following of HTML meta refresh and HTTP Refresh headers in
574
- # responses.
575
-
576
- def follow_meta_refresh= follow
577
- @agent.follow_meta_refresh = follow
578
- end
579
-
580
- ##
581
- # Follow an HTML meta refresh and HTTP Refresh headers that have no "url="
582
- # in the content attribute.
583
- #
584
- # Defaults to false to prevent infinite refresh loops.
585
-
586
- def follow_meta_refresh_self
587
- @agent.follow_meta_refresh_self
588
- end
589
-
590
- ##
591
- # Alters the following of HTML meta refresh and HTTP Refresh headers that
592
- # point to the same page.
593
-
594
- def follow_meta_refresh_self= follow
595
- @agent.follow_meta_refresh_self = follow
596
- end
597
-
598
- ##
599
- # Is gzip compression of responses enabled?
600
-
601
- def gzip_enabled
602
- @agent.gzip_enabled
603
- end
604
-
605
- ##
606
- # Disables HTTP/1.1 gzip compression (enabled by default)
607
-
608
- def gzip_enabled=enabled
609
- @agent.gzip_enabled = enabled
610
- end
611
-
612
- ##
613
- # Connections that have not been used in this many seconds will be reset.
614
-
615
- def idle_timeout
616
- @agent.idle_timeout
617
- end
618
-
619
- # Sets the idle timeout to +idle_timeout+. The default timeout is 5
620
- # seconds. If you experience "too many connection resets", reducing this
621
- # value may help.
622
-
623
- def idle_timeout= idle_timeout
624
- @agent.idle_timeout = idle_timeout
625
- end
626
-
627
- ##
628
- # Are HTTP/1.1 keep-alive connections enabled?
629
-
630
- def keep_alive
631
- @agent.keep_alive
632
- end
633
-
634
- ##
635
- # Disable HTTP/1.1 keep-alive connections if +enable+ is set to false. If
636
- # you are experiencing "too many connection resets" errors setting this to
637
- # false will eliminate them.
638
- #
639
- # You should first investigate reducing idle_timeout.
640
-
641
- def keep_alive= enable
642
- @agent.keep_alive = enable
643
- end
644
-
645
- ##
646
- # The current logger. If no logger has been set Mechanize.log is used.
647
-
648
- def log
649
- @log || Mechanize.log
650
- end
651
-
652
- ##
653
- # Sets the +logger+ used by this instance of mechanize
654
-
655
- def log= logger
656
- @log = logger
657
- end
658
-
659
- ##
660
- # Responses larger than this will be written to a Tempfile instead of stored
661
- # in memory. The default is 10240 bytes
662
-
663
- def max_file_buffer
664
- @agent.max_file_buffer
665
- end
666
-
667
- ##
668
- # Sets the maximum size of a response body that will be stored in memory to
669
- # +bytes+
670
-
671
- def max_file_buffer= bytes
672
- @agent.max_file_buffer = bytes
673
- end
674
-
675
- ##
676
- # Length of time to wait until a connection is opened in seconds
677
-
678
- def open_timeout
679
- @agent.open_timeout
680
- end
681
-
682
- ##
683
- # Sets the connection open timeout to +open_timeout+
684
-
685
- def open_timeout= open_timeout
686
- @agent.open_timeout = open_timeout
687
- end
688
-
689
- ##
690
- # Length of time to wait for data from the server
691
-
692
- def read_timeout
693
- @agent.read_timeout
694
- end
695
-
696
- ##
697
- # Sets the timeout for each chunk of data read from the server to
698
- # +read_timeout+. A single request may read many chunks of data.
699
-
700
- def read_timeout= read_timeout
701
- @agent.read_timeout = read_timeout
702
- end
703
-
704
- ##
705
- # Controls how mechanize deals with redirects. The following values are
706
- # allowed:
707
- #
708
- # :all, true:: All 3xx redirects are followed (default)
709
- # :permanent:: Only 301 Moved Permanently redirects are followed
710
- # false:: No redirects are followed
711
-
712
- def redirect_ok
713
- @agent.redirect_ok
714
- end
715
-
716
- alias follow_redirect? redirect_ok
717
-
718
- ##
719
- # Sets the mechanize redirect handling policy. See redirect_ok for allowed
720
- # values
721
-
722
- def redirect_ok= follow
723
- @agent.redirect_ok = follow
724
- end
725
-
726
- ##
727
- # Maximum number of redirections to follow
728
-
729
- def redirection_limit
730
- @agent.redirection_limit
731
- end
732
-
733
- ##
734
- # Sets the maximum number of redirections to follow to +limit+
735
-
736
- def redirection_limit= limit
737
- @agent.redirection_limit = limit
738
- end
739
-
740
- ##
741
- # A hash of custom request headers that will be sent on every request
742
-
743
- def request_headers
744
- @agent.request_headers
745
- end
746
-
747
- ##
748
- # Replaces the custom request headers that will be sent on every request
749
- # with +request_headers+
750
-
751
- def request_headers= request_headers
752
- @agent.request_headers = request_headers
753
- end
754
-
755
- ##
756
- # Retry POST and other non-idempotent requests. See RFC 2616 9.1.2.
757
-
758
- def retry_change_requests
759
- @agent.retry_change_requests
760
- end
761
-
762
- ##
763
- # When setting +retry_change_requests+ to true you are stating that, for all
764
- # the URLs you access with mechanize, making POST and other non-idempotent
765
- # requests is safe and will not cause data duplication or other harmful
766
- # results.
767
- #
768
- # If you are experiencing "too many connection resets" errors you should
769
- # instead investigate reducing the idle_timeout or disabling keep_alive
770
- # connections.
771
-
772
- def retry_change_requests= retry_change_requests
773
- @agent.retry_change_requests = retry_change_requests
774
- end
775
-
776
- ##
777
- # Will <code>/robots.txt</code> files be obeyed?
778
-
779
- def robots
780
- @agent.robots
781
- end
782
-
783
- ##
784
- # When +enabled+ mechanize will retrieve and obey <code>robots.txt</code>
785
- # files
786
-
787
- def robots= enabled
788
- @agent.robots = enabled
789
- end
790
-
791
- ##
792
- # The handlers for HTTP and other URI protocols.
793
-
794
- def scheme_handlers
795
- @agent.scheme_handlers
796
- end
797
-
798
- ##
799
- # Replaces the URI scheme handler table with +scheme_handlers+
800
-
801
- def scheme_handlers= scheme_handlers
802
- @agent.scheme_handlers = scheme_handlers
803
- end
804
-
805
- ##
806
- # The identification string for the client initiating a web request
807
-
808
- def user_agent
809
- @agent.user_agent
810
- end
811
-
812
- ##
813
- # Sets the User-Agent used by mechanize to +user_agent+. See also
814
- # user_agent_alias
815
-
816
- def user_agent= user_agent
817
- @agent.user_agent = user_agent
818
- end
819
-
820
- ##
821
- # Set the user agent for the Mechanize object based on the given +name+.
822
- #
823
- # See also AGENT_ALIASES
824
-
825
- def user_agent_alias= name
826
- self.user_agent = AGENT_ALIASES[name] ||
827
- raise(ArgumentError, "unknown agent alias #{name.inspect}")
828
- end
829
-
830
- ##
831
- # The value of watch_for_set is passed to pluggable parsers for retrieved
832
- # content
833
-
834
- attr_accessor :watch_for_set
835
-
836
- # :section: SSL
837
- #
838
- # SSL settings for mechanize. These must be set in the block given to
839
- # Mechanize.new
840
-
841
- ##
842
- # Path to an OpenSSL server certificate file
843
-
844
- def ca_file
845
- @agent.ca_file
846
- end
847
-
848
- ##
849
- # Sets the certificate file used for SSL connections
850
-
851
- def ca_file= ca_file
852
- @agent.ca_file = ca_file
853
- end
854
-
855
- ##
856
- # An OpenSSL client certificate or the path to a certificate file.
857
-
858
- def cert
859
- @agent.cert
860
- end
861
-
862
- ##
863
- # Sets the OpenSSL client certificate +cert+ to the given path or
864
- # certificate instance
865
-
866
- def cert= cert
867
- @agent.cert = cert
868
- end
869
-
870
- ##
871
- # An OpenSSL certificate store for verifying server certificates. This
872
- # defaults to the default certificate store.
873
-
874
- def cert_store
875
- @agent.cert_store
876
- end
877
-
878
- ##
879
- # Sets the OpenSSL certificate store to +cert_store+.
880
-
881
- def cert_store= cert_store
882
- @agent.cert_store = cert_store
883
- end
884
-
885
- ##
886
- # What is this?
887
- #
888
- # Why is it different from #cert?
889
-
890
- def certificate # :nodoc:
891
- @agent.certificate
892
- end
893
-
894
- ##
895
- # An OpenSSL private key or the path to a private key
896
-
897
- def key
898
- @agent.key
899
- end
900
-
901
- ##
902
- # Sets the OpenSSL client +key+ to the given path or key instance
903
-
904
- def key= key
905
- @agent.key = key
906
- end
907
-
908
- ##
909
- # OpenSSL client key password
910
-
911
- def pass
912
- @agent.pass
913
- end
914
-
915
- ##
916
- # Sets the client key password to +pass+
917
-
918
- def pass= pass
919
- @agent.pass = pass
920
- end
921
-
922
- ##
923
- # A callback for additional certificate verification. See
924
- # OpenSSL::SSL::SSLContext#verify_callback
925
- #
926
- # The callback can be used for debugging or to ignore errors by always
927
- # returning +true+. Specifying nil uses the default method that was valid
928
- # when the SSLContext was created
929
-
930
- def verify_callback
931
- @agent.verify_callback
932
- end
933
-
934
- ##
935
- # Sets the OpenSSL certificate verification callback
936
-
937
- def verify_callback= verify_callback
938
- @agent.verify_callback = verify_callback
939
- end
940
-
941
- ##
942
- # The OpenSSL server certificate verification method. The default is
943
- # OpenSSL::SSL::VERIFY_PEER and certificate verification uses the default
944
- # system certificates. See also cert_store
945
-
946
- def verify_mode
947
- @agent.verify_mode
948
- end
949
-
950
- ##
951
- # Sets the OpenSSL server certificate verification method.
952
-
953
- def verify_mode= verify_mode
954
- @agent.verify_mode = verify_mode
955
- end
956
-
957
- # :section: Utilities
958
-
959
- attr_reader :agent # :nodoc:
960
-
961
- attr_reader :pluggable_parser # :nodoc:
962
-
963
- ##
964
- # Parses the +body+ of the +response+ from +uri+ using the pluggable parser
965
- # that matches its content type
966
-
967
- def parse uri, response, body
968
- content_type = nil
969
-
970
- unless response['Content-Type'].nil?
971
- data, = response['Content-Type'].split ';', 2
972
- content_type, = data.downcase.split ',', 2 unless data.nil?
973
- end
974
-
975
- # Find our pluggable parser
976
- parser_klass = @pluggable_parser.parser content_type
977
-
978
- unless parser_klass <= Mechanize::Download then
979
- body = case body
980
- when IO, Tempfile, StringIO then
981
- body.read
982
- else
983
- body
984
- end
985
- end
986
-
987
- parser_klass.new uri, response, body, response.code do |parser|
988
- parser.mech = self if parser.respond_to? :mech=
989
-
990
- parser.watch_for_set = @watch_for_set if
991
- @watch_for_set and parser.respond_to?(:watch_for_set=)
992
- end
993
- end
994
-
995
- def pretty_print(q) # :nodoc:
996
- q.object_group(self) {
997
- q.breakable
998
- q.pp cookie_jar
999
- q.breakable
1000
- q.pp current_page
1001
- }
1002
- end
1003
-
1004
- ##
1005
- # Sets the proxy +address+ at +port+ with an optional +user+ and +password+
1006
-
1007
- def set_proxy address, port, user = nil, password = nil
1008
- @proxy_addr = address
1009
- @proxy_port = port
1010
- @proxy_user = user
1011
- @proxy_pass = password
1012
-
1013
- @agent.set_proxy address, port, user, password
1014
- @agent.set_http
1015
- end
1016
-
1017
- private
1018
-
1019
- ##
1020
- # Posts +form+ to +uri+
1021
-
1022
- def post_form(uri, form, headers = {})
1023
- cur_page = form.page || current_page ||
1024
- Page.new(nil, {'content-type'=>'text/html'})
1025
-
1026
- request_data = form.request_data
1027
-
1028
- log.debug("query: #{ request_data.inspect }") if log
1029
-
1030
- headers = {
1031
- 'Content-Type' => form.enctype,
1032
- 'Content-Length' => request_data.size.to_s,
1033
- }.merge headers
1034
-
1035
- # fetch the page
1036
- page = @agent.fetch uri, :post, headers, [request_data], cur_page
1037
- add_to_history(page)
1038
- page
1039
- end
1040
-
1041
- ##
1042
- # Adds +page+ to the history
1043
-
1044
- def add_to_history(page)
1045
- @agent.history.push(page, @agent.resolve(page.uri))
1046
- @history_added.call(page) if @history_added
1047
- end
1048
-
1049
- end
1050
-
1051
- require 'mechanize/content_type_error'
1052
- require 'mechanize/cookie'
1053
- require 'mechanize/cookie_jar'
1054
- require 'mechanize/parser'
1055
- require 'mechanize/download'
1056
- require 'mechanize/file'
1057
- require 'mechanize/file_connection'
1058
- require 'mechanize/file_request'
1059
- require 'mechanize/file_response'
1060
- require 'mechanize/form'
1061
- require 'mechanize/history'
1062
- require 'mechanize/http'
1063
- require 'mechanize/http/agent'
1064
- require 'mechanize/http/auth_challenge'
1065
- require 'mechanize/http/auth_realm'
1066
- require 'mechanize/http/content_disposition_parser'
1067
- require 'mechanize/http/www_authenticate_parser'
1068
- require 'mechanize/page'
1069
- require 'mechanize/monkey_patch'
1070
- require 'mechanize/pluggable_parsers'
1071
- require 'mechanize/redirect_limit_reached_error'
1072
- require 'mechanize/redirect_not_get_or_head_error'
1073
- require 'mechanize/response_code_error'
1074
- require 'mechanize/unauthorized_error'
1075
- require 'mechanize/response_read_error'
1076
- require 'mechanize/robots_disallowed_error'
1077
- require 'mechanize/unsupported_scheme_error'
1078
- require 'mechanize/util'
1079
-