diamond-mechanize 2.1 → 2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. metadata +222 -167
  2. data/Rakefile +0 -49
  3. data/lib/mechanize/content_type_error.rb +0 -13
  4. data/lib/mechanize/cookie.rb +0 -232
  5. data/lib/mechanize/cookie_jar.rb +0 -194
  6. data/lib/mechanize/download.rb +0 -59
  7. data/lib/mechanize/element_matcher.rb +0 -36
  8. data/lib/mechanize/file.rb +0 -65
  9. data/lib/mechanize/file_connection.rb +0 -17
  10. data/lib/mechanize/file_request.rb +0 -26
  11. data/lib/mechanize/file_response.rb +0 -74
  12. data/lib/mechanize/file_saver.rb +0 -39
  13. data/lib/mechanize/form/button.rb +0 -6
  14. data/lib/mechanize/form/check_box.rb +0 -12
  15. data/lib/mechanize/form/field.rb +0 -54
  16. data/lib/mechanize/form/file_upload.rb +0 -21
  17. data/lib/mechanize/form/hidden.rb +0 -3
  18. data/lib/mechanize/form/image_button.rb +0 -19
  19. data/lib/mechanize/form/keygen.rb +0 -34
  20. data/lib/mechanize/form/multi_select_list.rb +0 -94
  21. data/lib/mechanize/form/option.rb +0 -50
  22. data/lib/mechanize/form/radio_button.rb +0 -55
  23. data/lib/mechanize/form/reset.rb +0 -3
  24. data/lib/mechanize/form/select_list.rb +0 -44
  25. data/lib/mechanize/form/submit.rb +0 -3
  26. data/lib/mechanize/form/text.rb +0 -3
  27. data/lib/mechanize/form/textarea.rb +0 -3
  28. data/lib/mechanize/form.rb +0 -543
  29. data/lib/mechanize/headers.rb +0 -23
  30. data/lib/mechanize/history.rb +0 -82
  31. data/lib/mechanize/http/agent.rb +0 -1004
  32. data/lib/mechanize/http/auth_challenge.rb +0 -59
  33. data/lib/mechanize/http/auth_realm.rb +0 -31
  34. data/lib/mechanize/http/content_disposition_parser.rb +0 -188
  35. data/lib/mechanize/http/www_authenticate_parser.rb +0 -155
  36. data/lib/mechanize/http.rb +0 -8
  37. data/lib/mechanize/monkey_patch.rb +0 -16
  38. data/lib/mechanize/page/base.rb +0 -7
  39. data/lib/mechanize/page/frame.rb +0 -27
  40. data/lib/mechanize/page/image.rb +0 -30
  41. data/lib/mechanize/page/label.rb +0 -20
  42. data/lib/mechanize/page/link.rb +0 -98
  43. data/lib/mechanize/page/meta_refresh.rb +0 -68
  44. data/lib/mechanize/page.rb +0 -440
  45. data/lib/mechanize/parser.rb +0 -173
  46. data/lib/mechanize/pluggable_parsers.rb +0 -144
  47. data/lib/mechanize/redirect_limit_reached_error.rb +0 -19
  48. data/lib/mechanize/redirect_not_get_or_head_error.rb +0 -21
  49. data/lib/mechanize/response_code_error.rb +0 -21
  50. data/lib/mechanize/response_read_error.rb +0 -27
  51. data/lib/mechanize/robots_disallowed_error.rb +0 -28
  52. data/lib/mechanize/test_case.rb +0 -663
  53. data/lib/mechanize/unauthorized_error.rb +0 -3
  54. data/lib/mechanize/unsupported_scheme_error.rb +0 -6
  55. data/lib/mechanize/util.rb +0 -101
  56. data/lib/mechanize.rb +0 -1079
  57. data/test/data/htpasswd +0 -1
  58. data/test/data/server.crt +0 -16
  59. data/test/data/server.csr +0 -12
  60. data/test/data/server.key +0 -15
  61. data/test/data/server.pem +0 -15
  62. data/test/htdocs/alt_text.html +0 -10
  63. data/test/htdocs/bad_form_test.html +0 -9
  64. data/test/htdocs/button.jpg +0 -0
  65. data/test/htdocs/canonical_uri.html +0 -9
  66. data/test/htdocs/dir with spaces/foo.html +0 -1
  67. data/test/htdocs/empty_form.html +0 -6
  68. data/test/htdocs/file_upload.html +0 -26
  69. data/test/htdocs/find_link.html +0 -41
  70. data/test/htdocs/form_multi_select.html +0 -16
  71. data/test/htdocs/form_multival.html +0 -37
  72. data/test/htdocs/form_no_action.html +0 -18
  73. data/test/htdocs/form_no_input_name.html +0 -16
  74. data/test/htdocs/form_order_test.html +0 -11
  75. data/test/htdocs/form_select.html +0 -16
  76. data/test/htdocs/form_set_fields.html +0 -14
  77. data/test/htdocs/form_test.html +0 -188
  78. data/test/htdocs/frame_referer_test.html +0 -10
  79. data/test/htdocs/frame_test.html +0 -30
  80. data/test/htdocs/google.html +0 -13
  81. data/test/htdocs/index.html +0 -6
  82. data/test/htdocs/link with space.html +0 -5
  83. data/test/htdocs/meta_cookie.html +0 -11
  84. data/test/htdocs/no_title_test.html +0 -6
  85. data/test/htdocs/noindex.html +0 -9
  86. data/test/htdocs/rails_3_encoding_hack_form_test.html +0 -27
  87. data/test/htdocs/relative/tc_relative_links.html +0 -21
  88. data/test/htdocs/robots.html +0 -8
  89. data/test/htdocs/robots.txt +0 -2
  90. data/test/htdocs/tc_bad_charset.html +0 -9
  91. data/test/htdocs/tc_bad_links.html +0 -5
  92. data/test/htdocs/tc_base_link.html +0 -8
  93. data/test/htdocs/tc_blank_form.html +0 -11
  94. data/test/htdocs/tc_charset.html +0 -6
  95. data/test/htdocs/tc_checkboxes.html +0 -19
  96. data/test/htdocs/tc_encoded_links.html +0 -5
  97. data/test/htdocs/tc_field_precedence.html +0 -11
  98. data/test/htdocs/tc_follow_meta.html +0 -8
  99. data/test/htdocs/tc_form_action.html +0 -48
  100. data/test/htdocs/tc_links.html +0 -19
  101. data/test/htdocs/tc_meta_in_body.html +0 -9
  102. data/test/htdocs/tc_pretty_print.html +0 -17
  103. data/test/htdocs/tc_referer.html +0 -16
  104. data/test/htdocs/tc_relative_links.html +0 -19
  105. data/test/htdocs/tc_textarea.html +0 -23
  106. data/test/htdocs/test_click.html +0 -11
  107. data/test/htdocs/unusual______.html +0 -5
  108. data/test/test_mechanize.rb +0 -1164
  109. data/test/test_mechanize_cookie.rb +0 -451
  110. data/test/test_mechanize_cookie_jar.rb +0 -483
  111. data/test/test_mechanize_download.rb +0 -43
  112. data/test/test_mechanize_file.rb +0 -61
  113. data/test/test_mechanize_file_connection.rb +0 -21
  114. data/test/test_mechanize_file_request.rb +0 -19
  115. data/test/test_mechanize_file_saver.rb +0 -21
  116. data/test/test_mechanize_form.rb +0 -875
  117. data/test/test_mechanize_form_check_box.rb +0 -38
  118. data/test/test_mechanize_form_encoding.rb +0 -114
  119. data/test/test_mechanize_form_field.rb +0 -63
  120. data/test/test_mechanize_form_file_upload.rb +0 -20
  121. data/test/test_mechanize_form_image_button.rb +0 -12
  122. data/test/test_mechanize_form_keygen.rb +0 -32
  123. data/test/test_mechanize_form_multi_select_list.rb +0 -84
  124. data/test/test_mechanize_form_option.rb +0 -55
  125. data/test/test_mechanize_form_radio_button.rb +0 -78
  126. data/test/test_mechanize_form_select_list.rb +0 -76
  127. data/test/test_mechanize_form_textarea.rb +0 -52
  128. data/test/test_mechanize_headers.rb +0 -35
  129. data/test/test_mechanize_history.rb +0 -103
  130. data/test/test_mechanize_http_agent.rb +0 -1225
  131. data/test/test_mechanize_http_auth_challenge.rb +0 -39
  132. data/test/test_mechanize_http_auth_realm.rb +0 -49
  133. data/test/test_mechanize_http_content_disposition_parser.rb +0 -118
  134. data/test/test_mechanize_http_www_authenticate_parser.rb +0 -146
  135. data/test/test_mechanize_link.rb +0 -80
  136. data/test/test_mechanize_page.rb +0 -118
  137. data/test/test_mechanize_page_encoding.rb +0 -182
  138. data/test/test_mechanize_page_frame.rb +0 -16
  139. data/test/test_mechanize_page_link.rb +0 -390
  140. data/test/test_mechanize_page_meta_refresh.rb +0 -127
  141. data/test/test_mechanize_parser.rb +0 -289
  142. data/test/test_mechanize_pluggable_parser.rb +0 -52
  143. data/test/test_mechanize_redirect_limit_reached_error.rb +0 -24
  144. data/test/test_mechanize_redirect_not_get_or_head_error.rb +0 -14
  145. data/test/test_mechanize_subclass.rb +0 -22
  146. data/test/test_mechanize_util.rb +0 -103
  147. data/test/test_multi_select.rb +0 -119
@@ -1,23 +0,0 @@
1
- class Mechanize::Headers < Hash
2
- def [](key)
3
- super(key.downcase)
4
- end
5
-
6
- def []=(key, value)
7
- super(key.downcase, value)
8
- end
9
-
10
- def key?(key)
11
- super(key.downcase)
12
- end
13
-
14
- def canonical_each
15
- block_given? or return enum_for(__method__)
16
- each { |key, value|
17
- key = key.capitalize
18
- key.gsub!(/-([a-z])/) { "-#{$1.upcase}" }
19
- yield [key, value]
20
- }
21
- end
22
- end
23
-
@@ -1,82 +0,0 @@
1
- ##
2
- # This class manages history for your mechanize object.
3
-
4
- class Mechanize::History < Array
5
-
6
- attr_accessor :max_size
7
-
8
- def initialize(max_size = nil)
9
- @max_size = max_size
10
- @history_index = {}
11
- end
12
-
13
- def initialize_copy(orig)
14
- super
15
- @history_index = orig.instance_variable_get(:@history_index).dup
16
- end
17
-
18
- def inspect # :nodoc:
19
- uris = map { |page| page.uri }.join ', '
20
-
21
- "[#{uris}]"
22
- end
23
-
24
- def push(page, uri = nil)
25
- super page
26
-
27
- index = uri ? uri : page.uri
28
- @history_index[index.to_s] = page
29
-
30
- shift while length > @max_size if @max_size
31
-
32
- self
33
- end
34
-
35
- alias :<< :push
36
-
37
- def visited? uri
38
- page = @history_index[uri.to_s]
39
-
40
- return page if page # HACK
41
-
42
- uri = uri.dup
43
- uri.path = '/' if uri.path.empty?
44
-
45
- @history_index[uri.to_s]
46
- end
47
-
48
- alias visited_page visited?
49
-
50
- def clear
51
- @history_index.clear
52
- super
53
- end
54
-
55
- def shift
56
- return nil if length == 0
57
- page = self[0]
58
- self[0] = nil
59
-
60
- super
61
-
62
- remove_from_index(page)
63
- page
64
- end
65
-
66
- def pop
67
- return nil if length == 0
68
- page = super
69
- remove_from_index(page)
70
- page
71
- end
72
-
73
- private
74
-
75
- def remove_from_index(page)
76
- @history_index.each do |k,v|
77
- @history_index.delete(k) if v == page
78
- end
79
- end
80
-
81
- end
82
-
@@ -1,1004 +0,0 @@
1
- require 'tempfile'
2
- require 'net/ntlm'
3
- require 'kconv'
4
- require 'webrobots'
5
-
6
- ##
7
- # An HTTP (and local disk access) user agent. This class is an implementation
8
- # detail and is subject to change at any time.
9
-
10
- class Mechanize::HTTP::Agent
11
-
12
- # :section: Headers
13
-
14
- # Disables If-Modified-Since conditional requests (enabled by default)
15
- attr_accessor :conditional_requests
16
-
17
- # Is gzip compression of requests enabled?
18
- attr_accessor :gzip_enabled
19
-
20
- # A hash of request headers to be used for every request
21
- attr_accessor :request_headers
22
-
23
- # The User-Agent header to send
24
- attr_reader :user_agent
25
-
26
- # :section: History
27
-
28
- # history of requests made
29
- attr_accessor :history
30
-
31
- # :section: Hooks
32
-
33
- # A list of hooks to call after retrieving a response. Hooks are called with
34
- # the agent and the response returned.
35
- attr_reader :post_connect_hooks
36
-
37
- # A list of hooks to call before making a request. Hooks are called with
38
- # the agent and the request to be performed.
39
- attr_reader :pre_connect_hooks
40
-
41
- # A list of hooks to call to handle the content-encoding of a request.
42
- attr_reader :content_encoding_hooks
43
-
44
- # :section: HTTP Authentication
45
-
46
- attr_reader :authenticate_methods # :nodoc:
47
- attr_reader :digest_challenges # :nodoc:
48
- attr_accessor :user
49
- attr_accessor :password
50
-
51
- # :section: Redirection
52
-
53
- # Follow HTML meta refresh and HTTP Refresh. If set to +:anywhere+ meta
54
- # refresh tags outside of the head element will be followed.
55
- attr_accessor :follow_meta_refresh
56
-
57
- # Follow an HTML meta refresh that has no "url=" in the content attribute.
58
- #
59
- # Defaults to false to prevent infinite refresh loops.
60
- attr_accessor :follow_meta_refresh_self
61
-
62
- # Controls how this agent deals with redirects. The following values are
63
- # allowed:
64
- #
65
- # :all, true:: All 3xx redirects are followed (default)
66
- # :permanent:: Only 301 Moved Permanantly redirects are followed
67
- # false:: No redirects are followed
68
- attr_accessor :redirect_ok
69
-
70
- # Maximum number of redirects to follow
71
- attr_accessor :redirection_limit
72
-
73
- # :section: Robots
74
-
75
- # When true, this agent will consult the site's robots.txt for each access.
76
- attr_reader :robots
77
-
78
- # :section: SSL
79
-
80
- # Path to an OpenSSL server certificate file
81
- attr_accessor :ca_file
82
-
83
- # An OpenSSL private key or the path to a private key
84
- attr_accessor :key
85
-
86
- # An OpenSSL client certificate or the path to a certificate file.
87
- attr_accessor :cert
88
-
89
- # An SSL certificate store
90
- attr_accessor :cert_store
91
-
92
- # OpenSSL key password
93
- attr_accessor :pass
94
-
95
- # A callback for additional certificate verification. See
96
- # OpenSSL::SSL::SSLContext#verify_callback
97
- #
98
- # The callback can be used for debugging or to ignore errors by always
99
- # returning +true+. Specifying nil uses the default method that was valid
100
- # when the SSLContext was created
101
- attr_accessor :verify_callback
102
-
103
- # How to verify SSL connections. Defaults to VERIFY_PEER
104
- attr_accessor :verify_mode
105
-
106
- # :section: Timeouts
107
-
108
- # Reset connections that have not been used in this many seconds
109
- attr_reader :idle_timeout
110
-
111
- # Set to false to disable HTTP/1.1 keep-alive requests
112
- attr_accessor :keep_alive
113
-
114
- # Length of time to wait until a connection is opened in seconds
115
- attr_accessor :open_timeout
116
-
117
- # Length of time to attempt to read data from the server
118
- attr_accessor :read_timeout
119
-
120
- # :section:
121
-
122
- # The cookies for this agent
123
- attr_accessor :cookie_jar
124
-
125
- # URI for a proxy connection
126
- attr_reader :proxy_uri
127
-
128
- # Retry non-idempotent requests?
129
- attr_reader :retry_change_requests
130
-
131
- # Responses larger than this will be written to a Tempfile instead of stored
132
- # in memory.
133
- attr_accessor :max_file_buffer
134
-
135
- # :section: Utility
136
-
137
- # The context parses responses into pages
138
- attr_accessor :context
139
-
140
- attr_reader :http # :nodoc:
141
-
142
- # Handlers for various URI schemes
143
- attr_accessor :scheme_handlers
144
-
145
- # :section:
146
-
147
- # Creates a new Mechanize HTTP user agent. The user agent is an
148
- # implementation detail of mechanize and its API may change at any time.
149
-
150
- def initialize
151
- @conditional_requests = true
152
- @context = nil
153
- @content_encoding_hooks = []
154
- @cookie_jar = Mechanize::CookieJar.new
155
- @follow_meta_refresh = false
156
- @follow_meta_refresh_self = false
157
- @gzip_enabled = true
158
- @history = Mechanize::History.new
159
- @idle_timeout = nil
160
- @keep_alive = true
161
- @keep_alive_time = 300
162
- @max_file_buffer = 10240
163
- @open_timeout = nil
164
- @post_connect_hooks = []
165
- @pre_connect_hooks = []
166
- @proxy_uri = nil
167
- @read_timeout = nil
168
- @redirect_ok = true
169
- @redirection_limit = 20
170
- @request_headers = {}
171
- @retry_change_requests = false
172
- @robots = false
173
- @user_agent = nil
174
- @webrobots = nil
175
-
176
- # HTTP Authentication
177
- @authenticate_parser = Mechanize::HTTP::WWWAuthenticateParser.new
178
- @authenticate_methods = Hash.new do |methods, uri|
179
- methods[uri] = Hash.new do |realms, auth_scheme|
180
- realms[auth_scheme] = []
181
- end
182
- end
183
- @digest_auth = Net::HTTP::DigestAuth.new
184
- @digest_challenges = {}
185
- @password = nil # HTTP auth password
186
- @user = nil # HTTP auth user
187
-
188
- # SSL
189
- @ca_file = nil
190
- @cert = nil
191
- @cert_store = nil
192
- @key = nil
193
- @pass = nil
194
- @verify_callback = nil
195
- @verify_mode = nil
196
-
197
- @scheme_handlers = Hash.new { |h, scheme|
198
- h[scheme] = lambda { |link, page|
199
- raise Mechanize::UnsupportedSchemeError, scheme
200
- }
201
- }
202
-
203
- @scheme_handlers['http'] = lambda { |link, page| link }
204
- @scheme_handlers['https'] = @scheme_handlers['http']
205
- @scheme_handlers['relative'] = @scheme_handlers['http']
206
- @scheme_handlers['file'] = @scheme_handlers['http']
207
- end
208
-
209
- # Retrieves +uri+ and parses it into a page or other object according to
210
- # PluggableParser. If the URI is an HTTP or HTTPS scheme URI the given HTTP
211
- # +method+ is used to retrieve it, along with the HTTP +headers+, request
212
- # +params+ and HTTP +referer+.
213
- #
214
- # +redirects+ tracks the number of redirects experienced when retrieving the
215
- # page. If it is over the redirection_limit an error will be raised.
216
-
217
- def fetch uri, method = :get, headers = {}, params = [],
218
- referer = current_page, redirects = 0
219
- referer_uri = referer ? referer.uri : nil
220
-
221
- uri = resolve uri, referer
222
-
223
- uri, params = resolve_parameters uri, method, params
224
-
225
- request = http_request uri, method, params
226
-
227
- connection = connection_for uri
228
-
229
- request_auth request, uri
230
-
231
- disable_keep_alive request
232
- enable_gzip request
233
-
234
- request_language_charset request
235
- request_cookies request, uri
236
- request_host request, uri
237
- request_referer request, uri, referer_uri
238
- request_user_agent request
239
- request_add_headers request, headers
240
-
241
- pre_connect request
242
-
243
- # Consult robots.txt
244
- if robots && uri.is_a?(URI::HTTP)
245
- robots_allowed?(uri) or raise Mechanize::RobotsDisallowedError.new(uri)
246
- end
247
-
248
- # Add If-Modified-Since if page is in history
249
- page = visited_page(uri)
250
-
251
- if (page = visited_page(uri)) and page.response['Last-Modified']
252
- request['If-Modified-Since'] = page.response['Last-Modified']
253
- end if(@conditional_requests)
254
-
255
- # Specify timeouts if given
256
- connection.open_timeout = @open_timeout if @open_timeout
257
- connection.read_timeout = @read_timeout if @read_timeout
258
-
259
- request_log request
260
-
261
- response_body_io = nil
262
-
263
- # Send the request
264
- response = connection.request(uri, request) { |res|
265
- response_log res
266
-
267
- response_body_io = response_read res, request
268
-
269
- res
270
- }
271
-
272
- hook_content_encoding response, uri, response_body_io
273
-
274
- response_body_io = response_content_encoding response, response_body_io
275
-
276
- post_connect uri, response, response_body_io
277
-
278
- page = response_parse response, response_body_io, uri
279
-
280
- response_cookies response, uri, page
281
-
282
- meta = response_follow_meta_refresh response, uri, page, redirects
283
- return meta if meta
284
-
285
- case response
286
- when Net::HTTPSuccess
287
- if robots && page.is_a?(Mechanize::Page)
288
- page.parser.noindex? and raise Mechanize::RobotsDisallowedError.new(uri)
289
- end
290
-
291
- page
292
- when Mechanize::FileResponse
293
- page
294
- when Net::HTTPNotModified
295
- log.debug("Got cached page") if log
296
- visited_page(uri) || page
297
- when Net::HTTPRedirection
298
- response_redirect response, method, page, redirects, referer
299
- when Net::HTTPUnauthorized
300
- response_authenticate(response, page, uri, request, headers, params,
301
- referer)
302
- else
303
- raise Mechanize::ResponseCodeError.new(page), "Unhandled response"
304
- end
305
- end
306
-
307
- # Retry non-idempotent requests
308
-
309
- def retry_change_requests= retri
310
- @retry_change_requests = retri
311
- @http.retry_change_requests = retri if @http
312
- end
313
-
314
- # :section: Headers
315
-
316
- def user_agent= user_agent
317
- @webrobots = nil if user_agent != @user_agent
318
- @user_agent = user_agent
319
- end
320
-
321
- # :section: History
322
-
323
- # Equivalent to the browser back button. Returns the most recent page
324
- # visited.
325
- def back
326
- @history.pop
327
- end
328
-
329
- ##
330
- # Returns the latest page loaded by the agent
331
-
332
- def current_page
333
- @history.last
334
- end
335
-
336
- def max_history
337
- @history.max_size
338
- end
339
-
340
- def max_history=(length)
341
- @history.max_size = length
342
- end
343
-
344
- # Returns a visited page for the url passed in, otherwise nil
345
- def visited_page url
346
- @history.visited_page resolve url
347
- end
348
-
349
- # :section: Hooks
350
-
351
- def hook_content_encoding response, uri, response_body_io
352
- @content_encoding_hooks.each do |hook|
353
- hook.call self, uri, response, response_body_io
354
- end
355
- end
356
-
357
- ##
358
- # Invokes hooks added to post_connect_hooks after a +response+ is returned
359
- # and the response +body+ is handled.
360
- #
361
- # Yields the +context+, the +uri+ for the request, the +response+ and the
362
- # response +body+.
363
-
364
- def post_connect uri, response, body_io # :yields: agent, uri, response, body
365
- @post_connect_hooks.each do |hook|
366
- begin
367
- hook.call self, uri, response, body_io.read
368
- ensure
369
- body_io.rewind
370
- end
371
- end
372
- end
373
-
374
- ##
375
- # Invokes hooks added to pre_connect_hooks before a +request+ is made.
376
- # Yields the +agent+ and the +request+ that will be performed to each hook.
377
-
378
- def pre_connect request # :yields: agent, request
379
- @pre_connect_hooks.each do |hook|
380
- hook.call self, request
381
- end
382
- end
383
-
384
- # :section: Request
385
-
386
- def connection_for uri
387
- case uri.scheme.downcase
388
- when 'http', 'https' then
389
- return @http
390
- when 'file' then
391
- return Mechanize::FileConnection.new
392
- end
393
- end
394
-
395
- def disable_keep_alive request
396
- request['connection'] = 'close' unless @keep_alive
397
- end
398
-
399
- def enable_gzip request
400
- request['accept-encoding'] = if @gzip_enabled
401
- 'gzip,deflate,identity'
402
- else
403
- 'identity'
404
- end
405
- end
406
-
407
- def http_request uri, method, params = nil
408
- case uri.scheme.downcase
409
- when 'http', 'https' then
410
- klass = Net::HTTP.const_get(method.to_s.capitalize)
411
-
412
- request ||= klass.new(uri.request_uri)
413
- request.body = params.first if params
414
-
415
- request
416
- when 'file' then
417
- Mechanize::FileRequest.new uri
418
- end
419
- end
420
-
421
- def request_add_headers request, headers = {}
422
- @request_headers.each do |k,v|
423
- request[k] = v
424
- end
425
-
426
- headers.each do |field, value|
427
- case field
428
- when :etag then request["ETag"] = value
429
- when :if_modified_since then request["If-Modified-Since"] = value
430
- when Symbol then
431
- raise ArgumentError, "unknown header symbol #{field}"
432
- else
433
- request[field] = value
434
- end
435
- end
436
- end
437
-
438
- def request_auth request, uri
439
- base_uri = uri + '/'
440
- schemes = @authenticate_methods[base_uri]
441
-
442
- if realm = schemes[:digest].find { |r| r.uri == base_uri } then
443
- request_auth_digest request, uri, realm, base_uri, false
444
- elsif realm = schemes[:iis_digest].find { |r| r.uri == base_uri } then
445
- request_auth_digest request, uri, realm, base_uri, true
446
- elsif schemes[:basic].find { |r| r.uri == base_uri } then
447
- request.basic_auth @user, @password
448
- end
449
- end
450
-
451
- def request_auth_digest request, uri, realm, base_uri, iis
452
- challenge = @digest_challenges[realm]
453
-
454
- uri.user = @user
455
- uri.password = @password
456
-
457
- auth = @digest_auth.auth_header uri, challenge.to_s, request.method, iis
458
- request['Authorization'] = auth
459
- end
460
-
461
- def request_cookies request, uri
462
- return if @cookie_jar.empty? uri
463
-
464
- cookies = @cookie_jar.cookies uri
465
-
466
- return if cookies.empty?
467
-
468
- request.add_field 'Cookie', cookies.join('; ')
469
- end
470
-
471
- def request_host request, uri
472
- port = [80, 443].include?(uri.port.to_i) ? nil : uri.port
473
- host = uri.host
474
-
475
- request['Host'] = [host, port].compact.join ':'
476
- end
477
-
478
- def request_language_charset request
479
- request['accept-charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'
480
- request['accept-language'] = 'en-us,en;q=0.5'
481
- end
482
-
483
- # Log specified headers for the request
484
- def request_log request
485
- return unless log
486
-
487
- log.info("#{request.class}: #{request.path}")
488
-
489
- request.each_header do |k, v|
490
- log.debug("request-header: #{k} => #{v}")
491
- end
492
- end
493
-
494
- def request_referer request, uri, referer
495
- return unless referer
496
- return if 'https' == referer.scheme.downcase and
497
- 'https' != uri.scheme.downcase
498
-
499
- request['Referer'] = referer
500
- end
501
-
502
- def request_user_agent request
503
- request['User-Agent'] = @user_agent if @user_agent
504
- end
505
-
506
- def resolve(uri, referer = current_page)
507
- uri = uri.dup if uri.is_a?(URI)
508
-
509
- unless uri.is_a?(URI)
510
- uri = uri.to_s.strip.gsub(/[^#{0.chr}-#{126.chr}]/o) { |match|
511
- if RUBY_VERSION >= "1.9.0"
512
- Mechanize::Util.uri_escape(match)
513
- else
514
- sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'C')[0])
515
- end
516
- }
517
-
518
- unescaped = uri.split(/(?:%[0-9A-Fa-f]{2})+|#/)
519
- escaped = uri.scan(/(?:%[0-9A-Fa-f]{2})+|#/)
520
-
521
- escaped_uri = Mechanize::Util.html_unescape(
522
- unescaped.zip(escaped).map { |x,y|
523
- "#{WEBrick::HTTPUtils.escape(x)}#{y}"
524
- }.join('')
525
- )
526
-
527
- begin
528
- uri = URI.parse(escaped_uri)
529
- rescue
530
- uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_uri))
531
- end
532
- end
533
-
534
- scheme = uri.relative? ? 'relative' : uri.scheme.downcase
535
- uri = @scheme_handlers[scheme].call(uri, referer)
536
-
537
- if referer && referer.uri
538
- if uri.path.length == 0 && uri.relative?
539
- uri.path = referer.uri.path
540
- end
541
- end
542
-
543
- uri.path = '/' if uri.path.length == 0
544
-
545
- if uri.relative?
546
- raise ArgumentError, "absolute URL needed (not #{uri})" unless
547
- referer && referer.uri
548
-
549
- base = nil
550
- if referer.respond_to?(:bases) && referer.parser
551
- base = referer.bases.last
552
- end
553
-
554
- uri = ((base && base.uri && base.uri.absolute?) ?
555
- base.uri :
556
- referer.uri) + uri
557
- uri = referer.uri + uri
558
- # Strip initial "/.." bits from the path
559
- uri.path.sub!(/^(\/\.\.)+(?=\/)/, '')
560
- end
561
-
562
- unless ['http', 'https', 'file'].include?(uri.scheme.downcase)
563
- raise ArgumentError, "unsupported scheme: #{uri.scheme}"
564
- end
565
-
566
- uri
567
- end
568
-
569
- def resolve_parameters uri, method, parameters
570
- case method
571
- when :head, :get, :delete, :trace then
572
- if parameters and parameters.length > 0
573
- uri.query ||= ''
574
- uri.query << '&' if uri.query.length > 0
575
- uri.query << Mechanize::Util.build_query_string(parameters)
576
- end
577
-
578
- return uri, nil
579
- end
580
-
581
- return uri, parameters
582
- end
583
-
584
- # :section: Response
585
-
586
- def get_meta_refresh response, uri, page
587
- return nil unless @follow_meta_refresh
588
-
589
- if page.respond_to?(:meta_refresh) and
590
- (redirect = page.meta_refresh.first) then
591
- [redirect.delay, redirect.href] unless
592
- not @follow_meta_refresh_self and redirect.link_self
593
- elsif refresh = response['refresh']
594
- delay, href, link_self = Mechanize::Page::MetaRefresh.parse refresh, uri
595
- raise Mechanize::Error, 'Invalid refresh http header' unless delay
596
- [delay.to_f, href] unless
597
- not @follow_meta_refresh_self and link_self
598
- end
599
- end
600
-
601
- def response_authenticate(response, page, uri, request, headers, params,
602
- referer)
603
- raise Mechanize::UnauthorizedError, page unless @user || @password
604
-
605
- challenges = @authenticate_parser.parse response['www-authenticate']
606
-
607
- if challenge = challenges.find { |c| c.scheme =~ /^Digest$/i } then
608
- realm = challenge.realm uri
609
-
610
- auth_scheme = if response['server'] =~ /Microsoft-IIS/ then
611
- :iis_digest
612
- else
613
- :digest
614
- end
615
-
616
- existing_realms = @authenticate_methods[realm.uri][auth_scheme]
617
-
618
- raise Mechanize::UnauthorizedError, page if
619
- existing_realms.include? realm
620
-
621
- existing_realms << realm
622
- @digest_challenges[realm] = challenge
623
- elsif challenge = challenges.find { |c| c.scheme == 'NTLM' } then
624
- existing_realms = @authenticate_methods[uri + '/'][:ntlm]
625
-
626
- raise Mechanize::UnauthorizedError, page if
627
- existing_realms.include?(realm) and not challenge.params
628
-
629
- existing_realms << realm
630
-
631
- if challenge.params then
632
- type_2 = Net::NTLM::Message.decode64 challenge.params
633
-
634
- type_3 = type_2.response({ :user => @user, :password => @password, },
635
- { :ntlmv2 => true }).encode64
636
-
637
- headers['Authorization'] = "NTLM #{type_3}"
638
- else
639
- type_1 = Net::NTLM::Message::Type1.new.encode64
640
- headers['Authorization'] = "NTLM #{type_1}"
641
- end
642
- elsif challenge = challenges.find { |c| c.scheme == 'Basic' } then
643
- realm = challenge.realm uri
644
-
645
- existing_realms = @authenticate_methods[realm.uri][:basic]
646
-
647
- raise Mechanize::UnauthorizedError, page if
648
- existing_realms.include? realm
649
-
650
- existing_realms << realm
651
- else
652
- raise Mechanize::UnauthorizedError, page
653
- end
654
-
655
- fetch uri, request.method.downcase.to_sym, headers, params, referer
656
- end
657
-
658
- def response_content_encoding response, body_io
659
- length = response.content_length
660
-
661
- length = case body_io
662
- when IO, Tempfile then
663
- body_io.stat.size
664
- else
665
- body_io.length
666
- end unless length
667
-
668
- out_io = nil
669
-
670
- case response['Content-Encoding']
671
- when nil, 'none', '7bit' then
672
- out_io = body_io
673
- when 'deflate' then
674
- log.debug('deflate body') if log
675
-
676
- return if length.zero?
677
-
678
- begin
679
- out_io = inflate body_io
680
- rescue Zlib::BufError, Zlib::DataError
681
- log.error('Unable to inflate page, retrying with raw deflate') if log
682
- body_io.rewind
683
- begin
684
- out_io = inflate body_io, -Zlib::MAX_WBITS
685
- rescue Zlib::BufError, Zlib::DataError
686
- log.error("unable to inflate page: #{$!}") if log
687
- nil
688
- end
689
- end
690
- when 'gzip', 'x-gzip' then
691
- log.debug('gzip body') if log
692
-
693
- return if length.zero?
694
-
695
- begin
696
- zio = Zlib::GzipReader.new body_io
697
- out_io = Tempfile.new 'mechanize-decode', :encoding => 'ascii-8bit'
698
- out_io.binmode
699
-
700
- until zio.eof? do
701
- out_io.write zio.read 16384
702
- end
703
- rescue Zlib::BufError, Zlib::GzipFile::Error
704
- log.error('Unable to gunzip body, trying raw inflate') if log
705
- body_io.rewind
706
- body_io.read 10
707
-
708
- out_io = inflate body_io, -Zlib::MAX_WBITS
709
- rescue Zlib::DataError
710
- log.error("unable to gunzip page: #{$!}") if log
711
- ''
712
- ensure
713
- zio.close if zio and not zio.closed?
714
- end
715
- else
716
- raise Mechanize::Error,
717
- "Unsupported Content-Encoding: #{response['Content-Encoding']}"
718
- end
719
-
720
- out_io.flush
721
- out_io.rewind
722
-
723
- out_io
724
- end
725
-
726
- def response_cookies response, uri, page
727
- if Mechanize::Page === page and page.body =~ /Set-Cookie/n
728
- page.search('//head/meta[@http-equiv="Set-Cookie"]').each do |meta|
729
- save_cookies(uri, meta['content'])
730
- end
731
- end
732
-
733
- header_cookies = response.get_fields 'Set-Cookie'
734
-
735
- return unless header_cookies
736
-
737
- header_cookies.each do |set_cookie|
738
- save_cookies(uri, set_cookie)
739
- end
740
- end
741
-
742
- def save_cookies(uri, set_cookie)
743
- log = log() # reduce method calls
744
- Mechanize::Cookie.parse(uri, set_cookie, log) { |c|
745
- if @cookie_jar.add(uri, c)
746
- log.debug("saved cookie: #{c}") if log
747
- else
748
- log.debug("rejected cookie: #{c}") if log
749
- end
750
- }
751
- end
752
-
753
# Follows a meta refresh found in +page+, if there is one.
#
# get_meta_refresh supplies the delay and the target URL; nil is returned
# when it yields no URL.  Otherwise the agent sleeps for the delay, records
# +page+ in the history and fetches the target with GET, using a blank HTML
# page as the referer.
#
# Raises Mechanize::RedirectLimitReachedError when following the refresh
# would exceed @redirection_limit.
def response_follow_meta_refresh response, uri, page, redirects
  delay, new_url = get_meta_refresh(response, uri, page)
  return nil unless new_url

  followed = redirects + 1

  raise Mechanize::RedirectLimitReachedError.new(page, redirects) if
    followed > @redirection_limit

  sleep delay
  @history.push(page, page.uri)

  # Hand the incremented count to fetch.  Passing +redirects+ through
  # unchanged would keep the counter constant, so a chain of meta refreshes
  # could never reach the limit checked above.
  fetch new_url, :get, {}, [],
        Mechanize::Page.new(nil, {'content-type'=>'text/html'}), followed
end
765
-
766
# Logs the status line of +response+ at info level and each response header
# at debug level.  Returns immediately when no logger is configured.
def response_log response
  return unless log

  status_line = "status: #{response.class} #{response.http_version} " \
                "#{response.code} #{response.message}"
  log.info(status_line)

  response.each_header { |name, value|
    log.debug("response-header: #{name} => #{value}")
  }
end
776
-
777
# Delegates parsing to the owning context and returns whatever its parse
# method returns.  Note the argument order changes: the context receives
# +uri+ first, then +response+, then +body_io+.
def response_parse response, body_io, uri
  @context.parse(uri, response, body_io)
end
780
-
781
# Reads the body of +response+ into an IO and returns that IO rewound.
#
# The body is buffered in a StringIO while it is small.  Once the declared
# Content-Length, or the number of bytes actually read, exceeds
# @max_file_buffer the data is kept in a binary Tempfile instead.
#
# Raises Mechanize::ResponseReadError when the connection fails part-way
# through (the partial body is attached), Mechanize::ResponseCodeError for
# a Net::HTTPUnknownResponse, and EOFError when the body length differs
# from Content-Length (HEAD requests and redirections are exempt).
def response_read response, request
  content_length = response.content_length

  if content_length and content_length > @max_file_buffer then
    body_io = Tempfile.new 'mechanize-raw'
    body_io.binmode if body_io.respond_to? :binmode
  else
    body_io = StringIO.new
  end

  body_io.set_encoding Encoding::BINARY if body_io.respond_to? :set_encoding
  total = 0

  begin
    response.read_body { |part|
      total += part.length

      # The in-memory buffer grew past the limit: spill it to disk.
      if StringIO === body_io and total > @max_file_buffer then
        new_io = Tempfile.new 'mechanize-raw'

        # The check must name the receiver.  A bare `defined? binmode`
        # tests the agent itself, is never true, and left the spill file in
        # text mode.
        new_io.binmode if new_io.respond_to? :binmode

        # Guarded the same way as the set_encoding call above.
        if new_io.respond_to? :set_encoding then
          new_io.set_encoding(body_io.external_encoding)
        end

        new_io.write body_io.string

        body_io = new_io
      end

      body_io.write(part)
      log.debug("Read #{part.length} bytes (#{total} total)") if log
    }
  rescue Net::HTTP::Persistent::Error => e
    # Hand the caller whatever was received before the failure.
    body_io.rewind
    raise Mechanize::ResponseReadError.new(e, response, body_io)
  end

  body_io.flush
  body_io.rewind

  raise Mechanize::ResponseCodeError, response if
    Net::HTTPUnknownResponse === response

  content_length = response.content_length

  unless Net::HTTP::Head === request or Net::HTTPRedirection === response then
    raise EOFError, "Content-Length (#{content_length}) does not match " \
                    "response body length (#{body_io.length})" if
      content_length and content_length != body_io.length
  end

  body_io
end
831
-
832
# Follows the redirect described by +response+, subject to @redirect_ok.
#
# +page+ is returned unchanged when redirects are disabled (false or nil),
# or when only permanent redirects are allowed and +response+ is not a
# Net::HTTPMovedPermanently.  Otherwise +page+ is pushed onto the history
# and the Location header, resolved against the URI of +page+, is fetched.
# HEAD requests stay HEAD; every other method becomes GET.
#
# Raises Mechanize::RedirectLimitReachedError when one more redirect would
# exceed @redirection_limit.
def response_redirect response, method, page, redirects, referer = current_page
  follow = case @redirect_ok
           when true, :all then true
           when false, nil then false
           when :permanent then Net::HTTPMovedPermanently === response
           else true # any other setting follows the redirect
           end

  return page unless follow

  log.info("follow redirect to: #{response['Location']}") if log

  if redirects + 1 > @redirection_limit then
    raise Mechanize::RedirectLimitReachedError.new(page, redirects)
  end

  verb = method == :head ? :head : :get

  origin = page.uri
  @history.push(page, origin)
  target = origin + response['Location'].to_s

  fetch target, verb, {}, [], referer, redirects + 1
end
855
-
856
- # :section: Robots
857
-
858
# Fetches +uri+ and returns the body.  A response code error with code 404
# yields an empty string; any other response code error is re-raised.
def get_robots(uri) # :nodoc:
  fetch(uri).body
rescue Mechanize::ResponseCodeError => e
  raise e unless e.response_code == '404'

  ''
end
864
-
865
# Sets @robots to +value+.  The webrobots library is required only when
# +value+ is true, and the memoized @webrobots instance is dropped whenever
# the setting changes.
def robots= value
  require 'webrobots' if value

  @webrobots = nil unless value == @robots

  @robots = value
end
870
-
871
- ##
872
- # Tests if this agent is allowed to access +uri+, consulting the site's
873
- # robots.txt.
874
-
875
def robots_allowed? uri
  if uri.request_uri == '/robots.txt' then
    # robots.txt itself is never checked against the robots rules.
    true
  else
    webrobots.allowed?(uri)
  end
end
880
-
881
- # Opposite of robots_allowed?
882
-
883
def robots_disallowed? url
  not robots_allowed?(url)
end
886
-
887
- # Returns an error object if there is an error in fetching or parsing
888
- # robots.txt of the site +url+.
889
def robots_error(url)
  robots = webrobots
  robots.error url
end
892
-
893
- # Raises the error if there is an error in fetching or parsing robots.txt of
894
- # the site +url+.
895
def robots_error!(url)
  robots = webrobots
  robots.error! url
end
898
-
899
- # Removes robots.txt cache for the site +url+.
900
def robots_reset(url)
  robots = webrobots
  robots.reset url
end
903
-
904
# Returns the memoized WebRobots instance, building it on first use from
# @user_agent with get_robots as its :http_get callback.
def webrobots
  return @webrobots if @webrobots

  @webrobots = WebRobots.new(@user_agent, :http_get => method(:get_robots))
end
907
-
908
- # :section: SSL
909
-
910
# Returns the certificate held by the underlying HTTP connection object.
def certificate
  connection = @http
  connection.certificate
end
913
-
914
- # :section: Timeouts
915
-
916
- # Sets the connection idle timeout for persistent connections
917
def idle_timeout= timeout
  @idle_timeout = timeout

  # Without a connection object yet, the value is only remembered here.
  return unless @http

  @http.idle_timeout = timeout
end
921
-
922
- # :section: Utility
923
-
924
# Inflates the deflate-compressed IO +compressed+ into a Tempfile and
# returns that Tempfile.  +window_bits+ is passed straight to
# Zlib::Inflate.new; a negative value selects a raw deflate stream without
# the zlib header.
#
# The returned Tempfile is left positioned after the written data; the
# caller is expected to flush and rewind it.
def inflate compressed, window_bits = nil
  inflater = Zlib::Inflate.new window_bits

  out_io = Tempfile.new 'mechanize-decode'
  # The inflated body is arbitrary bytes, so keep the file in binary mode,
  # as the gzip decoding path does for its own output file.
  out_io.binmode

  until compressed.eof? do
    out_io.write inflater.inflate compressed.read 1024
  end

  out_io.write inflater.finish

  out_io
ensure
  # Release the zlib stream on success and on error alike instead of
  # leaving it to the garbage collector.
  inflater.close if inflater and not inflater.closed?
end
936
-
937
# Returns the logger of the owning context.  Callers in this class guard
# every use with `if log`, so the result may be nil.
def log
  context = @context
  context.log
end
940
-
941
# Builds a fresh Net::HTTP::Persistent connection in @http and copies the
# agent's connection and SSL settings onto it.
#
# Optional settings (idle timeout, cert store, verify mode) are applied
# only when they are set.  When both @cert and @key are present, a client
# certificate and private key are installed; values that are not already
# OpenSSL objects are treated as file paths and read from disk.
def set_http
  http = Net::HTTP::Persistent.new 'mechanize', @proxy_uri
  @http = http

  # connection behaviour
  http.keep_alive = @keep_alive_time
  http.idle_timeout = @idle_timeout if @idle_timeout
  http.retry_change_requests = @retry_change_requests

  # certificate verification
  http.ca_file = @ca_file
  http.cert_store = @cert_store if @cert_store
  http.verify_callback = @verify_callback
  http.verify_mode = @verify_mode if @verify_mode

  # Read back the values the connection ended up with so the cached copies
  # agree with it.
  @verify_mode = http.verify_mode
  @cert_store = http.cert_store

  return unless @cert and @key

  # ::File is spelled with a leading :: so the core class is used whatever
  # File resolves to in the enclosing namespace.
  cert = @cert
  unless OpenSSL::X509::Certificate === cert then
    cert = OpenSSL::X509::Certificate.new ::File.read cert
  end

  key = @key
  unless OpenSSL::PKey::PKey === key then
    key = OpenSSL::PKey::RSA.new ::File.read(key), @pass
  end

  http.certificate = cert
  http.private_key = key
end
974
-
975
- ##
976
- # Sets the proxy address, port, user, and password. +addr+ should be a host,
977
- # with no "http://", +port+ may be a port number, service name or port
978
- # number string.
979
-
980
def set_proxy(addr, port, user = nil, pass = nil)
  return unless addr and port

  unless Integer === port then
    # Try the value as a service name first, then as a numeric string.
    port = begin
             Socket.getservbyname port
           rescue SocketError
             begin
               Integer port
             rescue ArgumentError
               raise ArgumentError, "invalid value for port: #{port.inspect}"
             end
           end
  end

  proxy = @proxy_uri = URI("http://#{addr}")
  proxy.port = port
  proxy.user = user if user
  proxy.password = pass if pass

  proxy
end
1002
-
1003
- end
1004
-