mechanize 2.0.pre.1 → 2.0.pre.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (50):
  1. data.tar.gz.sig +2 -2
  2. data/CHANGELOG.rdoc +24 -2
  3. data/Manifest.txt +15 -19
  4. data/Rakefile +6 -3
  5. data/lib/mechanize.rb +168 -28
  6. data/lib/mechanize/form.rb +14 -2
  7. data/lib/mechanize/page.rb +43 -14
  8. data/lib/mechanize/page/link.rb +10 -0
  9. data/lib/mechanize/redirect_not_get_or_head_error.rb +2 -1
  10. data/lib/mechanize/robots_disallowed_error.rb +29 -0
  11. data/lib/mechanize/util.rb +30 -6
  12. data/test/helper.rb +6 -0
  13. data/test/htdocs/canonical_uri.html +9 -0
  14. data/test/htdocs/nofollow.html +9 -0
  15. data/test/htdocs/noindex.html +9 -0
  16. data/test/htdocs/norobots.html +8 -0
  17. data/test/htdocs/rel_nofollow.html +8 -0
  18. data/test/htdocs/robots.html +8 -0
  19. data/test/htdocs/robots.txt +2 -0
  20. data/test/htdocs/tc_links.html +3 -3
  21. data/test/test_links.rb +9 -0
  22. data/test/test_mechanize.rb +617 -2
  23. data/test/{test_forms.rb → test_mechanize_form.rb} +45 -1
  24. data/test/test_mechanize_form_check_box.rb +37 -0
  25. data/test/test_mechanize_form_encoding.rb +118 -0
  26. data/test/{test_field_precedence.rb → test_mechanize_form_field.rb} +4 -16
  27. data/test/test_mechanize_page.rb +60 -1
  28. data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
  29. data/test/test_mechanize_subclass.rb +22 -0
  30. data/test/test_mechanize_util.rb +87 -2
  31. data/test/test_robots.rb +87 -0
  32. metadata +51 -43
  33. metadata.gz.sig +0 -0
  34. data/lib/mechanize/uri_resolver.rb +0 -82
  35. data/test/test_authenticate.rb +0 -71
  36. data/test/test_bad_links.rb +0 -25
  37. data/test/test_blank_form.rb +0 -16
  38. data/test/test_checkboxes.rb +0 -61
  39. data/test/test_content_type.rb +0 -13
  40. data/test/test_encoded_links.rb +0 -20
  41. data/test/test_errors.rb +0 -49
  42. data/test/test_follow_meta.rb +0 -119
  43. data/test/test_get_headers.rb +0 -52
  44. data/test/test_gzipping.rb +0 -22
  45. data/test/test_hash_api.rb +0 -45
  46. data/test/test_mech.rb +0 -283
  47. data/test/test_mech_proxy.rb +0 -16
  48. data/test/test_mechanize_uri_resolver.rb +0 -29
  49. data/test/test_redirect_verb_handling.rb +0 -49
  50. data/test/test_subclass.rb +0 -30
@@ -0,0 +1,87 @@
1
+ require "helper"
2
+
3
+ class TestRobots < Test::Unit::TestCase
4
+ def setup
5
+ @agent = Mechanize.new
6
+ @robot = Mechanize.new { |a|
7
+ a.robots = true
8
+ }
9
+ end
10
+
11
+ def test_mechanize_webrobots_http_get
12
+ robotstxt = @agent.__send__(:webrobots_http_get, 'http://localhost/robots.txt')
13
+ assert_not_equal '', robotstxt
14
+
15
+ robotstxt = @agent.__send__(:webrobots_http_get, 'http://localhost/response_code?code=404')
16
+ assert_equal '', robotstxt
17
+ end
18
+
19
+ def test_robots
20
+ assert_equal "Welcome!", @robot.get("http://localhost/robots.html").title
21
+
22
+ assert_raise(Mechanize::RobotsDisallowedError) {
23
+ @robot.get("http://localhost/norobots.html")
24
+ }
25
+ end
26
+
27
+ def test_robots_allowed_eh
28
+ allowed = URI.parse 'http://localhost/robots.html'
29
+ disallowed = URI.parse 'http://localhost/norobots.html'
30
+ assert @agent.robots_allowed?(allowed)
31
+ assert !@agent.robots_allowed?(disallowed)
32
+
33
+ assert !@agent.robots_disallowed?(allowed)
34
+ assert @agent.robots_disallowed?(disallowed)
35
+ end
36
+
37
+ def test_noindex
38
+ assert_nothing_raised {
39
+ @agent.get("http://localhost/noindex.html")
40
+ }
41
+
42
+ assert @robot.robots_allowed?(URI.parse("http://localhost/noindex.html"))
43
+ assert_raise(Mechanize::RobotsDisallowedError) {
44
+ @robot.get("http://localhost/noindex.html")
45
+ }
46
+ end
47
+
48
+ def test_nofollow
49
+ page = @agent.get("http://localhost/nofollow.html")
50
+
51
+ assert_nothing_raised {
52
+ page.links[0].click
53
+ }
54
+ assert_nothing_raised {
55
+ page.links[1].click
56
+ }
57
+
58
+ page = @robot.get("http://localhost/nofollow.html")
59
+
60
+ assert_raise(Mechanize::RobotsDisallowedError) {
61
+ page.links[0].click
62
+ }
63
+ assert_raise(Mechanize::RobotsDisallowedError) {
64
+ page.links[1].click
65
+ }
66
+ end
67
+
68
+ def test_rel_nofollow
69
+ page = @agent.get("http://localhost/rel_nofollow.html")
70
+
71
+ assert_nothing_raised {
72
+ page.links[0].click
73
+ }
74
+ assert_nothing_raised {
75
+ page.links[1].click
76
+ }
77
+
78
+ page = @robot.get("http://localhost/rel_nofollow.html")
79
+
80
+ assert_nothing_raised {
81
+ page.links[0].click
82
+ }
83
+ assert_raise(Mechanize::RobotsDisallowedError) {
84
+ page.links[1].click
85
+ }
86
+ end
87
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mechanize
3
3
  version: !ruby/object:Gem::Version
4
- hash: 1923831931
4
+ hash: 1923831933
5
5
  prerelease:
6
6
  segments:
7
7
  - 2
8
8
  - 0
9
9
  - pre
10
- - 1
11
- version: 2.0.pre.1
10
+ - 2
11
+ version: 2.0.pre.2
12
12
  platform: ruby
13
13
  authors:
14
14
  - Eric Hodel
@@ -39,7 +39,7 @@ cert_chain:
39
39
  x52qPcexcYZR7w==
40
40
  -----END CERTIFICATE-----
41
41
 
42
- date: 2011-04-10 00:00:00 Z
42
+ date: 2011-04-18 00:00:00 Z
43
43
  dependencies:
44
44
  - !ruby/object:Gem::Dependency
45
45
  name: nokogiri
@@ -95,9 +95,32 @@ dependencies:
95
95
  type: :runtime
96
96
  version_requirements: *id003
97
97
  - !ruby/object:Gem::Dependency
98
- name: hoe
98
+ name: webrobots
99
99
  prerelease: false
100
100
  requirement: &id004 !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ~>
104
+ - !ruby/object:Gem::Version
105
+ hash: 11
106
+ segments:
107
+ - 0
108
+ - 0
109
+ version: "0.0"
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ hash: 19
113
+ segments:
114
+ - 0
115
+ - 0
116
+ - 6
117
+ version: 0.0.6
118
+ type: :runtime
119
+ version_requirements: *id004
120
+ - !ruby/object:Gem::Dependency
121
+ name: hoe
122
+ prerelease: false
123
+ requirement: &id005 !ruby/object:Gem::Requirement
101
124
  none: false
102
125
  requirements:
103
126
  - - ">="
@@ -109,7 +132,7 @@ dependencies:
109
132
  - 4
110
133
  version: 2.9.4
111
134
  type: :development
112
- version_requirements: *id004
135
+ version_requirements: *id005
113
136
  description: |-
114
137
  The Mechanize library is used for automating interaction with websites.
115
138
  Mechanize automatically stores and sends cookies, follows redirects,
@@ -181,8 +204,8 @@ files:
181
204
  - lib/mechanize/redirect_limit_reached_error.rb
182
205
  - lib/mechanize/redirect_not_get_or_head_error.rb
183
206
  - lib/mechanize/response_code_error.rb
207
+ - lib/mechanize/robots_disallowed_error.rb
184
208
  - lib/mechanize/unsupported_scheme_error.rb
185
- - lib/mechanize/uri_resolver.rb
186
209
  - lib/mechanize/util.rb
187
210
  - test/data/htpasswd
188
211
  - test/data/server.crt
@@ -193,6 +216,7 @@ files:
193
216
  - test/htdocs/alt_text.html
194
217
  - test/htdocs/bad_form_test.html
195
218
  - test/htdocs/button.jpg
219
+ - test/htdocs/canonical_uri.html
196
220
  - test/htdocs/dir with spaces/foo.html
197
221
  - test/htdocs/empty_form.html
198
222
  - test/htdocs/file_upload.html
@@ -214,8 +238,14 @@ files:
214
238
  - test/htdocs/link with space.html
215
239
  - test/htdocs/meta_cookie.html
216
240
  - test/htdocs/no_title_test.html
241
+ - test/htdocs/nofollow.html
242
+ - test/htdocs/noindex.html
243
+ - test/htdocs/norobots.html
217
244
  - test/htdocs/rails_3_encoding_hack_form_test.html
245
+ - test/htdocs/rel_nofollow.html
218
246
  - test/htdocs/relative/tc_relative_links.html
247
+ - test/htdocs/robots.html
248
+ - test/htdocs/robots.txt
219
249
  - test/htdocs/tc_bad_charset.html
220
250
  - test/htdocs/tc_bad_links.html
221
251
  - test/htdocs/tc_base_images.html
@@ -241,25 +271,12 @@ files:
241
271
  - test/htdocs/unusual______.html
242
272
  - test/servlets.rb
243
273
  - test/ssl_server.rb
244
- - test/test_authenticate.rb
245
- - test/test_bad_links.rb
246
- - test/test_blank_form.rb
247
- - test/test_checkboxes.rb
248
- - test/test_content_type.rb
249
274
  - test/test_cookies.rb
250
- - test/test_encoded_links.rb
251
- - test/test_errors.rb
252
- - test/test_field_precedence.rb
253
- - test/test_follow_meta.rb
254
275
  - test/test_form_action.rb
255
276
  - test/test_form_as_hash.rb
256
277
  - test/test_form_button.rb
257
278
  - test/test_form_no_inputname.rb
258
- - test/test_forms.rb
259
279
  - test/test_frames.rb
260
- - test/test_get_headers.rb
261
- - test/test_gzipping.rb
262
- - test/test_hash_api.rb
263
280
  - test/test_headers.rb
264
281
  - test/test_history.rb
265
282
  - test/test_history_added.rb
@@ -267,17 +284,20 @@ files:
267
284
  - test/test_if_modified_since.rb
268
285
  - test/test_images.rb
269
286
  - test/test_links.rb
270
- - test/test_mech.rb
271
- - test/test_mech_proxy.rb
272
287
  - test/test_mechanize.rb
273
288
  - test/test_mechanize_cookie.rb
274
289
  - test/test_mechanize_cookie_jar.rb
275
290
  - test/test_mechanize_file.rb
276
291
  - test/test_mechanize_file_request.rb
277
292
  - test/test_mechanize_file_response.rb
293
+ - test/test_mechanize_form.rb
294
+ - test/test_mechanize_form_check_box.rb
295
+ - test/test_mechanize_form_encoding.rb
296
+ - test/test_mechanize_form_field.rb
278
297
  - test/test_mechanize_form_image_button.rb
279
298
  - test/test_mechanize_page.rb
280
- - test/test_mechanize_uri_resolver.rb
299
+ - test/test_mechanize_redirect_not_get_or_head_error.rb
300
+ - test/test_mechanize_subclass.rb
281
301
  - test/test_mechanize_util.rb
282
302
  - test/test_meta.rb
283
303
  - test/test_multi_select.rb
@@ -288,11 +308,11 @@ files:
288
308
  - test/test_pretty_print.rb
289
309
  - test/test_radiobutton.rb
290
310
  - test/test_redirect_limit_reached.rb
291
- - test/test_redirect_verb_handling.rb
292
311
  - test/test_referer.rb
293
312
  - test/test_relative_links.rb
294
313
  - test/test_request.rb
295
314
  - test/test_response_code.rb
315
+ - test/test_robots.rb
296
316
  - test/test_save_file.rb
297
317
  - test/test_scheme.rb
298
318
  - test/test_select.rb
@@ -301,7 +321,6 @@ files:
301
321
  - test/test_select_noopts.rb
302
322
  - test/test_set_fields.rb
303
323
  - test/test_ssl_server.rb
304
- - test/test_subclass.rb
305
324
  - test/test_textarea.rb
306
325
  - test/test_upload.rb
307
326
  - test/test_verbs.rb
@@ -343,25 +362,12 @@ signing_key:
343
362
  specification_version: 3
344
363
  summary: The Mechanize library is used for automating interaction with websites
345
364
  test_files:
346
- - test/test_authenticate.rb
347
- - test/test_bad_links.rb
348
- - test/test_blank_form.rb
349
- - test/test_checkboxes.rb
350
- - test/test_content_type.rb
351
365
  - test/test_cookies.rb
352
- - test/test_encoded_links.rb
353
- - test/test_errors.rb
354
- - test/test_field_precedence.rb
355
- - test/test_follow_meta.rb
356
366
  - test/test_form_action.rb
357
367
  - test/test_form_as_hash.rb
358
368
  - test/test_form_button.rb
359
369
  - test/test_form_no_inputname.rb
360
- - test/test_forms.rb
361
370
  - test/test_frames.rb
362
- - test/test_get_headers.rb
363
- - test/test_gzipping.rb
364
- - test/test_hash_api.rb
365
371
  - test/test_headers.rb
366
372
  - test/test_history.rb
367
373
  - test/test_history_added.rb
@@ -369,17 +375,20 @@ test_files:
369
375
  - test/test_if_modified_since.rb
370
376
  - test/test_images.rb
371
377
  - test/test_links.rb
372
- - test/test_mech.rb
373
- - test/test_mech_proxy.rb
374
378
  - test/test_mechanize.rb
375
379
  - test/test_mechanize_cookie.rb
376
380
  - test/test_mechanize_cookie_jar.rb
377
381
  - test/test_mechanize_file.rb
378
382
  - test/test_mechanize_file_request.rb
379
383
  - test/test_mechanize_file_response.rb
384
+ - test/test_mechanize_form.rb
385
+ - test/test_mechanize_form_check_box.rb
386
+ - test/test_mechanize_form_encoding.rb
387
+ - test/test_mechanize_form_field.rb
380
388
  - test/test_mechanize_form_image_button.rb
381
389
  - test/test_mechanize_page.rb
382
- - test/test_mechanize_uri_resolver.rb
390
+ - test/test_mechanize_redirect_not_get_or_head_error.rb
391
+ - test/test_mechanize_subclass.rb
383
392
  - test/test_mechanize_util.rb
384
393
  - test/test_meta.rb
385
394
  - test/test_multi_select.rb
@@ -390,11 +399,11 @@ test_files:
390
399
  - test/test_pretty_print.rb
391
400
  - test/test_radiobutton.rb
392
401
  - test/test_redirect_limit_reached.rb
393
- - test/test_redirect_verb_handling.rb
394
402
  - test/test_referer.rb
395
403
  - test/test_relative_links.rb
396
404
  - test/test_request.rb
397
405
  - test/test_response_code.rb
406
+ - test/test_robots.rb
398
407
  - test/test_save_file.rb
399
408
  - test/test_scheme.rb
400
409
  - test/test_select.rb
@@ -403,7 +412,6 @@ test_files:
403
412
  - test/test_select_noopts.rb
404
413
  - test/test_set_fields.rb
405
414
  - test/test_ssl_server.rb
406
- - test/test_subclass.rb
407
415
  - test/test_textarea.rb
408
416
  - test/test_upload.rb
409
417
  - test/test_verbs.rb
metadata.gz.sig CHANGED
Binary file
@@ -1,82 +0,0 @@
1
- class Mechanize::URIResolver
2
-
3
- attr_reader :scheme_handlers
4
-
5
- def initialize
6
- @scheme_handlers = Hash.new { |h, scheme|
7
- h[scheme] = lambda { |link, page|
8
- raise Mechanize::UnsupportedSchemeError, scheme
9
- }
10
- }
11
-
12
- @scheme_handlers['http'] = lambda { |link, page| link }
13
- @scheme_handlers['https'] = @scheme_handlers['http']
14
- @scheme_handlers['relative'] = @scheme_handlers['http']
15
- @scheme_handlers['file'] = @scheme_handlers['http']
16
- end
17
-
18
- def resolve uri, referer = nil
19
- uri = uri.dup if uri.is_a?(URI)
20
-
21
- unless uri.is_a?(URI)
22
- uri = uri.to_s.strip.gsub(/[^#{0.chr}-#{126.chr}]/o) { |match|
23
- if RUBY_VERSION >= "1.9.0"
24
- Mechanize::Util.uri_escape(match)
25
- else
26
- sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'C')[0])
27
- end
28
- }
29
-
30
- unescaped = uri.split(/(?:%[0-9A-Fa-f]{2})+|#/)
31
- escaped = uri.scan(/(?:%[0-9A-Fa-f]{2})+|#/)
32
-
33
- escaped_uri = Mechanize::Util.html_unescape(
34
- unescaped.zip(escaped).map { |x,y|
35
- "#{WEBrick::HTTPUtils.escape(x)}#{y}"
36
- }.join('')
37
- )
38
-
39
- begin
40
- uri = URI.parse(escaped_uri)
41
- rescue
42
- uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_uri))
43
- end
44
- end
45
-
46
- scheme = uri.relative? ? 'relative' : uri.scheme.downcase
47
- uri = @scheme_handlers[scheme].call(uri, referer)
48
-
49
- if referer && referer.uri
50
- if uri.path.length == 0 && uri.relative?
51
- uri.path = referer.uri.path
52
- end
53
- end
54
-
55
- uri.path = '/' if uri.path.length == 0
56
-
57
- if uri.relative?
58
- raise ArgumentError, "absolute URL needed (not #{uri})" unless
59
- referer && referer.uri
60
-
61
- base = nil
62
- if referer.respond_to?(:bases) && referer.parser
63
- base = referer.bases.last
64
- end
65
-
66
- uri = ((base && base.uri && base.uri.absolute?) ?
67
- base.uri :
68
- referer.uri) + uri
69
- uri = referer.uri + uri
70
- # Strip initial "/.." bits from the path
71
- uri.path.sub!(/^(\/\.\.)+(?=\/)/, '')
72
- end
73
-
74
- unless ['http', 'https', 'file'].include?(uri.scheme.downcase)
75
- raise ArgumentError, "unsupported scheme: #{uri.scheme}"
76
- end
77
-
78
- uri
79
- end
80
-
81
- end
82
-
@@ -1,71 +0,0 @@
1
- require "helper"
2
- require 'pp'
3
-
4
- class BasicAuthTest < Test::Unit::TestCase
5
- def setup
6
- @agent = Mechanize.new
7
- end
8
-
9
- def test_auth_success
10
- @agent.basic_auth('user', 'pass')
11
- page = @agent.get("http://localhost/basic_auth")
12
- assert_equal('You are authenticated', page.body)
13
- end
14
-
15
- def test_digest_auth_success
16
- @agent.basic_auth('user', 'pass')
17
- page = @agent.get("http://localhost/digest_auth")
18
- assert_equal('You are authenticated', page.body)
19
- end
20
-
21
- def test_no_duplicate_headers
22
- block_called = false
23
- @agent.pre_connect_hooks << lambda { |_, request|
24
- block_called = true
25
- request.to_hash.each do |k,v|
26
- assert_equal(1, v.length)
27
- end
28
- }
29
- @agent.basic_auth('user', 'pass')
30
- @agent.get("http://localhost/digest_auth")
31
- assert block_called
32
- end
33
-
34
- def test_post_auth_success
35
- class << @agent
36
- alias :old_fetch_page :fetch_page
37
- attr_accessor :requests
38
- def fetch_page(uri, method, *args)
39
- @requests ||= []
40
- x = old_fetch_page(uri, method, *args)
41
- @requests << method
42
- x
43
- end
44
- end
45
- @agent.basic_auth('user', 'pass')
46
- page = @agent.post("http://localhost/basic_auth")
47
- assert_equal('You are authenticated', page.body)
48
- assert_equal(2, @agent.requests.length)
49
- r1 = @agent.requests[0]
50
- r2 = @agent.requests[1]
51
- assert_equal(r1, r2)
52
- end
53
-
54
- def test_auth_bad_user_pass
55
- @agent.basic_auth('aaron', 'aaron')
56
- begin
57
- @agent.get("http://localhost/basic_auth")
58
- rescue Mechanize::ResponseCodeError => e
59
- assert_equal("401", e.response_code)
60
- end
61
- end
62
-
63
- def test_auth_failure
64
- begin
65
- @agent.get("http://localhost/basic_auth")
66
- rescue Mechanize::ResponseCodeError => e
67
- assert_equal("401", e.response_code)
68
- end
69
- end
70
-
71
- end