mechanize 2.0.pre.1 → 2.0.pre.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (50) hide show
  1. data.tar.gz.sig +2 -2
  2. data/CHANGELOG.rdoc +24 -2
  3. data/Manifest.txt +15 -19
  4. data/Rakefile +6 -3
  5. data/lib/mechanize.rb +168 -28
  6. data/lib/mechanize/form.rb +14 -2
  7. data/lib/mechanize/page.rb +43 -14
  8. data/lib/mechanize/page/link.rb +10 -0
  9. data/lib/mechanize/redirect_not_get_or_head_error.rb +2 -1
  10. data/lib/mechanize/robots_disallowed_error.rb +29 -0
  11. data/lib/mechanize/util.rb +30 -6
  12. data/test/helper.rb +6 -0
  13. data/test/htdocs/canonical_uri.html +9 -0
  14. data/test/htdocs/nofollow.html +9 -0
  15. data/test/htdocs/noindex.html +9 -0
  16. data/test/htdocs/norobots.html +8 -0
  17. data/test/htdocs/rel_nofollow.html +8 -0
  18. data/test/htdocs/robots.html +8 -0
  19. data/test/htdocs/robots.txt +2 -0
  20. data/test/htdocs/tc_links.html +3 -3
  21. data/test/test_links.rb +9 -0
  22. data/test/test_mechanize.rb +617 -2
  23. data/test/{test_forms.rb → test_mechanize_form.rb} +45 -1
  24. data/test/test_mechanize_form_check_box.rb +37 -0
  25. data/test/test_mechanize_form_encoding.rb +118 -0
  26. data/test/{test_field_precedence.rb → test_mechanize_form_field.rb} +4 -16
  27. data/test/test_mechanize_page.rb +60 -1
  28. data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
  29. data/test/test_mechanize_subclass.rb +22 -0
  30. data/test/test_mechanize_util.rb +87 -2
  31. data/test/test_robots.rb +87 -0
  32. metadata +51 -43
  33. metadata.gz.sig +0 -0
  34. data/lib/mechanize/uri_resolver.rb +0 -82
  35. data/test/test_authenticate.rb +0 -71
  36. data/test/test_bad_links.rb +0 -25
  37. data/test/test_blank_form.rb +0 -16
  38. data/test/test_checkboxes.rb +0 -61
  39. data/test/test_content_type.rb +0 -13
  40. data/test/test_encoded_links.rb +0 -20
  41. data/test/test_errors.rb +0 -49
  42. data/test/test_follow_meta.rb +0 -119
  43. data/test/test_get_headers.rb +0 -52
  44. data/test/test_gzipping.rb +0 -22
  45. data/test/test_hash_api.rb +0 -45
  46. data/test/test_mech.rb +0 -283
  47. data/test/test_mech_proxy.rb +0 -16
  48. data/test/test_mechanize_uri_resolver.rb +0 -29
  49. data/test/test_redirect_verb_handling.rb +0 -49
  50. data/test/test_subclass.rb +0 -30
@@ -1,10 +1,42 @@
1
1
  require "helper"
2
2
 
3
- class FormsMechTest < Test::Unit::TestCase
3
+ class TestMechanizeForm < Test::Unit::TestCase
4
+
4
5
  def setup
5
6
  @agent = Mechanize.new
6
7
  end
7
8
 
9
+ def test_build_query_blank_form
10
+ page = @agent.get('http://localhost/tc_blank_form.html')
11
+ form = page.forms.first
12
+ query = form.build_query
13
+ assert(query.length > 0)
14
+ assert query.all? { |x| x[1] == '' }
15
+ end
16
+
17
+ def test_field_with
18
+ page = @agent.get("http://localhost/google.html")
19
+ search = page.forms.find { |f| f.name == "f" }
20
+ assert_not_nil(search)
21
+ assert_not_nil(search.field_with(:name => 'q'))
22
+ assert_not_nil(search.field_with(:name => 'hl'))
23
+ assert_not_nil(search.fields.find { |f| f.name == 'ie' })
24
+ end
25
+
26
+ def test_parse_textarea
27
+ form = Nokogiri::HTML <<-FORM
28
+ <form>
29
+ <textarea name="t">hi</textarea>
30
+ </form>
31
+ FORM
32
+
33
+ form = Mechanize::Form.new form, @agent
34
+ textarea = form.fields.first
35
+
36
+ assert_kind_of Mechanize::Form::Textarea, textarea
37
+ assert_equal 'hi', textarea.value
38
+ end
39
+
8
40
  def test_post_with_rails_3_encoding_hack
9
41
  page = @agent.get("http://localhost/rails_3_encoding_hack_form_test.html")
10
42
  form = page.forms.first
@@ -25,6 +57,18 @@ class FormsMechTest < Test::Unit::TestCase
25
57
  assert_match('/form_no_action.html?first=Aaron', page.uri.to_s)
26
58
  end
27
59
 
60
+ def test_submit_first_field_wins
61
+ page = @agent.get('http://localhost/tc_field_precedence.html')
62
+ form = page.forms.first
63
+
64
+ assert !form.checkboxes.empty?
65
+ assert_equal "1", form.checkboxes.first.value
66
+
67
+ submitted = form.submit
68
+
69
+ assert_equal 'ticky=1&ticky=0', submitted.parser.at('#query').text
70
+ end
71
+
28
72
  def test_submit_takes_arbirary_headers
29
73
  page = @agent.get('http://localhost:2000/form_no_action.html')
30
74
  assert form = page.forms.first
@@ -0,0 +1,37 @@
1
+ require 'helper'
2
+
3
+ class TestMechanizeFormCheckBox < Test::Unit::TestCase
4
+
5
+ def setup
6
+ @agent = Mechanize.new
7
+ @page = @agent.get('http://localhost/tc_checkboxes.html')
8
+ end
9
+
10
+ def test_check
11
+ form = @page.forms.first
12
+
13
+ form.checkbox_with(:name => 'green').check
14
+
15
+ assert(form.checkbox_with(:name => 'green').checked)
16
+
17
+ %w{ red blue yellow brown }.each do |color|
18
+ assert_equal(false, form.checkbox_with(:name => color).checked)
19
+ end
20
+ end
21
+
22
+ def test_uncheck
23
+ form = @page.forms.first
24
+
25
+ checkbox = form.checkbox_with(:name => 'green')
26
+
27
+ checkbox.check
28
+
29
+ assert form.checkbox_with(:name => 'green').checked
30
+
31
+ checkbox.uncheck
32
+
33
+ assert !form.checkbox_with(:name => 'green').checked
34
+ end
35
+
36
+ end
37
+
@@ -0,0 +1,118 @@
1
+ # coding: utf-8
2
+ require "helper"
3
+
4
+ class TestMechanizeFormEncoding < Test::Unit::TestCase
5
+
6
+ # See also: tests of Util.from_native_charset
7
+ # Encoding tests should be done with non-UTF-8 characters
8
+
9
+ INPUTTED_VALUE = "テスト" # "test" in Japanese UTF-8 encoding
10
+ CONTENT_ENCODING = 'Shift_JIS' # one of Japanese encoding
11
+ encoded_value = "\x83\x65\x83\x58\x83\x67" # "test" in Japanese Shift_JIS encoding
12
+ encoded_value.force_encoding(::Encoding::SHIFT_JIS) if encoded_value.respond_to?(:force_encoding)
13
+ EXPECTED_QUERY = "first_name=#{CGI.escape(encoded_value)}&first_name=&gender=&green%5Beggs%5D="
14
+
15
+ if Mechanize::Util::NEW_RUBY_ENCODING
16
+ ENCODING_ERRORS = [EncodingError, Encoding::ConverterNotFoundError] # and so on
17
+ else
18
+ ENCODING_ERRORS = [Iconv::InvalidEncoding, Iconv::IllegalSequence]
19
+ end
20
+
21
+ ENCODING_LOG_MESSAGE = /INFO -- : form encoding: Shift_JIS/
22
+ INVALID_ENCODING = 'UTF-eight'
23
+
24
+ def setup
25
+ @agent = Mechanize.new
26
+ end
27
+
28
+ def set_form_with_encoding(enc)
29
+ page = @agent.get("http://localhost/form_set_fields.html")
30
+ form = page.forms.first
31
+ form.encoding = enc
32
+ form['first_name'] = INPUTTED_VALUE
33
+ form
34
+ end
35
+
36
+
37
+ def test_form_encoding_returns_accept_charset
38
+ page = @agent.get("http://localhost/rails_3_encoding_hack_form_test.html")
39
+ form = page.forms.first
40
+ accept_charset = form.form_node['accept-charset']
41
+
42
+ assert accept_charset
43
+ assert_equal accept_charset, form.encoding
44
+ assert_not_equal page.encoding, form.encoding
45
+ end
46
+
47
+ def test_form_encoding_returns_page_encoding_when_no_accept_charset
48
+ page = @agent.get("http://localhost/form_set_fields.html")
49
+ form = page.forms.first
50
+ accept_charset = form.form_node['accept-charset']
51
+
52
+ assert_nil accept_charset
53
+ assert_not_equal accept_charset, form.encoding
54
+ assert_equal page.encoding, form.encoding
55
+ end
56
+
57
+ def test_form_encoding_equals_sets_new_encoding
58
+ page = @agent.get("http://localhost/form_set_fields.html")
59
+ form = page.forms.first
60
+
61
+ assert_not_equal CONTENT_ENCODING, form.encoding
62
+
63
+ form.encoding = CONTENT_ENCODING
64
+
65
+ assert_equal CONTENT_ENCODING, form.encoding
66
+ end
67
+
68
+ def test_form_encoding_returns_nil_when_no_page_in_initialize
69
+ # this sequence is seen at Mechanize#post(url, query_hash)
70
+
71
+ node = {}
72
+ # Create a fake form
73
+ class << node
74
+ def search(*args); []; end
75
+ end
76
+ node['method'] = 'POST'
77
+ node['enctype'] = 'application/x-www-form-urlencoded'
78
+ form = Mechanize::Form.new(node)
79
+
80
+ assert_equal nil, form.encoding
81
+ end
82
+
83
+
84
+ def test_post_form_with_form_encoding
85
+ form = set_form_with_encoding CONTENT_ENCODING
86
+ form.submit
87
+
88
+ # we can not use "links.find{|l| l.text == 'key:val'}" assertion here
89
+ # because the link text encoding is always UTF-8 regardless of html encoding
90
+ assert EXPECTED_QUERY, @agent.page.at('div#query').inner_text
91
+ end
92
+
93
+ def test_post_form_with_problematic_encoding
94
+ form = set_form_with_encoding INVALID_ENCODING
95
+
96
+ assert_raise(*ENCODING_ERRORS){ form.submit }
97
+ end
98
+
99
+ def test_form_ignore_encoding_error_is_true
100
+ form = set_form_with_encoding INVALID_ENCODING
101
+ form.ignore_encoding_error = true
102
+
103
+ assert_nothing_raised(*ENCODING_ERRORS){ form.submit }
104
+ end
105
+
106
+ def test_post_form_logs_form_encoding
107
+ sio = StringIO.new
108
+ @agent.log = Logger.new(sio)
109
+ @agent.log.level = Logger::INFO
110
+
111
+ form = set_form_with_encoding CONTENT_ENCODING
112
+ form.submit
113
+
114
+ assert_match ENCODING_LOG_MESSAGE, sio.string
115
+
116
+ @agent.log = nil
117
+ end
118
+ end
@@ -1,22 +1,8 @@
1
1
  require 'helper'
2
2
 
3
- class TestFieldPrecedence < Test::Unit::TestCase
4
- def setup
5
- @agent = Mechanize.new
6
- @page = @agent.get('http://localhost/tc_field_precedence.html')
7
- end
8
-
9
- def test_first_field_wins
10
- form = @page.forms.first
11
- assert !form.checkboxes.empty?
12
- assert_equal "1", form.checkboxes.first.value
3
+ class TestMechanizeFormField < Test::Unit::TestCase
13
4
 
14
- submitted = form.submit
15
-
16
- assert_equal 'ticky=1&ticky=0', submitted.parser.at('#query').text
17
- end
18
-
19
- def test_field_sort
5
+ def test_field_spaceship
20
6
  doc = Nokogiri::HTML::Document.new
21
7
  node = doc.create_element('input')
22
8
  node['name'] = 'foo'
@@ -30,4 +16,6 @@ class TestFieldPrecedence < Test::Unit::TestCase
30
16
  assert_equal [a, b], [b, a].sort
31
17
  assert_equal [b, c].sort, [b, c].sort
32
18
  end
19
+
33
20
  end
21
+
@@ -40,15 +40,45 @@ class TestMechanizePage < Test::Unit::TestCase
40
40
 
41
41
  def setup
42
42
  @agent = Mechanize.new
43
- @uri = URI.parse 'http://example'
43
+ @uri = URI('http://example')
44
44
  @res = { 'content-type' => 'text/html' }
45
45
  @body = '<title>hi</title>'
46
46
  end
47
47
 
48
48
  def util_page body = @body, res = @res
49
+ body.force_encoding Encoding::BINARY if body.respond_to? :force_encoding
49
50
  Mechanize::Page.new @uri, res, body, 200, @agent
50
51
  end
51
52
 
53
+ def test_initialize_content_type
54
+ assert Mechanize::Page.new nil, 'content-type' => 'application/xhtml+xml'
55
+ assert Mechanize::Page.new nil, 'content-type' => 'text/html'
56
+
57
+ e = assert_raises Mechanize::ContentTypeError do
58
+ Mechanize::Page.new nil, 'content-type' => 'text/plain'
59
+ end
60
+
61
+ assert_equal 'text/plain', e.content_type
62
+ end
63
+
64
+ def test_canonical_uri
65
+ page = @agent.get("http://localhost/canonical_uri.html")
66
+ assert_equal(URI("http://localhost/canonical_uri"), page.canonical_uri)
67
+
68
+ page = @agent.get("http://localhost/file_upload.html")
69
+ assert_equal(nil, page.canonical_uri)
70
+ end
71
+
72
+ def test_canonical_uri_unescaped
73
+ page = util_page <<-BODY
74
+ <head>
75
+ <link rel="canonical" href="http://example/white space"/>
76
+ </head>
77
+ BODY
78
+
79
+ assert_equal @uri + '/white%20space', page.canonical_uri
80
+ end
81
+
52
82
  def test_charset
53
83
  charset = util_page.charset 'text/html;charset=UTF-8'
54
84
 
@@ -85,11 +115,27 @@ class TestMechanizePage < Test::Unit::TestCase
85
115
  assert_equal 'SHIFT_JIS', page.encoding
86
116
  end
87
117
 
118
+ def test_encoding_charset_bad
119
+ page = util_page "<title>#{UTF8_TITLE}</title>"
120
+ page.encodings.replace %w[
121
+ UTF-8
122
+ Shift_JIS
123
+ ]
124
+
125
+ assert_equal [], page.parser.errors
126
+
127
+ assert_equal 'UTF-8', page.encoding
128
+ end
129
+
88
130
  def test_encoding_equals
89
131
  page = util_page
132
+ page.meta
133
+ assert page.instance_variable_get(:@meta)
90
134
 
91
135
  page.encoding = 'UTF-8'
92
136
 
137
+ assert_nil page.instance_variable_get(:@meta)
138
+
93
139
  assert_equal 'UTF-8', page.encoding
94
140
  assert_equal 'UTF-8', page.parser.encoding
95
141
  end
@@ -122,6 +168,19 @@ class TestMechanizePage < Test::Unit::TestCase
122
168
  assert_equal 'ISO-8859-2', page.parser.encoding
123
169
  end
124
170
 
171
+ def test_frames_with
172
+ page = @agent.get("http://localhost/frame_test.html")
173
+ assert_equal(3, page.frames.size)
174
+
175
+ find_orig = page.frames.find_all { |f| f.name == 'frame1' }
176
+
177
+ find1 = page.frames_with(:name => 'frame1')
178
+
179
+ find_orig.zip(find1).each { |a,b|
180
+ assert_equal(a, b)
181
+ }
182
+ end
183
+
125
184
  def test_title
126
185
  page = util_page
127
186
 
@@ -0,0 +1,18 @@
1
+ require 'helper'
2
+
3
+ class TestMechanizeRedirectNotGetOrHead < Test::Unit::TestCase
4
+
5
+ def setup
6
+ @agent = Mechanize.new
7
+ end
8
+
9
+ def test_to_s
10
+ page = MechTestHelper.fake_page(@agent)
11
+
12
+ error = Mechanize::RedirectNotGetOrHeadError.new(page, :put)
13
+
14
+ assert_match(/ PUT /, error.to_s)
15
+ end
16
+
17
+ end
18
+
@@ -0,0 +1,22 @@
1
+ require "helper"
2
+
3
+ class TestMechanizeSubclass < Test::Unit::TestCase
4
+
5
+ class Parent < Mechanize
6
+ @html_parser = :parser
7
+ @log = :log
8
+ end
9
+
10
+ class Child < Parent
11
+ end
12
+
13
+ def test_subclass_inherits_html_parser
14
+ assert_equal :parser, Child.html_parser
15
+ end
16
+
17
+ def test_subclass_inherits_log
18
+ assert_equal :log, Child.log
19
+ end
20
+
21
+ end
22
+
@@ -1,7 +1,92 @@
1
- require "helper"
1
+ # coding: utf-8
2
+ require 'helper'
2
3
 
3
4
  class TestMechanizeUtil < Test::Unit::TestCase
5
+
6
+ INPUTTED_VALUE = "テスト" # "test" in Japanese UTF-8 encoding
7
+ CONTENT_ENCODING = 'Shift_JIS' # one of Japanese encoding
8
+ ENCODED_VALUE = "\x83\x65\x83\x58\x83\x67" # "test" in Japanese Shift_JIS encoding
9
+
10
+ if Mechanize::Util::NEW_RUBY_ENCODING
11
+ ENCODING_ERRORS = [EncodingError, Encoding::ConverterNotFoundError] # and so on
12
+ ERROR_LOG_MESSAGE = /from_native_charset: Encoding::ConverterNotFoundError: form encoding: "UTF-eight"/
13
+ ENCODED_VALUE.force_encoding(::Encoding::SHIFT_JIS)
14
+ else
15
+ ENCODING_ERRORS = [Iconv::InvalidEncoding, Iconv::IllegalSequence]
16
+ ERROR_LOG_MESSAGE = /from_native_charset: Iconv::InvalidEncoding: form encoding: "UTF-eight"/
17
+ end
18
+
19
+ INVALID_ENCODING = 'UTF-eight'
20
+
21
+ def setup
22
+ @result = "non_nil"
23
+ end
24
+
4
25
  def test_from_native_charset
5
- assert_equal 'foo', Mechanize::Util.from_native_charset('foo', nil)
26
+ @result = Mechanize::Util.from_native_charset(INPUTTED_VALUE, CONTENT_ENCODING)
27
+ assert_equal ENCODED_VALUE, @result
28
+ end
29
+
30
+ def test_from_native_charset_returns_nil_when_no_string
31
+ @result = Mechanize::Util.from_native_charset(nil, CONTENT_ENCODING)
32
+ assert_equal nil, @result
33
+ end
34
+
35
+ def test_from_native_charset_doesnot_convert_when_no_encoding
36
+ @result = Mechanize::Util.from_native_charset(INPUTTED_VALUE, nil)
37
+ assert_not_equal ENCODED_VALUE, @result
38
+ assert_equal INPUTTED_VALUE, @result
39
+ end
40
+
41
+ def test_from_native_charset_doesnot_convert_when_not_nokogiri
42
+ parser = Mechanize.html_parser
43
+ Mechanize.html_parser = 'Another HTML Parser'
44
+
45
+ @result = Mechanize::Util.from_native_charset(INPUTTED_VALUE, CONTENT_ENCODING)
46
+ assert_not_equal ENCODED_VALUE, @result
47
+ assert_equal INPUTTED_VALUE, @result
48
+
49
+ Mechanize.html_parser = parser
50
+ end
51
+
52
+ def test_from_native_charset_raises_error_with_bad_encoding
53
+ assert_raise(*ENCODING_ERRORS){
54
+ @result = Mechanize::Util.from_native_charset(INPUTTED_VALUE, INVALID_ENCODING)
55
+ }
56
+ end
57
+
58
+ def test_from_native_charset_suppress_encoding_error_when_3rd_arg_is_true
59
+ assert_nothing_raised(*ENCODING_ERRORS){
60
+ @result = Mechanize::Util.from_native_charset(INPUTTED_VALUE, INVALID_ENCODING, true)
61
+ }
62
+ end
63
+
64
+ def test_from_native_charset_doesnot_convert_when_encoding_error_raised_and_ignored
65
+ @result = Mechanize::Util.from_native_charset(INPUTTED_VALUE, INVALID_ENCODING, true)
66
+
67
+ assert_not_equal ENCODED_VALUE, @result
68
+ assert_equal INPUTTED_VALUE, @result
69
+ end
70
+
71
+ def test_from_native_charset_logs_form_when_encoding_error_raised
72
+ sio = StringIO.new
73
+ log = Logger.new(sio)
74
+ log.level = Logger::DEBUG
75
+
76
+ assert_raise(*ENCODING_ERRORS){
77
+ @result = Mechanize::Util.from_native_charset(INPUTTED_VALUE, INVALID_ENCODING, nil, log)
78
+ }
79
+ assert_match ERROR_LOG_MESSAGE, sio.string
80
+ end
81
+
82
+ def test_from_native_charset_logs_form_when_encoding_error_is_ignored
83
+ sio = StringIO.new
84
+ log = Logger.new(sio)
85
+ log.level = Logger::DEBUG
86
+
87
+ assert_nothing_raised(*ENCODING_ERRORS){
88
+ @result = Mechanize::Util.from_native_charset(INPUTTED_VALUE, INVALID_ENCODING, true, log)
89
+ }
90
+ assert_match ERROR_LOG_MESSAGE, sio.string
6
91
  end
7
92
  end