mechanize 2.0.pre.2 → 2.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (46)
  1. data.tar.gz.sig +0 -0
  2. data/CHANGELOG.rdoc +22 -0
  3. data/Manifest.txt +11 -8
  4. data/Rakefile +2 -2
  5. data/examples/flickr_upload.rb +6 -7
  6. data/examples/mech-dump.rb +0 -2
  7. data/examples/proxy_req.rb +0 -2
  8. data/examples/rubyforge.rb +1 -3
  9. data/examples/spider.rb +2 -3
  10. data/lib/mechanize.rb +228 -680
  11. data/lib/mechanize/form/field.rb +1 -1
  12. data/lib/mechanize/history.rb +23 -5
  13. data/lib/mechanize/http.rb +3 -0
  14. data/lib/mechanize/http/agent.rb +738 -0
  15. data/lib/mechanize/inspect.rb +2 -2
  16. data/lib/mechanize/page.rb +101 -42
  17. data/lib/mechanize/page/frame.rb +24 -17
  18. data/lib/mechanize/page/link.rb +72 -54
  19. data/lib/mechanize/page/meta_refresh.rb +56 -0
  20. data/lib/mechanize/response_read_error.rb +27 -0
  21. data/test/htdocs/frame_referer_test.html +10 -0
  22. data/test/htdocs/tc_referer.html +4 -0
  23. data/test/test_frames.rb +9 -0
  24. data/test/test_history.rb +74 -98
  25. data/test/test_mechanize.rb +334 -812
  26. data/test/test_mechanize_form.rb +32 -3
  27. data/test/{test_textarea.rb → test_mechanize_form_textarea.rb} +1 -1
  28. data/test/test_mechanize_http_agent.rb +697 -0
  29. data/test/test_mechanize_link.rb +83 -0
  30. data/test/test_mechanize_page_encoding.rb +147 -0
  31. data/test/test_mechanize_page_link.rb +379 -0
  32. data/test/test_mechanize_page_meta_refresh.rb +115 -0
  33. data/test/test_pretty_print.rb +1 -1
  34. data/test/test_referer.rb +29 -5
  35. data/test/test_response_code.rb +21 -20
  36. data/test/test_robots.rb +13 -17
  37. data/test/test_scheme.rb +1 -1
  38. metadata +30 -31
  39. metadata.gz.sig +0 -0
  40. data/lib/mechanize/page/meta.rb +0 -48
  41. data/test/test_form_no_inputname.rb +0 -15
  42. data/test/test_links.rb +0 -146
  43. data/test/test_mechanize_page.rb +0 -224
  44. data/test/test_meta.rb +0 -67
  45. data/test/test_upload.rb +0 -109
  46. data/test/test_verbs.rb +0 -25
metadata.gz.sig CHANGED
Binary file
data/lib/mechanize/page/meta.rb DELETED
@@ -1,48 +0,0 @@
1
- class Mechanize
2
- class Page < Mechanize::File
3
- # This class encapsulates a Meta tag. Mechanize treats meta tags just
4
- # like 'a' tags. Meta objects will contain links, but most likely will
5
- # have no text.
6
- class Meta < Link
7
-
8
- # Matches the content attribute of a meta tag. After the match:
9
- #
10
- # $1:: delay
11
- # $3:: url
12
- #
13
- CONTENT_REGEXP = /^\s*(\d+\.?\d*)(;|;\s*url=\s*['"]?(\S*?)['"]?)?\s*$/i
14
-
15
- class << self
16
- # Parses the delay and url from the content attribute of a meta tag.
17
- # Parse requires the uri of the current page to infer a url when no
18
- # url is specified. If a block is given, the parsed delay and url
19
- # will be passed to it for further processing.
20
- #
21
- # Returns nil if the delay and url cannot be parsed.
22
- #
23
- # # <meta http-equiv="refresh" content="5;url=http://example.com/" />
24
- # uri = URI.parse('http://current.com/')
25
- #
26
- # Meta.parse("5;url=http://example.com/", uri) # => ['5', 'http://example.com/']
27
- # Meta.parse("5;url=", uri) # => ['5', 'http://current.com/']
28
- # Meta.parse("5", uri) # => ['5', 'http://current.com/']
29
- # Meta.parse("invalid content", uri) # => nil
30
- #
31
- def parse(content, uri)
32
- if content && content =~ CONTENT_REGEXP
33
- delay, url = $1, $3
34
-
35
- dest = uri
36
- dest += url if url
37
- url = dest.to_s
38
-
39
- block_given? ? yield(delay, url) : [delay, url]
40
- else
41
- nil
42
- end
43
- end
44
- end
45
- end
46
- end
47
- end
48
-
data/test/test_form_no_inputname.rb DELETED
@@ -1,15 +0,0 @@
1
- require "helper"
2
-
3
- class FormNoInputNameTest < Test::Unit::TestCase
4
- def setup
5
- @agent = Mechanize.new
6
- @page = @agent.get('http://localhost/form_no_input_name.html')
7
- end
8
-
9
- def test_no_input_name
10
- form = @page.forms.first
11
- assert_equal(0, form.fields.length)
12
- assert_equal(0, form.radiobuttons.length)
13
- assert_equal(0, form.checkboxes.length)
14
- end
15
- end
data/test/test_links.rb DELETED
@@ -1,146 +0,0 @@
1
- require "helper"
2
-
3
- class LinksMechTest < Test::Unit::TestCase
4
- def setup
5
- @agent = Mechanize.new
6
- end
7
-
8
- def test_weird_uri
9
- doc = Nokogiri::HTML::Document.new
10
- node = Nokogiri::XML::Node.new('foo', doc)
11
- node['href'] = 'http://foo.bar/ baz'
12
- link = Mechanize::Page::Link.new(node, nil, nil)
13
- assert_equal 'http://foo.bar/%20baz', link.uri.to_s
14
- end
15
-
16
- def test_unsupported_link_types
17
- page = @agent.get("http://google.com/tc_links.html")
18
- link = page.link_with(:text => 'javascript link')
19
- assert_raise(Mechanize::UnsupportedSchemeError) {
20
- link.click
21
- }
22
-
23
- @agent.scheme_handlers['javascript'] = lambda { |my_link, my_page|
24
- URI.parse('http://localhost/tc_links.html')
25
- }
26
- assert_nothing_raised {
27
- link.click
28
- }
29
- end
30
-
31
- def test_link_with_no_path
32
- page = @agent.get("http://localhost/relative/tc_relative_links.html")
33
- page = page.link_with(:text => 'just the query string').click
34
- assert_equal('http://localhost/relative/tc_relative_links.html?a=b', page.uri.to_s)
35
- end
36
-
37
- def test_base
38
- page = @agent.get("http://google.com/tc_base_link.html")
39
- page = page.links.first.click
40
- assert @agent.visited?("http://localhost/index.html")
41
- end
42
-
43
- def test_find_meta
44
- page = @agent.get("http://localhost/find_link.html")
45
- assert_equal(3, page.meta.length)
46
- assert_equal(%w{
47
- http://www.drphil.com/
48
- http://www.upcase.com/
49
- http://tenderlovemaking.com/ }.sort,
50
- page.meta.map { |x| x.href.downcase }.sort)
51
- end
52
-
53
- def test_find_link
54
- page = @agent.get("http://localhost/find_link.html")
55
- assert_equal(18, page.links.length)
56
- end
57
-
58
- def test_alt_text
59
- page = @agent.get("http://localhost/alt_text.html")
60
- assert_equal(5, page.links.length)
61
- assert_equal(1, page.meta.length)
62
-
63
- assert_equal('', page.meta.first.text)
64
- assert_equal('alt text', page.link_with(:href => 'alt_text.html').text)
65
- assert_equal('', page.link_with(:href => 'no_alt_text.html').text)
66
- assert_equal('no image', page.link_with(:href => 'no_image.html').text)
67
- assert_equal('', page.link_with(:href => 'no_text.html').text)
68
- assert_equal('', page.link_with(:href => 'nil_alt_text.html').text)
69
- end
70
-
71
- def test_click_link
72
- @agent.user_agent_alias = 'Mac Safari'
73
- page = @agent.get("http://localhost/frame_test.html")
74
- link = page.link_with(:text => "Form Test")
75
- assert_not_nil(link)
76
- assert_equal('Form Test', link.text)
77
- page = @agent.click(link)
78
- assert_equal("http://localhost/form_test.html",
79
- @agent.history.last.uri.to_s)
80
- end
81
-
82
- def test_click_method
83
- page = @agent.get("http://localhost/frame_test.html")
84
- link = page.link_with(:text => "Form Test")
85
- assert_not_nil(link)
86
- assert_equal('Form Test', link.text)
87
- page = link.click
88
- assert_equal("http://localhost/form_test.html",
89
- @agent.history.last.uri.to_s)
90
- end
91
-
92
- def test_find_bold_link
93
- page = @agent.get("http://localhost/tc_links.html")
94
- link = page.links_with(:text => /Bold Dude/)
95
- assert_equal(1, link.length)
96
- assert_equal('Bold Dude', link.first.text)
97
- assert_equal [], link.first.rel
98
- assert !link.first.rel?('me')
99
- assert !link.first.rel?('nofollow')
100
-
101
- link = page.links_with(:text => 'Aaron James Patterson')
102
- assert_equal(1, link.length)
103
- assert_equal('Aaron James Patterson', link.first.text)
104
- assert_equal ['me'], link.first.rel
105
- assert link.first.rel?('me')
106
- assert !link.first.rel?('nofollow')
107
-
108
- link = page.links_with(:text => 'Aaron Patterson')
109
- assert_equal(1, link.length)
110
- assert_equal('Aaron Patterson', link.first.text)
111
- assert_equal ['me', 'nofollow'], link.first.rel
112
- assert link.first.rel?('me')
113
- assert link.first.rel?('nofollow')
114
-
115
- link = page.links_with(:text => 'Ruby Rocks!')
116
- assert_equal(1, link.length)
117
- assert_equal('Ruby Rocks!', link.first.text)
118
- end
119
-
120
- def test_link_with_encoded_space
121
- page = @agent.get("http://localhost/tc_links.html")
122
- link = page.link_with(:text => 'encoded space')
123
- page = @agent.click link
124
- end
125
-
126
- def test_link_with_space
127
- page = @agent.get("http://localhost/tc_links.html")
128
- link = page.link_with(:text => 'not encoded space')
129
- page = @agent.click link
130
- end
131
-
132
- def test_link_with_unusual_characters
133
- page = @agent.get("http://localhost/tc_links.html")
134
- link = page.link_with(:text => 'unusual characters')
135
- assert_nothing_raised { @agent.click link }
136
- end
137
-
138
- def test_links_dom_id
139
- page = @agent.get("http://localhost/tc_links.html")
140
- link = page.links_with(:dom_id => 'bold_aaron_link')
141
- link_by_id = page.links_with(:id => 'bold_aaron_link')
142
- assert_equal(1, link.length)
143
- assert_equal('Aaron Patterson', link.first.text)
144
- assert_equal(link, link_by_id)
145
- end
146
- end
data/test/test_mechanize_page.rb DELETED
@@ -1,224 +0,0 @@
1
- # coding: utf-8
2
-
3
- require 'helper'
4
- require 'cgi'
5
-
6
- class TestMechanizePage < Test::Unit::TestCase
7
-
8
- WINDOWS_1255 = <<-HTML
9
- <meta http-equiv="content-type" content="text/html; charset=windows-1255">
10
- <title>hi</title>
11
- HTML
12
-
13
- BAD = <<-HTML
14
- <meta http-equiv="content-type" content="text/html; charset=windows-1255">
15
- <title>Bia\xB3ystok</title>
16
- HTML
17
- BAD.force_encoding Encoding::BINARY if defined? Encoding
18
-
19
- SJIS_TITLE = "\x83\x65\x83\x58\x83\x67"
20
-
21
- SJIS_AFTER_TITLE = <<-HTML
22
- <title>#{SJIS_TITLE}</title>
23
- <meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS">
24
- HTML
25
-
26
- SJIS_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
27
-
28
- SJIS_BAD_AFTER_TITLE = <<-HTML
29
- <title>#{SJIS_TITLE}</title>
30
- <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
31
- HTML
32
-
33
- SJIS_BAD_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
34
-
35
- UTF8_TITLE = 'テスト'
36
- UTF8 = <<-HTML
37
- <title>#{UTF8_TITLE}</title>
38
- <meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS">
39
- HTML
40
-
41
- def setup
42
- @agent = Mechanize.new
43
- @uri = URI('http://example')
44
- @res = { 'content-type' => 'text/html' }
45
- @body = '<title>hi</title>'
46
- end
47
-
48
- def util_page body = @body, res = @res
49
- body.force_encoding Encoding::BINARY if body.respond_to? :force_encoding
50
- Mechanize::Page.new @uri, res, body, 200, @agent
51
- end
52
-
53
- def test_initialize_content_type
54
- assert Mechanize::Page.new nil, 'content-type' => 'application/xhtml+xml'
55
- assert Mechanize::Page.new nil, 'content-type' => 'text/html'
56
-
57
- e = assert_raises Mechanize::ContentTypeError do
58
- Mechanize::Page.new nil, 'content-type' => 'text/plain'
59
- end
60
-
61
- assert_equal 'text/plain', e.content_type
62
- end
63
-
64
- def test_canonical_uri
65
- page = @agent.get("http://localhost/canonical_uri.html")
66
- assert_equal(URI("http://localhost/canonical_uri"), page.canonical_uri)
67
-
68
- page = @agent.get("http://localhost/file_upload.html")
69
- assert_equal(nil, page.canonical_uri)
70
- end
71
-
72
- def test_canonical_uri_unescaped
73
- page = util_page <<-BODY
74
- <head>
75
- <link rel="canonical" href="http://example/white space"/>
76
- </head>
77
- BODY
78
-
79
- assert_equal @uri + '/white%20space', page.canonical_uri
80
- end
81
-
82
- def test_charset
83
- charset = util_page.charset 'text/html;charset=UTF-8'
84
-
85
- assert_equal 'UTF-8', charset
86
- end
87
-
88
- def test_encoding
89
- page = util_page WINDOWS_1255
90
-
91
- assert_equal 'windows-1255', page.encoding
92
- end
93
-
94
- def test_encoding_charset_after_title
95
- page = util_page SJIS_AFTER_TITLE
96
-
97
- assert_equal [], page.parser.errors
98
-
99
- assert_equal 'Shift_JIS', page.encoding
100
- end
101
-
102
- def test_encoding_charset_after_title_bad
103
- page = util_page UTF8
104
-
105
- assert_equal [], page.parser.errors
106
-
107
- assert_equal 'UTF-8', page.encoding
108
- end
109
-
110
- def test_encoding_charset_after_title_double_bad
111
- page = util_page SJIS_BAD_AFTER_TITLE
112
-
113
- assert_equal [], page.parser.errors
114
-
115
- assert_equal 'SHIFT_JIS', page.encoding
116
- end
117
-
118
- def test_encoding_charset_bad
119
- page = util_page "<title>#{UTF8_TITLE}</title>"
120
- page.encodings.replace %w[
121
- UTF-8
122
- Shift_JIS
123
- ]
124
-
125
- assert_equal [], page.parser.errors
126
-
127
- assert_equal 'UTF-8', page.encoding
128
- end
129
-
130
- def test_encoding_equals
131
- page = util_page
132
- page.meta
133
- assert page.instance_variable_get(:@meta)
134
-
135
- page.encoding = 'UTF-8'
136
-
137
- assert_nil page.instance_variable_get(:@meta)
138
-
139
- assert_equal 'UTF-8', page.encoding
140
- assert_equal 'UTF-8', page.parser.encoding
141
- end
142
-
143
- def test_encoding_equals_before_parser
144
- # document has a bad encoding information - windows-1255
145
- page = util_page BAD
146
-
147
- # encoding is wrong, so user wants to force ISO-8859-2
148
- page.encoding = 'ISO-8859-2'
149
-
150
- assert_equal [], page.parser.errors
151
- assert_equal 'ISO-8859-2', page.encoding
152
- assert_equal 'ISO-8859-2', page.parser.encoding
153
- end
154
-
155
- def test_encoding_equals_after_parser
156
- # document has a bad encoding information - windows-1255
157
- page = util_page BAD
158
- page.parser
159
-
160
- # autodetection sets encoding to windows-1255
161
- assert_equal 'windows-1255', page.encoding
162
-
163
- # encoding is wrong, so user wants to force ISO-8859-2
164
- page.encoding = 'ISO-8859-2'
165
-
166
- assert_equal [], page.parser.errors
167
- assert_equal 'ISO-8859-2', page.encoding
168
- assert_equal 'ISO-8859-2', page.parser.encoding
169
- end
170
-
171
- def test_frames_with
172
- page = @agent.get("http://localhost/frame_test.html")
173
- assert_equal(3, page.frames.size)
174
-
175
- find_orig = page.frames.find_all { |f| f.name == 'frame1' }
176
-
177
- find1 = page.frames_with(:name => 'frame1')
178
-
179
- find_orig.zip(find1).each { |a,b|
180
- assert_equal(a, b)
181
- }
182
- end
183
-
184
- def test_title
185
- page = util_page
186
-
187
- assert_equal('hi', page.title)
188
- end
189
-
190
- def test_title_none
191
- page = util_page '' # invalid HTML
192
-
193
- assert_equal(nil, page.title)
194
- end
195
-
196
- def test_page_decoded_with_charset
197
- page = util_page @body, 'content-type' => 'text/html; charset=EUC-JP'
198
-
199
- assert_equal 'EUC-JP', page.encoding
200
- assert_equal 'EUC-JP', page.parser.encoding
201
- end
202
-
203
- def test_form
204
- page = @agent.get("http://localhost/tc_form_action.html")
205
-
206
- form = page.form(:name => 'post_form1')
207
- assert form
208
- yielded = false
209
-
210
- form = page.form(:name => 'post_form1') { |f|
211
- yielded = true
212
- assert f
213
- assert_equal(form, f)
214
- }
215
-
216
- assert yielded
217
-
218
- form_by_action = page.form(:action => '/form_post?a=b&b=c')
219
- assert form_by_action
220
- assert_equal(form, form_by_action)
221
- end
222
-
223
- end
224
-
data/test/test_meta.rb DELETED
@@ -1,67 +0,0 @@
1
- require "helper"
2
-
3
- class MetaTest < Test::Unit::TestCase
4
- Meta = Mechanize::Page::Meta
5
-
6
- #
7
- # CONTENT_REGEXP test
8
- #
9
-
10
- def test_content_regexp
11
- r = Meta::CONTENT_REGEXP
12
-
13
- assert r =~ "0; url=http://localhost:8080/path"
14
- assert_equal "0", $1
15
- assert_equal "http://localhost:8080/path", $3
16
-
17
- assert r =~ "100.001; url=http://localhost:8080/path"
18
- assert_equal "100.001", $1
19
- assert_equal "http://localhost:8080/path", $3
20
-
21
- assert r =~ "0; url='http://localhost:8080/path'"
22
- assert_equal "0", $1
23
- assert_equal "http://localhost:8080/path", $3
24
-
25
- assert r =~ "0; url=\"http://localhost:8080/path\""
26
- assert_equal "0", $1
27
- assert_equal "http://localhost:8080/path", $3
28
-
29
- assert r =~ "0; url="
30
- assert_equal "0", $1
31
- assert_equal "", $3
32
-
33
- assert r =~ "0"
34
- assert_equal "0", $1
35
- assert_equal nil, $3
36
-
37
- assert r =~ " 0; "
38
- assert_equal "0", $1
39
- assert_equal nil, $3
40
-
41
- assert r =~ "0; UrL=http://localhost:8080/path"
42
- assert_equal "0", $1
43
- assert_equal "http://localhost:8080/path", $3
44
- end
45
-
46
- #
47
- # parse test
48
- #
49
-
50
- def test_parse_documentation
51
- uri = URI.parse('http://current.com/here/')
52
-
53
- assert_equal ['5', 'http://example.com/'], Meta.parse("5;url=http://example.com/", uri)
54
- assert_equal ['5', 'http://current.com/here/test'], Meta.parse("5;url=test", uri)
55
- assert_equal ['5', 'http://current.com/test'], Meta.parse("5;url=/test", uri)
56
- assert_equal ['5', 'http://current.com/here/'], Meta.parse("5;url=", uri)
57
- assert_equal ['5', 'http://current.com/here/'], Meta.parse("5", uri)
58
- assert_equal nil, Meta.parse("invalid content", uri)
59
- end
60
-
61
- def test_parse_returns_nil_if_no_delay_and_url_can_be_parsed
62
- uri = URI.parse('http://current.com/')
63
-
64
- assert_equal nil, Meta.parse("invalid content", uri)
65
- assert_equal nil, Meta.parse("invalid content", uri) {|delay, url| 'not nil' }
66
- end
67
- end