mechanize 2.0.pre.1 → 2.0.pre.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (50) hide show
  1. data.tar.gz.sig +2 -2
  2. data/CHANGELOG.rdoc +24 -2
  3. data/Manifest.txt +15 -19
  4. data/Rakefile +6 -3
  5. data/lib/mechanize.rb +168 -28
  6. data/lib/mechanize/form.rb +14 -2
  7. data/lib/mechanize/page.rb +43 -14
  8. data/lib/mechanize/page/link.rb +10 -0
  9. data/lib/mechanize/redirect_not_get_or_head_error.rb +2 -1
  10. data/lib/mechanize/robots_disallowed_error.rb +29 -0
  11. data/lib/mechanize/util.rb +30 -6
  12. data/test/helper.rb +6 -0
  13. data/test/htdocs/canonical_uri.html +9 -0
  14. data/test/htdocs/nofollow.html +9 -0
  15. data/test/htdocs/noindex.html +9 -0
  16. data/test/htdocs/norobots.html +8 -0
  17. data/test/htdocs/rel_nofollow.html +8 -0
  18. data/test/htdocs/robots.html +8 -0
  19. data/test/htdocs/robots.txt +2 -0
  20. data/test/htdocs/tc_links.html +3 -3
  21. data/test/test_links.rb +9 -0
  22. data/test/test_mechanize.rb +617 -2
  23. data/test/{test_forms.rb → test_mechanize_form.rb} +45 -1
  24. data/test/test_mechanize_form_check_box.rb +37 -0
  25. data/test/test_mechanize_form_encoding.rb +118 -0
  26. data/test/{test_field_precedence.rb → test_mechanize_form_field.rb} +4 -16
  27. data/test/test_mechanize_page.rb +60 -1
  28. data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
  29. data/test/test_mechanize_subclass.rb +22 -0
  30. data/test/test_mechanize_util.rb +87 -2
  31. data/test/test_robots.rb +87 -0
  32. metadata +51 -43
  33. metadata.gz.sig +0 -0
  34. data/lib/mechanize/uri_resolver.rb +0 -82
  35. data/test/test_authenticate.rb +0 -71
  36. data/test/test_bad_links.rb +0 -25
  37. data/test/test_blank_form.rb +0 -16
  38. data/test/test_checkboxes.rb +0 -61
  39. data/test/test_content_type.rb +0 -13
  40. data/test/test_encoded_links.rb +0 -20
  41. data/test/test_errors.rb +0 -49
  42. data/test/test_follow_meta.rb +0 -119
  43. data/test/test_get_headers.rb +0 -52
  44. data/test/test_gzipping.rb +0 -22
  45. data/test/test_hash_api.rb +0 -45
  46. data/test/test_mech.rb +0 -283
  47. data/test/test_mech_proxy.rb +0 -16
  48. data/test/test_mechanize_uri_resolver.rb +0 -29
  49. data/test/test_redirect_verb_handling.rb +0 -49
  50. data/test/test_subclass.rb +0 -30
@@ -29,8 +29,17 @@ class Mechanize
29
29
  attr_accessor :method, :action, :name
30
30
 
31
31
  attr_reader :fields, :buttons, :file_uploads, :radiobuttons, :checkboxes
32
+
33
+ # Content-Type for form data (e.g. application/x-www-form-urlencoded)
32
34
  attr_accessor :enctype
33
35
 
36
+ # Character encoding of form data (e.g. UTF-8)
37
+ attr_accessor :encoding
38
+
39
+ # When true, character encoding errors will never be raised on form
40
+ # submission. Default is false
41
+ attr_accessor :ignore_encoding_error
42
+
34
43
  alias :elements :fields
35
44
 
36
45
  attr_reader :form_node
@@ -46,6 +55,8 @@ class Mechanize
46
55
  @page = page
47
56
  @mech = mech
48
57
 
58
+ @encoding = node['accept-charset'] || (page && page.encoding) || nil
59
+ @ignore_encoding_error = false
49
60
  parse
50
61
  end
51
62
 
@@ -173,7 +184,7 @@ class Mechanize
173
184
  private :proc_query
174
185
 
175
186
  def from_native_charset str
176
- Util.from_native_charset(str,page && page.encoding)
187
+ Util.from_native_charset(str, encoding, @ignore_encoding_error, @mech && @mech.log)
177
188
  end
178
189
  private :from_native_charset
179
190
 
@@ -182,6 +193,7 @@ class Mechanize
182
193
  # be used to create a query string for this form.
183
194
  def build_query(buttons = [])
184
195
  query = []
196
+ @mech.log.info("form encoding: #{encoding}") if @mech && @mech.log
185
197
 
186
198
  (fields + checkboxes).sort.each do |f|
187
199
  case f
@@ -395,7 +407,7 @@ class Mechanize
395
407
  # Find all textarea tags
396
408
  form_node.search('textarea').each do |node|
397
409
  next unless node['name']
398
- @fields << Field.new(node, node.inner_text)
410
+ @fields << Textarea.new(node, node.inner_text)
399
411
  end
400
412
 
401
413
  # Find all select tags
@@ -1,34 +1,34 @@
1
- # = Synopsis
1
+ ##
2
2
  # This class encapsulates an HTML page. If Mechanize finds a content
3
3
  # type of 'text/html', this class will be instantiated and returned.
4
4
  #
5
- # == Example
6
- # require 'rubygems'
7
- # require 'mechanize'
5
+ # Example:
8
6
  #
9
- # agent = Mechanize.new
10
- # agent.get('http://google.com/').class #=> Mechanize::Page
7
+ # require 'mechanize'
11
8
  #
9
+ # agent = Mechanize.new
10
+ # agent.get('http://google.com/').class # => Mechanize::Page
11
+
12
12
  class Mechanize::Page < Mechanize::File
13
13
  extend Forwardable
14
14
  extend Mechanize::ElementMatcher
15
15
 
16
16
  attr_accessor :mech
17
17
 
18
+ ##
19
+ # Possible encodings for this page based on HTTP headers and meta elements
20
+
21
+ attr_reader :encodings
22
+
18
23
  def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
19
24
  raise Mechanize::ContentTypeError, response['content-type'] unless
20
25
  response['content-type'] =~ /^(text\/html)|(application\/xhtml\+xml)/i
21
26
 
22
- @bases = nil
23
27
  @encoding = nil
24
28
  @encodings = [nil]
25
- @forms = nil
26
- @frames = nil
27
- @iframes = nil
28
- @links = nil
29
29
  @mech = mech
30
- @meta = nil
31
- @parser = nil
30
+
31
+ reset
32
32
 
33
33
  @encodings << Mechanize::Util.detect_charset(body) if body
34
34
 
@@ -71,6 +71,8 @@ class Mechanize::Page < Mechanize::File
71
71
  end
72
72
 
73
73
  def encoding=(encoding)
74
+ reset
75
+
74
76
  @encoding = encoding
75
77
 
76
78
  if @parser
@@ -101,7 +103,9 @@ class Mechanize::Page < Mechanize::File
101
103
  break if @parser.errors.empty?
102
104
 
103
105
  break unless @parser.errors.any? do |error|
104
- error.message =~ /(indicate encoding)|(Invalid char)/
106
+ error.message =~ /(indicate\ encoding)|
107
+ (Invalid\ char)|
108
+ (input\ conversion failed)/x
105
109
  end
106
110
  end
107
111
  end
@@ -111,6 +115,31 @@ class Mechanize::Page < Mechanize::File
111
115
 
112
116
  alias :root :parser
113
117
 
118
+ def reset
119
+ @bases = nil
120
+ @forms = nil
121
+ @frames = nil
122
+ @iframes = nil
123
+ @links = nil
124
+ @labels = nil
125
+ @labels_hash = nil
126
+ @meta = nil
127
+ @parser = nil
128
+ @title = nil
129
+ end
130
+
131
+ # Return the canonical URI for the page if there is a link tag
132
+ # with rel="canonical".
133
+ def canonical_uri
134
+ link = at('link[@rel="canonical"][@href]')
135
+ return unless link
136
+ href = link['href']
137
+
138
+ URI href
139
+ rescue URI::InvalidURIError
140
+ URI Mechanize::Util.uri_escape href
141
+ end
142
+
114
143
  # Get the content type
115
144
  def content_type
116
145
  response['content-type']
@@ -39,6 +39,16 @@ class Mechanize
39
39
  @href && URI.parse(WEBrick::HTTPUtils.escape(@href))
40
40
  end
41
41
 
42
+ # A list of words in the rel attribute, all lower-cased.
43
+ def rel
44
+ @rel ||= (val = attributes['rel']) ? val.downcase.split(' ') : []
45
+ end
46
+
47
+ # Test if the rel attribute includes +kind+.
48
+ def rel?(kind)
49
+ rel.include?(kind)
50
+ end
51
+
42
52
  # Click on this link
43
53
  def click
44
54
  @mech.click self
@@ -11,7 +11,8 @@ class Mechanize
11
11
  end
12
12
 
13
13
  def to_s
14
- "#{@response_code} redirect received after a #{@verb} request"
14
+ method = @verb.to_s.upcase
15
+ "#{@response_code} redirect received after a #{method} request"
15
16
  end
16
17
  alias :inspect :to_s
17
18
  end
@@ -0,0 +1,29 @@
1
+ class Mechanize
2
+ # Exception that is raised when an access to a resource is
3
+ # disallowed by robots.txt or by the HTML document itself.
4
+ class RobotsDisallowedError < Mechanize::Error
5
+ def initialize(url)
6
+ if url.is_a?(URI)
7
+ @url = url.to_s
8
+ @uri = url
9
+ else
10
+ @url = url.to_s
11
+ end
12
+ end
13
+
14
+ # Returns the URL (string) of the resource that caused this error.
15
+ attr_reader :url
16
+
17
+ # Returns the URL (URI object) of the resource that caused this
18
+ # error. URI::InvalidURIError may be raised if the URL happens to
19
+ # be invalid or not understood by the URI library.
20
+ def uri
21
+ @uri ||= URI.parse(url)
22
+ end
23
+
24
+ def to_s
25
+ "Robots access is disallowed for URL: #{url}"
26
+ end
27
+ alias :inspect :to_s
28
+ end
29
+ end
@@ -7,6 +7,16 @@ class Mechanize::Util
7
7
  :SJIS => "SHIFT_JIS",
8
8
  :UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"}
9
9
 
10
+ # true if RUBY_VERSION is 1.9.0 or later
11
+ NEW_RUBY_ENCODING = RUBY_VERSION >= '1.9.0'
12
+
13
+ # contains encoding error classes to raise
14
+ ENCODING_ERRORS = if NEW_RUBY_ENCODING
15
+ [EncodingError]
16
+ else
17
+ [Iconv::InvalidEncoding, Iconv::IllegalSequence]
18
+ end
19
+
10
20
  def self.build_query_string(parameters, enc=nil)
11
21
  parameters.map { |k,v|
12
22
  # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
@@ -15,6 +25,8 @@ class Mechanize::Util
15
25
  end
16
26
 
17
27
  def self.to_native_charset(s, code=nil)
28
+ location = Gem.location_of_caller.join ':'
29
+ warn "#{location}: Mechanize::Util::to_native_charset is deprecated and will be removed October 2011"
18
30
  if Mechanize.html_parser == Nokogiri::HTML
19
31
  return unless s
20
32
  code ||= detect_charset(s)
@@ -24,20 +36,32 @@ class Mechanize::Util
24
36
  end
25
37
  end
26
38
 
27
- def self.from_native_charset(s, code)
39
+ # Converts string +s+ from UTF-8 to +code+.
40
+ def self.from_native_charset(s, code, ignore_encoding_error=false, log=nil)
28
41
  return s unless s && code
29
42
  return s unless Mechanize.html_parser == Nokogiri::HTML
30
43
 
31
- if RUBY_VERSION < '1.9.2'
32
- begin
33
- Iconv.iconv(code.to_s, "UTF-8", s).join("")
34
- rescue Iconv::InvalidEncoding, Iconv::IllegalSequence
44
+ begin
45
+ encode_to(code, s)
46
+ rescue *ENCODING_ERRORS => ex
47
+ log.debug("from_native_charset: #{ex.class}: form encoding: #{code.inspect} string: #{s}") if log
48
+ if ignore_encoding_error
35
49
  s
50
+ else
51
+ raise
36
52
  end
53
+ end
54
+ end
55
+
56
+ # inner convert method of Util.from_native_charset
57
+ def self.encode_to(encoding, str)
58
+ if NEW_RUBY_ENCODING
59
+ str.encode(encoding)
37
60
  else
38
- s.encode("UTF-8") rescue s
61
+ Iconv.conv(encoding.to_s, "UTF-8", str)
39
62
  end
40
63
  end
64
+ private_class_method :encode_to
41
65
 
42
66
  def self.html_unescape(s)
43
67
  return s unless s
@@ -94,6 +94,12 @@ class Net::HTTP
94
94
  end
95
95
 
96
96
  res.body = PAGE_CACHE[filename]
97
+ case filename
98
+ when /\.txt$/
99
+ res['Content-Type'] = 'text/plain'
100
+ when /\.jpg$/
101
+ res['Content-Type'] = 'image/jpeg'
102
+ end
97
103
  end
98
104
 
99
105
  res['Content-Type'] ||= 'text/html'
@@ -0,0 +1,9 @@
1
+ <html>
2
+ <head>
3
+ <link rel="start" title="site top" href="/" />
4
+ <link rel="canonical" href="http://localhost/canonical_uri" />
5
+ </head>
6
+ <body>
7
+ test
8
+ </body>
9
+ </html>
@@ -0,0 +1,9 @@
1
+ <html>
2
+ <head>
3
+ <title>nofollow test</title>
4
+ <meta name="ROBOTS" content="nofollow">
5
+ </head>
6
+ <body>
7
+ <p>Do not follow <a href="/robots.html">this</a> or <a href="/">this</a>!</p>
8
+ </body>
9
+ </html>
@@ -0,0 +1,9 @@
1
+ <html>
2
+ <head>
3
+ <title>noindex test</title>
4
+ <meta name="ROBOTS" content="noarchive,noindex">
5
+ </head>
6
+ <body>
7
+ <p>Do not index nor archive this page!</p>
8
+ </body>
9
+ </html>
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <head>
3
+ <title>No Robots!</title>
4
+ </head>
5
+ <body>
6
+ <p>Go home, robot!</p>
7
+ </body>
8
+ </html>
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <head>
3
+ <title>rel_nofollow test</title>
4
+ </head>
5
+ <body>
6
+ <p>You can follow <a href="/robots.html">this link</a> but not <a href="/" rel="me nofollow">this</a>!</p>
7
+ </body>
8
+ </html>
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <head>
3
+ <title>Welcome!</title>
4
+ </head>
5
+ <body>
6
+ <p>Welcome, robot!</p>
7
+ </body>
8
+ </html>
@@ -0,0 +1,2 @@
1
+ User-Agent: *
2
+ Disallow: /norobots
@@ -1,9 +1,9 @@
1
1
  <html>
2
2
  <body>
3
3
  <a href="thing.html"><b>Bold Dude</b></a>
4
- <a href="thing.html">Dude</a>
5
- <a href="thing.html">Aaron <b>James</b> Patterson</a>
6
- <a href="thing.html" id="bold_aaron_link"><b>Aaron</b> Patterson</a>
4
+ <a href="thing.html" rel="nofollow">Dude</a>
5
+ <a href="thing.html" rel="me">Aaron <b>James</b> Patterson</a>
6
+ <a href="thing.html" id="bold_aaron_link" rel="me nofollow"><b>Aaron</b> Patterson</a>
7
7
  <a href="thing.html">Ruby <b>Rocks!</b></a>
8
8
  <!-- Testing a bug with escaped stuff in links:
9
9
  http://rubyforge.org/pipermail/mechanize-users/2006-September/000002.html
@@ -94,14 +94,23 @@ class LinksMechTest < Test::Unit::TestCase
94
94
  link = page.links_with(:text => /Bold Dude/)
95
95
  assert_equal(1, link.length)
96
96
  assert_equal('Bold Dude', link.first.text)
97
+ assert_equal [], link.first.rel
98
+ assert !link.first.rel?('me')
99
+ assert !link.first.rel?('nofollow')
97
100
 
98
101
  link = page.links_with(:text => 'Aaron James Patterson')
99
102
  assert_equal(1, link.length)
100
103
  assert_equal('Aaron James Patterson', link.first.text)
104
+ assert_equal ['me'], link.first.rel
105
+ assert link.first.rel?('me')
106
+ assert !link.first.rel?('nofollow')
101
107
 
102
108
  link = page.links_with(:text => 'Aaron Patterson')
103
109
  assert_equal(1, link.length)
104
110
  assert_equal('Aaron Patterson', link.first.text)
111
+ assert_equal ['me', 'nofollow'], link.first.rel
112
+ assert link.first.rel?('me')
113
+ assert link.first.rel?('nofollow')
105
114
 
106
115
  link = page.links_with(:text => 'Ruby Rocks!')
107
116
  assert_equal(1, link.length)
@@ -32,6 +32,31 @@ class TestMechanize < Test::Unit::TestCase
32
32
  end
33
33
  end
34
34
 
35
+ def test_back
36
+ 0.upto(5) do |i|
37
+ assert_equal(i, @agent.history.size)
38
+ @agent.get("http://localhost/")
39
+ end
40
+ @agent.get("http://localhost/form_test.html")
41
+
42
+ assert_equal("http://localhost/form_test.html",
43
+ @agent.history.last.uri.to_s)
44
+ assert_equal("http://localhost/",
45
+ @agent.history[-2].uri.to_s)
46
+
47
+ assert_equal(7, @agent.history.size)
48
+ @agent.back
49
+ assert_equal(6, @agent.history.size)
50
+ assert_equal("http://localhost/",
51
+ @agent.history.last.uri.to_s)
52
+ end
53
+
54
+ def test_basic_auth
55
+ @agent.basic_auth('user', 'pass')
56
+ page = @agent.get("http://localhost/basic_auth")
57
+ assert_equal('You are authenticated', page.body)
58
+ end
59
+
35
60
  def test_cert_key_file
36
61
  Tempfile.open 'key' do |key|
37
62
  Tempfile.open 'cert' do |cert|
@@ -61,6 +86,80 @@ class TestMechanize < Test::Unit::TestCase
61
86
  assert_equal CERT, agent.http.certificate
62
87
  end
63
88
 
89
+ def test_click
90
+ @agent.user_agent_alias = 'Mac Safari'
91
+ page = @agent.get("http://localhost/frame_test.html")
92
+ link = page.link_with(:text => "Form Test")
93
+ assert_not_nil(link)
94
+ page = @agent.click(link)
95
+ assert_equal("http://localhost/form_test.html",
96
+ @agent.history.last.uri.to_s)
97
+ end
98
+
99
+ def test_click_frame_hpricot_style
100
+ page = @agent.get("http://localhost/frame_test.html")
101
+
102
+ link = (page/"//frame[@name='frame2']").first
103
+ assert_not_nil(link)
104
+ page = @agent.click(link)
105
+ assert_equal("http://localhost/form_test.html",
106
+ @agent.history.last.uri.to_s)
107
+ end
108
+
109
+ def test_click_hpricot_style # HACK move to test_divide in Page
110
+ page = @agent.get("http://localhost/frame_test.html")
111
+
112
+ link = (page/"//a[@class='bar']").first
113
+ assert_not_nil(link)
114
+
115
+ page = @agent.click(link)
116
+
117
+ assert_equal("http://localhost/form_test.html",
118
+ @agent.history.last.uri.to_s)
119
+ end
120
+
121
+ def test_click_link_hpricot_style # HACK move to test_search in Page
122
+ page = @agent.get("http://localhost/tc_encoded_links.html")
123
+
124
+ page = @agent.click(page.search('a').first)
125
+
126
+ assert_equal("http://localhost/form_post?a=b&b=c", page.uri.to_s)
127
+ end
128
+
129
+ def test_click_link_query
130
+ page = @agent.get("http://localhost/tc_encoded_links.html")
131
+ link = page.links.first
132
+ assert_equal('/form_post?a=b&b=c', link.href)
133
+
134
+ page = @agent.click(link)
135
+
136
+ assert_equal("http://localhost/form_post?a=b&b=c", page.uri.to_s)
137
+ end
138
+
139
+ def test_click_link_space
140
+ page = @agent.get("http://localhost/tc_bad_links.html")
141
+
142
+ @agent.click page.links.first
143
+
144
+ assert_match(/alt_text.html$/, @agent.history.last.uri.to_s)
145
+ assert_equal(2, @agent.history.length)
146
+ end
147
+
148
+ def test_click_more
149
+ @agent.get 'http://localhost/test_click.html'
150
+ @agent.click 'A Button'
151
+ assert_equal 'http://localhost/frame_test.html?words=nil',
152
+ @agent.page.uri.to_s
153
+ @agent.back
154
+ @agent.click 'A Link'
155
+ assert_equal 'http://localhost/index.html',
156
+ @agent.page.uri.to_s
157
+ @agent.back
158
+ @agent.click @agent.page.link_with(:text => 'A Link')
159
+ assert_equal 'http://localhost/index.html',
160
+ @agent.page.uri.to_s
161
+ end
162
+
64
163
  def test_connection_for_file
65
164
  uri = URI.parse 'file:///nonexistent'
66
165
  conn = @agent.connection_for uri
@@ -74,6 +173,14 @@ class TestMechanize < Test::Unit::TestCase
74
173
  assert_equal @agent.http, conn
75
174
  end
76
175
 
176
+ def test_delete_redirect
177
+ page = @agent.delete('http://localhost/redirect')
178
+
179
+ assert_equal(page.uri.to_s, 'http://localhost/verb')
180
+
181
+ assert_equal 'GET', page.header['X-Request-Method']
182
+ end
183
+
77
184
  #def test_download
78
185
  # Dir.mktmpdir do |dir|
79
186
  # file = "#{dir}/download"
@@ -129,12 +236,293 @@ class TestMechanize < Test::Unit::TestCase
129
236
  uri = URI.parse 'file:///nonexistent'
130
237
 
131
238
  e = assert_raises Mechanize::ResponseCodeError do
132
- page = @agent.send :fetch_page, uri
239
+ @agent.send :fetch_page, uri
133
240
  end
134
241
 
135
242
  assert_equal '404 => Net::HTTPNotFound', e.message
136
243
  end
137
244
 
245
+ def test_fetch_page_post_connect_hook
246
+ response = nil
247
+ @agent.post_connect_hooks << lambda { |_, res|
248
+ response = res
249
+ }
250
+
251
+ @agent.get('http://localhost/')
252
+ assert(response)
253
+ end
254
+
255
+ def test_get
256
+ page = @agent.get('http://localhost', { :q => 'h' }, 'http://example',
257
+ { 'X-H' => 'v' })
258
+
259
+ assert_equal 'http://localhost/?q=h', page.uri.to_s
260
+ end
261
+
262
+ def test_get_HTTP
263
+ page = @agent.get('HTTP://localhost/', { :q => 'hello' })
264
+ assert_equal('HTTP://localhost/?q=hello', page.uri.to_s)
265
+ end
266
+
267
+ def test_get_anchor
268
+ page = @agent.get('http://localhost/?foo=bar&#34;')
269
+ assert_equal('http://localhost/?foo=bar%22', page.uri.to_s)
270
+ end
271
+
272
+ def test_get_bad_url
273
+ assert_raise ArgumentError do
274
+ @agent.get('/foo.html')
275
+ end
276
+ end
277
+
278
+ def test_get_basic_auth_bad
279
+ @agent.basic_auth('aaron', 'aaron')
280
+
281
+ e = assert_raises Mechanize::ResponseCodeError do
282
+ @agent.get("http://localhost/basic_auth")
283
+ end
284
+
285
+ assert_equal("401", e.response_code)
286
+ end
287
+
288
+ def test_get_basic_auth_none
289
+ e = assert_raises Mechanize::ResponseCodeError do
290
+ @agent.get("http://localhost/basic_auth")
291
+ end
292
+
293
+ assert_equal("401", e.response_code)
294
+ end
295
+
296
+ def test_get_digest_auth
297
+ block_called = false
298
+
299
+ @agent.basic_auth('user', 'pass')
300
+
301
+ @agent.pre_connect_hooks << lambda { |_, request|
302
+ block_called = true
303
+ request.to_hash.each do |k,v|
304
+ assert_equal(1, v.length)
305
+ end
306
+ }
307
+
308
+ page = @agent.get("http://localhost/digest_auth")
309
+ assert_equal('You are authenticated', page.body)
310
+ assert block_called
311
+ end
312
+
313
+ def test_get_file
314
+ page = @agent.get("http://localhost/frame_test.html")
315
+ content_length = page.header['Content-Length']
316
+ page_as_string = @agent.get_file("http://localhost/frame_test.html")
317
+ assert_equal(content_length.to_i, page_as_string.length.to_i)
318
+ end
319
+
320
+ def test_get_follow_meta_refresh
321
+ @agent.follow_meta_refresh = true
322
+
323
+ page = @agent.get('http://localhost/tc_follow_meta.html')
324
+
325
+ assert_equal(2, @agent.history.length)
326
+
327
+ assert_equal('http://localhost/tc_follow_meta.html',
328
+ @agent.history.first.uri.to_s)
329
+ assert_equal('http://localhost/index.html', page.uri.to_s)
330
+ assert_equal('http://localhost/index.html', @agent.history.last.uri.to_s)
331
+ end
332
+
333
+ def test_get_follow_meta_refresh_disabled
334
+ page = @agent.get('http://localhost/tc_follow_meta.html')
335
+ assert_equal('http://localhost/tc_follow_meta.html', page.uri.to_s)
336
+ assert_equal(1, page.meta.length)
337
+ end
338
+
339
+ def test_get_follow_meta_refresh_empty_url
340
+ @agent.follow_meta_refresh = true
341
+
342
+ page = @agent.get('http://localhost/refresh_with_empty_url')
343
+
344
+ assert_equal(3, @agent.history.length)
345
+ assert_equal('http://localhost/refresh_with_empty_url',
346
+ @agent.history[0].uri.to_s)
347
+ assert_equal('http://localhost/refresh_with_empty_url',
348
+ @agent.history[1].uri.to_s)
349
+ assert_equal('http://localhost/index.html', page.uri.to_s)
350
+ assert_equal('http://localhost/index.html', @agent.history.last.uri.to_s)
351
+ end
352
+
353
+ def test_get_follow_meta_refresh_in_body
354
+ @agent.follow_meta_refresh = true
355
+ requests = []
356
+ @agent.pre_connect_hooks << lambda { |_, request|
357
+ requests << request
358
+ }
359
+
360
+ @agent.get('http://localhost/tc_meta_in_body.html')
361
+ assert_equal 1, requests.length
362
+ end
363
+
364
+ def test_get_follow_meta_refresh_no_url
365
+ @agent.follow_meta_refresh = true
366
+
367
+ page = @agent.get('http://localhost/refresh_without_url')
368
+
369
+ assert_equal(3, @agent.history.length)
370
+ assert_equal('http://localhost/refresh_without_url',
371
+ @agent.history[0].uri.to_s)
372
+ assert_equal('http://localhost/refresh_without_url',
373
+ @agent.history[1].uri.to_s)
374
+ assert_equal('http://localhost/index.html', page.uri.to_s)
375
+ assert_equal('http://localhost/index.html', @agent.history.last.uri.to_s)
376
+ end
377
+
378
+ def test_get_follow_meta_refresh_referer_not_sent
379
+ @agent.follow_meta_refresh = true
380
+
381
+ requests = []
382
+
383
+ @agent.pre_connect_hooks << lambda { |_, request|
384
+ requests << request
385
+ }
386
+
387
+ @agent.get('http://localhost/tc_follow_meta.html')
388
+
389
+ assert_equal 2, @agent.history.length
390
+ assert_nil requests.last['referer']
391
+ end
392
+
393
+ def test_get_gzip
394
+ page = @agent.get("http://localhost/gzip?file=index.html")
395
+
396
+ assert_kind_of(Mechanize::Page, page)
397
+
398
+ assert_match('Hello World', page.body)
399
+ end
400
+
401
+ def test_get_http_refresh
402
+ @agent.follow_meta_refresh = true
403
+ page = @agent.get('http://localhost/http_refresh?refresh_time=0')
404
+ assert_equal('http://localhost/index.html', page.uri.to_s)
405
+ assert_equal(2, @agent.history.length)
406
+ end
407
+
408
+ def test_get_http_refresh_delay
409
+ @agent.follow_meta_refresh = true
410
+ class << @agent
411
+ attr_accessor :slept
412
+ def sleep *args
413
+ @slept = args
414
+ end
415
+ end
416
+
417
+ @agent.get('http://localhost/http_refresh?refresh_time=1')
418
+ assert_equal [1], @agent.slept
419
+ end
420
+
421
+ def test_get_http_refresh_disabled
422
+ page = @agent.get('http://localhost/http_refresh?refresh_time=0')
423
+ assert_equal('http://localhost/http_refresh?refresh_time=0', page.uri.to_s)
424
+ end
425
+
426
+ def test_get_kcode
427
+ $KCODE = 'u'
428
+ page = @agent.get("http://localhost/?a=#{[0xd6].pack('U')}")
429
+ assert_not_nil(page)
430
+ assert_equal('http://localhost/?a=%D6', page.uri.to_s)
431
+ $KCODE = 'NONE'
432
+ end unless RUBY_VERSION >= '1.9.0'
433
+
434
+ def test_get_query
435
+ page = @agent.get('http://localhost/', { :q => 'hello' })
436
+ assert_equal('http://localhost/?q=hello', page.uri.to_s)
437
+ end
438
+
439
+ def test_get_redirect
440
+ page = @agent.get('http://localhost/redirect')
441
+
442
+ assert_equal(page.uri.to_s, 'http://localhost/verb')
443
+
444
+ assert_equal 'GET', page.header['X-Request-Method']
445
+ end
446
+
447
+ def test_get_redirect_found
448
+ page = @agent.get('http://localhost/response_code?code=302&ct=test/xml')
449
+
450
+ assert_equal('http://localhost/index.html', page.uri.to_s)
451
+
452
+ assert_equal(2, @agent.history.length)
453
+ end
454
+
455
+ def test_get_redirect_infinite
456
+ assert_raises(Mechanize::RedirectLimitReachedError) {
457
+ @agent.get('http://localhost/infinite_refresh')
458
+ }
459
+ end
460
+
461
+ def test_get_referer
462
+ request = nil
463
+ @agent.pre_connect_hooks << lambda { |_, req|
464
+ request = req
465
+ }
466
+
467
+ @agent.get('http://localhost/', [], 'http://tenderlovemaking.com/')
468
+ assert_equal 'http://tenderlovemaking.com/', request['Referer']
469
+ end
470
+
471
+ def test_get_referer_file
472
+ assert_nothing_raised do
473
+ @agent.get('http://localhost', [], Mechanize::File.new(URI.parse('http://tenderlovemaking.com/crossdomain.xml')))
474
+ end
475
+
476
+ # HACK no assertion of behavior
477
+ end
478
+
479
+ def test_get_referer_none
480
+ requests = []
481
+ @agent.pre_connect_hooks << lambda { |_, request|
482
+ requests << request
483
+ }
484
+
485
+ @agent.get('http://localhost/')
486
+ @agent.get('http://localhost/')
487
+ assert_equal(2, requests.length)
488
+ requests.each do |request|
489
+ assert_nil request['referer']
490
+ end
491
+ end
492
+
493
+ def test_get_scheme_unsupported
494
+ assert_raise(Mechanize::UnsupportedSchemeError) {
495
+ @agent.get('ftp://server.com/foo.html')
496
+ }
497
+ end
498
+
499
+ def test_get_space
500
+ page = nil
501
+
502
+ page = @agent.get("http://localhost/tc_bad_links.html ")
503
+
504
+ assert_match(/tc_bad_links.html$/, @agent.history.last.uri.to_s)
505
+
506
+ assert_equal(1, @agent.history.length)
507
+ end
508
+
509
+ def test_get_tilde
510
+ page = @agent.get('http://localhost/?foo=~2')
511
+ assert_equal('http://localhost/?foo=~2', page.uri.to_s)
512
+ end
513
+
514
+ def test_get_weird
515
+ assert_nothing_raised {
516
+ @agent.get('http://localhost/?action=bing&bang=boom=1|a=|b=|c=')
517
+ }
518
+ assert_nothing_raised {
519
+ @agent.get('http://localhost/?a=b&#038;b=c&#038;c=d')
520
+ }
521
+ assert_nothing_raised {
522
+ @agent.get("http://localhost/?a=#{[0xd6].pack('U')}")
523
+ }
524
+ end
525
+
138
526
  def test_get_yield
139
527
  pages = nil
140
528
 
@@ -146,6 +534,59 @@ class TestMechanize < Test::Unit::TestCase
146
534
  assert_equal('File Upload Form', pages.title)
147
535
  end
148
536
 
537
+ def test_head_redirect
538
+ page = @agent.head('http://localhost/redirect')
539
+
540
+ assert_equal(page.uri.to_s, 'http://localhost/verb')
541
+
542
+ assert_equal 'HEAD', page.header['X-Request-Method']
543
+ end
544
+
545
+ def test_history
546
+ 0.upto(25) do |i|
547
+ assert_equal(i, @agent.history.size)
548
+ @agent.get("http://localhost/")
549
+ end
550
+ page = @agent.get("http://localhost/form_test.html")
551
+
552
+ assert_equal("http://localhost/form_test.html",
553
+ @agent.history.last.uri.to_s)
554
+ assert_equal("http://localhost/",
555
+ @agent.history[-2].uri.to_s)
556
+ assert_equal("http://localhost/",
557
+ @agent.history[-2].uri.to_s)
558
+
559
+ assert_equal(true, @agent.visited?("http://localhost/"))
560
+ assert_equal(true, @agent.visited?("/form_test.html"))
561
+ assert_equal(false, @agent.visited?("http://google.com/"))
562
+ assert_equal(true, @agent.visited?(page.links.first))
563
+
564
+ end
565
+
566
+ def test_history_order
567
+ @agent.max_history = 2
568
+ assert_equal(0, @agent.history.length)
569
+
570
+ @agent.get('http://localhost/form_test.html')
571
+ assert_equal(1, @agent.history.length)
572
+
573
+ @agent.get('http://localhost/empty_form.html')
574
+ assert_equal(2, @agent.history.length)
575
+
576
+ @agent.get('http://localhost/tc_checkboxes.html')
577
+ assert_equal(2, @agent.history.length)
578
+ assert_equal('http://localhost/empty_form.html', @agent.history[0].uri.to_s)
579
+ assert_equal('http://localhost/tc_checkboxes.html',
580
+ @agent.history[1].uri.to_s)
581
+ end
582
+
583
+ def test_html_parser_equals
584
+ @agent.html_parser = {}
585
+ assert_raises(NoMethodError) {
586
+ @agent.get('http://localhost/?foo=~2').links
587
+ }
588
+ end
589
+
149
590
  def test_http_request_file
150
591
  uri = URI.parse 'file:///nonexistent'
151
592
  request = @agent.http_request uri, :get
@@ -168,6 +609,47 @@ class TestMechanize < Test::Unit::TestCase
168
609
  assert_equal '/', request.path
169
610
  end
170
611
 
612
+ def test_max_history_equals
613
+ @agent.max_history = 10
614
+ 0.upto(10) do |i|
615
+ assert_equal(i, @agent.history.size)
616
+ @agent.get("http://localhost/")
617
+ end
618
+
619
+ 0.upto(10) do |i|
620
+ assert_equal(10, @agent.history.size)
621
+ @agent.get("http://localhost/")
622
+ end
623
+ end
624
+
625
+ def test_post_basic_auth
626
+ class << @agent
627
+ alias :old_fetch_page :fetch_page
628
+ attr_accessor :requests
629
+ def fetch_page(uri, method, *args)
630
+ @requests ||= []
631
+ x = old_fetch_page(uri, method, *args)
632
+ @requests << method
633
+ x
634
+ end
635
+ end
636
+ @agent.basic_auth('user', 'pass')
637
+ page = @agent.post("http://localhost/basic_auth")
638
+ assert_equal('You are authenticated', page.body)
639
+ assert_equal(2, @agent.requests.length)
640
+ r1 = @agent.requests[0]
641
+ r2 = @agent.requests[1]
642
+ assert_equal(r1, r2)
643
+ end
644
+
645
+ def test_post_redirect
646
+ page = @agent.post('http://localhost/redirect')
647
+
648
+ assert_equal(page.uri.to_s, 'http://localhost/verb')
649
+
650
+ assert_equal 'GET', page.header['X-Request-Method']
651
+ end
652
+
171
653
  def test_post_connect
172
654
  @agent.post_connect_hooks << proc { |agent, response|
173
655
  assert_equal @agent, agent
@@ -192,6 +674,14 @@ class TestMechanize < Test::Unit::TestCase
192
674
  end
193
675
  end
194
676
 
677
+ def test_put_redirect
678
+ page = @agent.put('http://localhost/redirect', 'foo')
679
+
680
+ assert_equal(page.uri.to_s, 'http://localhost/verb')
681
+
682
+ assert_equal 'GET', page.header['X-Request-Method']
683
+ end
684
+
195
685
  def test_request_cookies
196
686
  uri = URI.parse 'http://host.example.com'
197
687
  Mechanize::Cookie.parse uri, 'hello=world domain=.example.com' do |cookie|
@@ -349,6 +839,22 @@ class TestMechanize < Test::Unit::TestCase
349
839
  assert_match %r%Ruby/#{ruby_version}%, @req['user-agent']
350
840
  end
351
841
 
842
+ def test_resolve_bad_uri
843
+ e = assert_raises ArgumentError do
844
+ @agent.resolve 'google'
845
+ end
846
+
847
+ assert_equal 'absolute URL needed (not google)', e.message
848
+ end
849
+
850
+ def test_resolve_utf8
851
+ uri = 'http://example?q=ü'
852
+
853
+ resolved = @agent.resolve uri
854
+
855
+ assert_equal '/?q=%C3%BC', resolved.request_uri
856
+ end
857
+
352
858
  def test_resolve_parameters_body
353
859
  input_params = { :q => 'hello' }
354
860
 
@@ -507,7 +1013,7 @@ class TestMechanize < Test::Unit::TestCase
507
1013
  end
508
1014
 
509
1015
  def test_response_read_encoding_gzip
510
- def @res.read_body()
1016
+ def @res.read_body
511
1017
  yield "\037\213\b\0002\002\225M\000\003"
512
1018
  yield "+H,*\001\000\306p\017I\004\000\000\000"
513
1019
  end
@@ -683,5 +1189,114 @@ class TestMechanize < Test::Unit::TestCase
683
1189
  assert_equal 'UTF-8', page.encoding
684
1190
  end
685
1191
 
1192
+ def test_set_proxy
1193
+ @agent.set_proxy('www.example.com', 9001, 'joe', 'lol')
1194
+
1195
+ assert_equal(@agent.http.proxy_uri.host, 'www.example.com')
1196
+ assert_equal(@agent.http.proxy_uri.port, 9001)
1197
+ assert_equal(@agent.http.proxy_uri.user, 'joe')
1198
+ assert_equal(@agent.http.proxy_uri.password, 'lol')
1199
+ end
1200
+
1201
+ def test_submit_bad_form_method
1202
+ page = @agent.get("http://localhost/bad_form_test.html")
1203
+ assert_raise ArgumentError do
1204
+ @agent.submit(page.forms.first)
1205
+ end
1206
+ end
1207
+
1208
+ def test_submit_check_one
1209
+ page = @agent.get('http://localhost/tc_checkboxes.html')
1210
+ form = page.forms.first
1211
+ form.checkboxes_with(:name => 'green')[1].check
1212
+
1213
+ page = @agent.submit(form)
1214
+
1215
+ assert_equal(1, page.links.length)
1216
+ assert_equal('green:on', page.links.first.text)
1217
+ end
1218
+
1219
+ def test_submit_check_two
1220
+ page = @agent.get('http://localhost/tc_checkboxes.html')
1221
+ form = page.forms.first
1222
+ form.checkboxes_with(:name => 'green')[0].check
1223
+ form.checkboxes_with(:name => 'green')[1].check
1224
+
1225
+ page = @agent.submit(form)
1226
+
1227
+ assert_equal(2, page.links.length)
1228
+ assert_equal('green:on', page.links[0].text)
1229
+ assert_equal('green:on', page.links[1].text)
1230
+ end
1231
+
1232
+ def test_submit_headers
1233
+ page = @agent.get('http://localhost:2000/form_no_action.html')
1234
+ assert form = page.forms.first
1235
+ form.action = '/http_headers'
1236
+ page = @agent.submit(form, nil, { 'foo' => 'bar' })
1237
+ headers = Hash[*(
1238
+ page.body.split("\n").map { |x| x.split('|') }.flatten
1239
+ )]
1240
+ assert_equal 'bar', headers['foo']
1241
+ end
1242
+
1243
+ def test_submit_too_many_radiobuttons
1244
+ page = @agent.get("http://localhost/form_test.html")
1245
+ form = page.form_with(:name => 'post_form1')
1246
+ form.radiobuttons.each { |r| r.checked = true }
1247
+
1248
+ assert_raises Mechanize::Error do
1249
+ @agent.submit(form)
1250
+ end
1251
+ end
1252
+
1253
+ def test_transact
1254
+ @agent.get("http://localhost/frame_test.html")
1255
+ assert_equal(1, @agent.history.length)
1256
+ @agent.transact { |a|
1257
+ 5.times {
1258
+ @agent.get("http://localhost/frame_test.html")
1259
+ }
1260
+ assert_equal(6, @agent.history.length)
1261
+ }
1262
+ assert_equal(1, @agent.history.length)
1263
+ end
1264
+
1265
+ def test_user_agent_alias_equals_unknown
1266
+ assert_raises ArgumentError do
1267
+ @agent.user_agent_alias = "Aaron's Browser"
1268
+ end
1269
+ end
1270
+
1271
+ def test_visited_eh
1272
+ @agent.get("http://localhost/content_type_test?ct=application/pdf")
1273
+ assert_equal(true,
1274
+ @agent.visited?("http://localhost/content_type_test?ct=application/pdf"))
1275
+ assert_equal(false,
1276
+ @agent.visited?("http://localhost/content_type_test"))
1277
+ assert_equal(false,
1278
+ @agent.visited?("http://localhost/content_type_test?ct=text/html"))
1279
+ end
1280
+
1281
+ def test_visited_eh_redirect
1282
+ @agent.get("http://localhost/response_code?code=302")
1283
+ assert_equal("http://localhost/index.html",
1284
+ @agent.current_page.uri.to_s)
1285
+ assert_equal(true,
1286
+ @agent.visited?('http://localhost/response_code?code=302'))
1287
+ end
1288
+
1289
+ def assert_header(page, header)
1290
+ headers = {}
1291
+
1292
+ page.body.split(/[\r\n]+/).each do |page_header|
1293
+ headers.[]=(*page_header.chomp.split(/\|/))
1294
+ end
1295
+
1296
+ header.each do |key, value|
1297
+ assert(headers.has_key?(key))
1298
+ assert_equal(value, headers[key])
1299
+ end
1300
+ end
686
1301
  end
687
1302