mechanize 0.7.6 → 0.7.7

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (65) hide show
  1. data/EXAMPLES.txt +87 -40
  2. data/History.txt +21 -0
  3. data/Manifest.txt +51 -47
  4. data/lib/www/mechanize.rb +88 -22
  5. data/lib/www/mechanize/cookie.rb +1 -1
  6. data/lib/www/mechanize/form.rb +27 -14
  7. data/lib/www/mechanize/form/multi_select_list.rb +1 -1
  8. data/lib/www/mechanize/monkey_patch.rb +3 -0
  9. data/lib/www/mechanize/page.rb +20 -16
  10. data/lib/www/mechanize/page/link.rb +2 -2
  11. data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
  12. data/test/helper.rb +14 -0
  13. data/test/htdocs/meta_cookie.html +11 -0
  14. data/test/servlets.rb +10 -0
  15. data/test/{tc_authenticate.rb → test_authenticate.rb} +0 -0
  16. data/test/{tc_bad_links.rb → test_bad_links.rb} +0 -0
  17. data/test/{tc_blank_form.rb → test_blank_form.rb} +0 -0
  18. data/test/{tc_checkboxes.rb → test_checkboxes.rb} +0 -0
  19. data/test/{tc_cookie_class.rb → test_cookie_class.rb} +9 -0
  20. data/test/{tc_cookie_jar.rb → test_cookie_jar.rb} +0 -0
  21. data/test/{tc_cookies.rb → test_cookies.rb} +6 -0
  22. data/test/{tc_encoded_links.rb → test_encoded_links.rb} +1 -1
  23. data/test/{tc_errors.rb → test_errors.rb} +0 -0
  24. data/test/{tc_follow_meta.rb → test_follow_meta.rb} +0 -0
  25. data/test/{tc_form_action.rb → test_form_action.rb} +1 -1
  26. data/test/{tc_form_as_hash.rb → test_form_as_hash.rb} +0 -0
  27. data/test/{tc_form_button.rb → test_form_button.rb} +0 -0
  28. data/test/{tc_form_no_inputname.rb → test_form_no_inputname.rb} +0 -0
  29. data/test/{tc_forms.rb → test_forms.rb} +0 -0
  30. data/test/{tc_frames.rb → test_frames.rb} +0 -0
  31. data/test/test_get_headers.rb +45 -0
  32. data/test/{tc_gzipping.rb → test_gzipping.rb} +0 -0
  33. data/test/test_hash_api.rb +42 -0
  34. data/test/{tc_history.rb → test_history.rb} +0 -0
  35. data/test/{tc_history_added.rb → test_history_added.rb} +0 -0
  36. data/test/{tc_html_unscape_forms.rb → test_html_unscape_forms.rb} +0 -0
  37. data/test/{tc_if_modified_since.rb → test_if_modified_since.rb} +0 -0
  38. data/test/{tc_keep_alive.rb → test_keep_alive.rb} +0 -0
  39. data/test/{tc_links.rb → test_links.rb} +0 -0
  40. data/test/{tc_mech.rb → test_mech.rb} +2 -2
  41. data/test/{tc_mechanize_file.rb → test_mechanize_file.rb} +0 -0
  42. data/test/{tc_multi_select.rb → test_multi_select.rb} +0 -0
  43. data/test/{tc_no_attributes.rb → test_no_attributes.rb} +0 -0
  44. data/test/{tc_option.rb → test_option.rb} +0 -0
  45. data/test/{tc_page.rb → test_page.rb} +17 -0
  46. data/test/{tc_pluggable_parser.rb → test_pluggable_parser.rb} +0 -0
  47. data/test/{tc_post_form.rb → test_post_form.rb} +0 -0
  48. data/test/{tc_pretty_print.rb → test_pretty_print.rb} +0 -0
  49. data/test/{tc_radiobutton.rb → test_radiobutton.rb} +0 -0
  50. data/test/test_redirect_limit_reached.rb +41 -0
  51. data/test/{tc_referer.rb → test_referer.rb} +0 -0
  52. data/test/{tc_relative_links.rb → test_relative_links.rb} +0 -0
  53. data/test/{tc_response_code.rb → test_response_code.rb} +0 -0
  54. data/test/{tc_save_file.rb → test_save_file.rb} +0 -0
  55. data/test/{tc_select.rb → test_select.rb} +0 -0
  56. data/test/{tc_select_all.rb → test_select_all.rb} +0 -0
  57. data/test/{tc_select_none.rb → test_select_none.rb} +0 -0
  58. data/test/{tc_select_noopts.rb → test_select_noopts.rb} +0 -0
  59. data/test/{tc_set_fields.rb → test_set_fields.rb} +8 -0
  60. data/test/{tc_ssl_server.rb → test_ssl_server.rb} +0 -0
  61. data/test/{tc_subclass.rb → test_subclass.rb} +0 -0
  62. data/test/{tc_textarea.rb → test_textarea.rb} +0 -0
  63. data/test/{tc_upload.rb → test_upload.rb} +11 -11
  64. metadata +106 -52
  65. data/test/test_all.rb +0 -5
@@ -7,7 +7,7 @@ module WWW
7
7
  class Cookie < WEBrick::Cookie
8
8
  def self.parse(uri, str, log = nil)
9
9
  return str.split(/,(?=[^;,]*=)|,$/).collect { |c|
10
- cookie_elem = c.split(/;/)
10
+ cookie_elem = c.split(/;+/)
11
11
  first_elem = cookie_elem.shift
12
12
  first_elem.strip!
13
13
  key, value = first_elem.split(/=/, 2)
@@ -74,20 +74,27 @@ module WWW
74
74
  # This method sets multiple fields on the form. It takes a list of field
75
75
  # name, value pairs. If there is more than one field found with the
76
76
  # same name, this method will set the first one found. If you want to
77
- # set the value of a duplicate field, use a value which is an Array with
78
- # the second value of the array as the index in to the form. The index
77
+ # set the value of a duplicate field, use a value which is a Hash with
78
+ # the key as the index into the form. The index
79
79
  # is zero based. For example, to set the second field named 'foo', you
80
80
  # could do the following:
81
- # form.set_fields( :foo => ['bar', 1] )
81
+ # form.set_fields( :foo => { 1 => 'bar' } )
82
82
  def set_fields(fields = {})
83
83
  fields.each do |k,v|
84
- value = nil
85
- index = 0
86
- v.each do |val|
87
- index = val.to_i unless value.nil?
88
- value = val if value.nil?
84
+ case v
85
+ when Hash
86
+ v.each do |index, value|
87
+ self.fields.name(k.to_s).[](index).value = value
88
+ end
89
+ else
90
+ value = nil
91
+ index = 0
92
+ v.each do |val|
93
+ index = val.to_i unless value.nil?
94
+ value = val if value.nil?
95
+ end
96
+ self.fields.name(k.to_s).[](index).value = value
89
97
  end
90
- self.fields.name(k.to_s).[](index).value = value
91
98
  end
92
99
  end
93
100
 
@@ -128,6 +135,12 @@ module WWW
128
135
  @mech.submit(self, button)
129
136
  end
130
137
 
138
+ # Submit form using +button+. Defaults
139
+ # to the first button.
140
+ def click_button(button = buttons.first)
141
+ submit(button)
142
+ end
143
+
131
144
  # This method builds an array of arrays that represent the query
132
145
  # parameters to be used with this form. The return value can then
133
146
  # be used to create a query string for this form.
@@ -179,9 +192,9 @@ module WWW
179
192
  def request_data
180
193
  query_params = build_query()
181
194
  case @enctype.downcase
182
- when 'multipart/form-data'
195
+ when /^multipart\/form-data/
183
196
  boundary = rand_string(20)
184
- @enctype << "; boundary=#{boundary}"
197
+ @enctype = "multipart/form-data; boundary=#{boundary}"
185
198
  params = []
186
199
  query_params.each { |k,v| params << param_to_multipart(k, v) }
187
200
  @file_uploads.each { |f| params << file_to_multipart(f) }
@@ -206,7 +219,7 @@ module WWW
206
219
  @checkboxes = WWW::Mechanize::List.new
207
220
 
208
221
  # Find all input tags
209
- (form_node/'input').each do |node|
222
+ form_node.search('//input').each do |node|
210
223
  type = (node['type'] || 'text').downcase
211
224
  name = node['name']
212
225
  next if name.nil? && !(type == 'submit' || type =='button')
@@ -229,13 +242,13 @@ module WWW
229
242
  end
230
243
 
231
244
  # Find all textarea tags
232
- (form_node/'textarea').each do |node|
245
+ form_node.search('//textarea').each do |node|
233
246
  next if node['name'].nil?
234
247
  @fields << Field.new(node['name'], node.inner_text)
235
248
  end
236
249
 
237
250
  # Find all select tags
238
- (form_node/'select').each do |node|
251
+ form_node.search('//select').each do |node|
239
252
  next if node['name'].nil?
240
253
  if node.has_attribute? 'multiple'
241
254
  @fields << MultiSelectList.new(node['name'], node)
@@ -17,7 +17,7 @@ module WWW
17
17
  @options = WWW::Mechanize::List.new
18
18
 
19
19
  # parse
20
- (node/'option').each do |n|
20
+ node.search('//option').each do |n|
21
21
  option = Option.new(n, self)
22
22
  @options << option
23
23
  end
@@ -23,6 +23,9 @@ end
23
23
  else
24
24
  module WWW
25
25
  class Mechanize
26
+ class Form
27
+ alias :inspect :pretty_inspect
28
+ end
26
29
  class Page
27
30
  alias :inspect :pretty_inspect
28
31
  class Link
@@ -1,7 +1,3 @@
1
- require 'fileutils'
2
- require 'hpricot'
3
- require 'forwardable'
4
-
5
1
  require 'www/mechanize/page/link'
6
2
  require 'www/mechanize/page/meta'
7
3
  require 'www/mechanize/page/base'
@@ -37,13 +33,13 @@ module WWW
37
33
  end
38
34
 
39
35
  def title
40
- @title ||= if parser && search('title').text.length > 0
41
- search('title').text
36
+ @title ||= if parser && search('//title').inner_text.length > 0
37
+ search('//title').inner_text
42
38
  end
43
39
  end
44
40
 
45
41
  def parser
46
- @parser ||= body && response ? Hpricot.parse(body) : nil
42
+ @parser ||= body && response ? Mechanize.html_parser.parse(body) : nil
47
43
  end
48
44
  alias :root :parser
49
45
 
@@ -57,16 +53,24 @@ module WWW
57
53
  def_delegator :parser, :/, :/
58
54
  def_delegator :parser, :at, :at
59
55
 
60
- # Find a form with +name+. Form will be yielded if a block is given.
61
- def form(name)
62
- f = forms.name(name).first
56
+ # Find a form matching +criteria+.
57
+ # Example:
58
+ # page.form(:action => '/post/login.php') do |f|
59
+ # ...
60
+ # end
61
+ def form_with(criteria)
62
+ criteria = {:name => criteria} if String === criteria
63
+ f = forms.find do |form|
64
+ criteria.all? { |k,v| form.send(k) == v }
65
+ end
63
66
  yield f if block_given?
64
67
  f
65
68
  end
69
+ alias :form :form_with
66
70
 
67
71
  def links
68
72
  @links ||= WWW::Mechanize::List.new(
69
- %w{ a area }.map do |tag|
73
+ %w{ //a //area }.map do |tag|
70
74
  search(tag).map do |node|
71
75
  Link.new(node, @mech, self)
72
76
  end
@@ -76,7 +80,7 @@ module WWW
76
80
 
77
81
  def forms
78
82
  @forms ||= WWW::Mechanize::List.new(
79
- search('form').map do |html_form|
83
+ search('//form').map do |html_form|
80
84
  form = Form.new(html_form, @mech, self)
81
85
  form.action ||= @uri
82
86
  form
@@ -86,7 +90,7 @@ module WWW
86
90
 
87
91
  def meta
88
92
  @meta ||= WWW::Mechanize::List.new(
89
- search('meta').map do |node|
93
+ search('//meta').map do |node|
90
94
  next unless node['http-equiv'] && node['content']
91
95
  (equiv, content) = node['http-equiv'], node['content']
92
96
  if equiv && equiv.downcase == 'refresh'
@@ -101,19 +105,19 @@ module WWW
101
105
 
102
106
  def bases
103
107
  @bases ||= WWW::Mechanize::List.new(
104
- search('base').map { |node| Base.new(node, @mech, self) }
108
+ search('//base').map { |node| Base.new(node, @mech, self) }
105
109
  )
106
110
  end
107
111
 
108
112
  def frames
109
113
  @frames ||= WWW::Mechanize::List.new(
110
- search('frame').map { |node| Frame.new(node, @mech, self) }
114
+ search('//frame').map { |node| Frame.new(node, @mech, self) }
111
115
  )
112
116
  end
113
117
 
114
118
  def iframes
115
119
  @iframes ||= WWW::Mechanize::List.new(
116
- search('iframe').map { |node| Frame.new(node, @mech, self) }
120
+ search('//iframe').map { |node| Frame.new(node, @mech, self) }
117
121
  )
118
122
  end
119
123
  end
@@ -27,9 +27,9 @@ module WWW
27
27
  @attributes = node
28
28
 
29
29
  # If there is no text, try to find an image and use its alt text
30
- if (@text.nil? || @text.length == 0) && (node/'img').length > 0
30
+ if (@text.nil? || @text.length == 0) && node.search('//img').length > 0
31
31
  @text = ''
32
- (node/'img').each do |e|
32
+ node.search('//img').each do |e|
33
33
  @text << ( e['alt'] || '')
34
34
  end
35
35
  end
@@ -0,0 +1,18 @@
1
+ module WWW
2
+ class Mechanize
3
+ # Raised when the maximum redirect limit is reached
4
+ class RedirectLimitReachedError < RuntimeError
5
+ attr_reader :page, :response_code, :redirects
6
+ def initialize(page, redirects)
7
+ @page = page
8
+ @redirects = redirects
9
+ @response_code = page.code
10
+ end
11
+
12
+ def to_s
13
+ "Maximum redirect limit (#{redirects}) reached"
14
+ end
15
+ alias :inspect :to_s
16
+ end
17
+ end
18
+ end
data/test/helper.rb CHANGED
@@ -6,6 +6,19 @@ require 'servlets'
6
6
 
7
7
  BASE_DIR = File.dirname(__FILE__)
8
8
 
9
+ # TODO: Move this to a test base class
10
+ module MechTestHelper
11
+ def self.fake_page(agent)
12
+ html = <<-END
13
+ <html><body>
14
+ <form><input type="submit" value="submit" /></form>
15
+ </body></html>
16
+ END
17
+ html_response = { 'content-type' => 'text/html' }
18
+ page = WWW::Mechanize::Page.new( nil, html_response, html, 200, agent )
19
+ end
20
+ end
21
+
9
22
  class Net::HTTP
10
23
  alias :old_do_start :do_start
11
24
 
@@ -30,6 +43,7 @@ class Net::HTTP
30
43
  '/send_cookies' => SendCookiesTest,
31
44
  '/if_modified_since' => ModifiedSinceServlet,
32
45
  '/http_headers' => HeaderServlet,
46
+ '/infinite_redirect' => InfiniteRedirectTest,
33
47
  }
34
48
 
35
49
  PAGE_CACHE = {}
@@ -0,0 +1,11 @@
1
+ <html>
2
+ <meta http-equiv="Set-Cookie" content="tender=live;expires=Sun, 27-Sep-2037 00:00:00 GMT; path=/">
3
+ <body>
4
+ <a href="alt_text.html"><img alt="alt text" src="hello"></a>
5
+ <a href="no_alt_text.html"><img src="hello"></a>
6
+ <a href="nil_alt_text.html"><img alt src="hello"></a>
7
+ <a href="no_image.html">no image</a>
8
+ <a href="no_text.html"></a>
9
+ </body>
10
+ </html>
11
+
data/test/servlets.rb CHANGED
@@ -114,6 +114,16 @@ class FileUploadTest < WEBrick::HTTPServlet::AbstractServlet
114
114
  end
115
115
  end
116
116
 
117
+ class InfiniteRedirectTest < WEBrick::HTTPServlet::AbstractServlet
118
+ def do_GET(req, res)
119
+ res['Content-Type'] = req.query['ct'] || "text/html"
120
+ res.status = req.query['code'] ? req.query['code'].to_i : '302'
121
+ number = req.query['q'] ? req.query['q'].to_i : 0
122
+ res['Location'] = "/infinite_redirect?q=#{number + 1}"
123
+ end
124
+ alias :do_POST :do_GET
125
+ end
126
+
117
127
  class ResponseCodeTest < WEBrick::HTTPServlet::AbstractServlet
118
128
  def do_GET(req, res)
119
129
  res['Content-Type'] = req.query['ct'] || "text/html"
File without changes
File without changes
File without changes
File without changes
@@ -58,6 +58,15 @@ class CookieClassTest < Test::Unit::TestCase
58
58
  end
59
59
  end
60
60
 
61
+ def test_double_semicolon
62
+ double_semi = 'WSIDC=WEST;; domain=.williams-sonoma.com; path=/'
63
+ url = URI.parse('http://williams-sonoma.com/')
64
+ WWW::Mechanize::Cookie.parse(url, double_semi) { |cookie|
65
+ assert_equal('WSIDC', cookie.name)
66
+ assert_equal('WEST', cookie.value)
67
+ }
68
+ end
69
+
61
70
  def test_parse_bad_version
62
71
  bad_cookie = 'PRETANET=TGIAqbFXtt; Name=/PRETANET; Path=/; Version=1.2; Content-type=text/html; Domain=192.168.6.196; expires=Friday, 13-November-2026 23:01:46 GMT;'
63
72
  url = URI.parse('http://localhost/')
File without changes
@@ -5,6 +5,12 @@ class CookiesMechTest < Test::Unit::TestCase
5
5
  @agent = WWW::Mechanize.new
6
6
  end
7
7
 
8
+ def test_meta_tag_cookies
9
+ cookies = @agent.cookies.length
10
+ page = @agent.get("http://localhost/meta_cookie.html")
11
+ assert_equal(cookies + 1, @agent.cookies.length)
12
+ end
13
+
8
14
  def test_send_cookies
9
15
  page = @agent.get("http://localhost/many_cookies")
10
16
  page = @agent.get("http://localhost/send_cookies")
@@ -14,7 +14,7 @@ class TestEncodedLinks < Test::Unit::TestCase
14
14
  end
15
15
 
16
16
  def test_hpricot_link
17
- page = @agent.click(@page.search('a').first)
17
+ page = @agent.click(@page.search('//a').first)
18
18
  assert_equal("http://localhost/form_post?a=b&b=c", page.uri.to_s)
19
19
  end
20
20
  end
File without changes
File without changes
@@ -7,7 +7,7 @@ class TestFormAction < Test::Unit::TestCase
7
7
  end
8
8
 
9
9
  def test_post_encoded_action
10
- form = @page.form('post_form1') { |f|
10
+ form = @page.form(:name => 'post_form1') { |f|
11
11
  f.first_name = "Aaron"
12
12
  }
13
13
  assert_equal('/form_post?a=b&b=c', form.action)
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,45 @@
1
+ require File.dirname(__FILE__) + "/helper"
2
+
3
+ class TestGetHeaders < Test::Unit::TestCase
4
+ def setup
5
+ @agent = WWW::Mechanize.new
6
+ end
7
+
8
+ def test_bad_header_symbol
9
+ assert_raises(ArgumentError) do
10
+ @agent.get(:url => "http://localhost/file_upload.html", :headers => { :foobar => "is fubar"})
11
+ end
12
+ end
13
+
14
+ def test_host_header
15
+ page = @agent.get(:url => 'http://localhost/http_headers', :headers => { :etag => '160604-24bc-9fe2c40'})
16
+ assert_header(page, 'host' => 'localhost')
17
+ end
18
+
19
+ def test_etag_header
20
+ page = @agent.get(:url => 'http://localhost/http_headers', :headers => { :etag => '160604-24bc-9fe2c40'})
21
+ assert_header(page, 'etag' => '160604-24bc-9fe2c40')
22
+ end
23
+
24
+ def test_if_modified_since_header
25
+ value = Time.now.strftime("%a, %d %b %Y %H:%M:%S %z")
26
+ page = @agent.get(:url => 'http://localhost/http_headers', :headers => { :if_modified_since => value})
27
+ assert_header(page, 'if-modified-since' => value)
28
+ end
29
+
30
+ def test_string_header
31
+ page = @agent.get(:url => 'http://localhost/http_headers', :headers => { "X-BS-French-Header" => 'Ou est la bibliotheque?'})
32
+ assert_header(page, 'x-bs-french-header' => 'Ou est la bibliotheque?')
33
+ end
34
+
35
+ def assert_header(page, header)
36
+ headers = {}
37
+ page.body.split(/[\r\n]+/).each do |page_header|
38
+ headers.[]=(*page_header.chomp.split(/\|/))
39
+ end
40
+ header.each do |key, value|
41
+ assert(headers.has_key?(key))
42
+ assert_equal(value, headers[key])
43
+ end
44
+ end
45
+ end
File without changes
@@ -0,0 +1,42 @@
1
+ require File.dirname(__FILE__) + "/helper"
2
+
3
+ class TestHashApi < Test::Unit::TestCase
4
+ def setup
5
+ @agent = WWW::Mechanize.new
6
+ end
7
+
8
+ def test_title
9
+ page = @agent.get(:url => "http://localhost/file_upload.html")
10
+ assert_equal('File Upload Form', page.title)
11
+ end
12
+
13
+ def test_page_gets_yielded
14
+ pages = nil
15
+ @agent.get(:url => "http://localhost/file_upload.html") { |page|
16
+ pages = page
17
+ }
18
+ assert pages
19
+ assert_equal('File Upload Form', pages.title)
20
+ end
21
+
22
+ def test_get_with_params
23
+ page = @agent.get(:url => 'http://localhost/', :params => { :q => 'hello' })
24
+ assert_equal('http://localhost/?q=hello', page.uri.to_s)
25
+ end
26
+
27
+ def test_get_with_referer
28
+ class << @agent
29
+ attr_reader :request
30
+ alias :old_set_headers :set_headers
31
+ def set_headers(u, request, cur_page)
32
+ old_set_headers(u, request, cur_page)
33
+ @request = request
34
+ end
35
+ end
36
+ @agent.get(:url => 'http://localhost/', :referer => URI.parse('http://google.com/'))
37
+ assert_equal 'http://google.com/', @agent.request['Referer']
38
+
39
+ @agent.get(:url => 'http://localhost/', :params => [], :referer => 'http://tenderlovemaking.com/')
40
+ assert_equal 'http://tenderlovemaking.com/', @agent.request['Referer']
41
+ end
42
+ end