mechanize 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

@@ -1,5 +1,21 @@
1
1
  = Mechanize CHANGELOG
2
2
 
3
+ === HEAD
4
+
5
+ * New Features:
6
+ * Encoding may be specified for a page: Page#encoding=
7
+
8
+ * Bug Fixes:
9
+ * m17n fixes. ありがとう konn!
10
+ * Fixed a problem with base tags. ありがとう Keisuke
11
+ * HEAD requests do not record in the history
12
+ * Default encoding to ISO-8859-1 instead of ASCII
13
+ * Requests with URI instances should not be polluted. RF #23472
14
+ * Nonce count fixed for digest auth requests. Thanks Adrian Slapa!
15
+ * Fixed a referer issue with requests using a uri. RF #23472
16
+ * WAP content types will now be parsed
17
+ * Rescued poorly formatted cookies. Thanks Kelley Reynolds!
18
+
3
19
  === 0.9.0
4
20
 
5
21
  * Deprecations
@@ -153,6 +153,7 @@ test/test_redirect_limit_reached.rb
153
153
  test/test_redirect_verb_handling.rb
154
154
  test/test_referer.rb
155
155
  test/test_relative_links.rb
156
+ test/test_request.rb
156
157
  test/test_response_code.rb
157
158
  test/test_save_file.rb
158
159
  test/test_scheme.rb
data/Rakefile CHANGED
@@ -9,7 +9,7 @@ HOE = Hoe.new('mechanize', WWW::Mechanize::VERSION) do |p|
9
9
  p.developer('Aaron Patterson','aaronp@rubyforge.org')
10
10
  p.developer('Mike Dalessio','mike.dalessio@gmail.com')
11
11
  p.summary = "Mechanize provides automated web-browsing"
12
- p.extra_deps = [['nokogiri', '>= 1.0.7']]
12
+ p.extra_deps = [['nokogiri', '>= 1.2.1']]
13
13
  end
14
14
 
15
15
  desc "Update SSL Certificate"
@@ -8,6 +8,8 @@ require 'digest/md5'
8
8
  require 'fileutils'
9
9
  require 'nokogiri'
10
10
  require 'forwardable'
11
+ require 'iconv'
12
+ require 'nkf'
11
13
 
12
14
  require 'www/mechanize/util'
13
15
  require 'www/mechanize/content_type_error'
@@ -46,7 +48,7 @@ module WWW
46
48
  class Mechanize
47
49
  ##
48
50
  # The version of Mechanize you are using.
49
- VERSION = '0.9.0'
51
+ VERSION = '0.9.1'
50
52
 
51
53
  ##
52
54
  # User Agent aliases
@@ -204,7 +206,7 @@ module WWW
204
206
  end
205
207
 
206
208
  unless referer
207
- if url =~ /^http/
209
+ if url.to_s =~ /^http/
208
210
  referer = Page.new(nil, {'content-type'=>'text/html'})
209
211
  else
210
212
  referer = current_page || Page.new(nil, {'content-type'=>'text/html'})
@@ -237,16 +239,8 @@ module WWW
237
239
  # put('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
238
240
  #
239
241
  def put(url, query_params = {}, options = {})
240
- options = {
241
- :uri => url,
242
- :headers => {},
243
- :params => query_params,
244
- :verb => :put
245
- }.merge(options)
246
- # fetch the page
247
- page = fetch_page(options)
242
+ page = head(url, query_params, options.merge({:verb => :put}))
248
243
  add_to_history(page)
249
- yield page if block_given?
250
244
  page
251
245
  end
252
246
 
@@ -256,7 +250,9 @@ module WWW
256
250
  # delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
257
251
  #
258
252
  def delete(url, query_params = {}, options = {})
259
- put(url, query_params, options.merge({:verb => :delete}))
253
+ page = head(url, query_params, options.merge({:verb => :delete}))
254
+ add_to_history(page)
255
+ page
260
256
  end
261
257
 
262
258
  ####
@@ -265,7 +261,16 @@ module WWW
265
261
  # head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
266
262
  #
267
263
  def head(url, query_params = {}, options = {})
268
- put(url, query_params, options.merge({:verb => :head}))
264
+ options = {
265
+ :uri => url,
266
+ :headers => {},
267
+ :params => query_params,
268
+ :verb => :head
269
+ }.merge(options)
270
+ # fetch the page
271
+ page = fetch_page(options)
272
+ yield page if block_given?
273
+ page
269
274
  end
270
275
 
271
276
  # Fetch a file and return the contents of the file.
@@ -4,7 +4,7 @@ module WWW
4
4
  class AuthHeaders
5
5
  include WWW::Handler
6
6
 
7
- @@nonce_count = -1
7
+ @@nonce_count = Hash.new(0)
8
8
  CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))
9
9
 
10
10
  def initialize(auth_hash, user, password, digest)
@@ -36,8 +36,6 @@ module WWW
36
36
  end
37
37
 
38
38
  def gen_auth_header(uri, request, auth_header, is_IIS = false)
39
- @@nonce_count += 1
40
-
41
39
  auth_header =~ /^(\w+) (.*)/
42
40
 
43
41
  params = {}
@@ -45,12 +43,14 @@ module WWW
45
43
  params[$1] = $2.gsub(/^"/, '').gsub(/"$/, '')
46
44
  }
47
45
 
46
+ @@nonce_count[params['nonce']] += 1
47
+
48
48
  a_1 = "#{@user}:#{params['realm']}:#{@password}"
49
49
  a_2 = "#{request.method}:#{uri.path}"
50
50
  request_digest = ''
51
51
  request_digest << Digest::MD5.hexdigest(a_1)
52
52
  request_digest << ':' << params['nonce']
53
- request_digest << ':' << ('%08x' % @@nonce_count)
53
+ request_digest << ':' << ('%08x' % @@nonce_count[params['nonce']])
54
54
  request_digest << ':' << CNONCE
55
55
  request_digest << ':' << params['qop']
56
56
  request_digest << ':' << Digest::MD5.hexdigest(a_2)
@@ -68,7 +68,7 @@ module WWW
68
68
  "#{field}=\"#{params[field]}\""
69
69
  }.compact.join(', ')
70
70
 
71
- header << "nc=#{'%08x' % @@nonce_count}, "
71
+ header << "nc=#{'%08x' % @@nonce_count[params['nonce']]}, "
72
72
  header << "cnonce=\"#{CNONCE}\", "
73
73
  header << "response=\"#{Digest::MD5.hexdigest(request_digest)}\""
74
74
 
@@ -26,8 +26,8 @@ module WWW
26
26
  cache_obj[:keep_alive_options][k.intern] = v
27
27
  end
28
28
  end
29
-
30
- if page.is_a?(Page) && page.body =~ /Set-Cookie/
29
+ body = Util.to_native_charset(page.body)
30
+ if page.is_a?(Page) && body =~ /Set-Cookie/
31
31
  page.search('//meta[@http-equiv="Set-Cookie"]').each do |meta|
32
32
  Cookie::parse(uri, meta['content']) { |c|
33
33
  Mechanize.log.debug("saved cookie: #{c}") if Mechanize.log
@@ -10,6 +10,7 @@ module WWW
10
10
 
11
11
  def handle(ctx, params)
12
12
  raise ArgumentError.new('uri must be specified') unless params[:uri]
13
+ params[:uri] = params[:uri].dup if params[:uri].is_a?(URI)
13
14
  uri = params[:uri]
14
15
  referer = params[:referer]
15
16
  unless uri.is_a?(URI)
@@ -11,7 +11,13 @@ module WWW
11
11
  first_elem = cookie_elem.shift
12
12
  first_elem.strip!
13
13
  key, value = first_elem.split(/=/, 2)
14
- cookie = new(key, WEBrick::HTTPUtils.dequote(value))
14
+
15
+ begin
16
+ cookie = new(key, WEBrick::HTTPUtils.dequote(value))
17
+ rescue
18
+ log.warn("Couldn't parse key/value: #{first_elem}") if log
19
+ end
20
+
15
21
  cookie_elem.each{|pair|
16
22
  pair.strip!
17
23
  key, value = pair.split(/=/, 2)
@@ -136,6 +136,25 @@ module WWW
136
136
  submit(button)
137
137
  end
138
138
 
139
+ # This method is a sub-method of build_query.
140
+ # It converts the charset of the fields' query values into the expected one.
141
+ def proc_query(field)
142
+ field.query_value.map{|(name, val)|
143
+ [from_native_charset(name), from_native_charset(val)]
144
+ }
145
+ end
146
+ private :proc_query
147
+
148
+ def from_native_charset(str, enc=nil)
149
+ if page
150
+ enc ||= page.encoding
151
+ Util.from_native_charset(str,enc)
152
+ else
153
+ str
154
+ end
155
+ end
156
+ private :from_native_charset
157
+
139
158
  # This method builds an array of arrays that represent the query
140
159
  # parameters to be used with this form. The return value can then
141
160
  # be used to create a query string for this form.
@@ -143,17 +162,22 @@ module WWW
143
162
  query = []
144
163
 
145
164
  fields().each do |f|
146
- query.push(*f.query_value)
165
+ qval = proc_query(f)
166
+ query.push(*qval)
147
167
  end
148
168
 
149
169
  checkboxes().each do |f|
150
- query.push(*f.query_value) if f.checked
170
+ if f.checked
171
+ qval = proc_query(f)
172
+ query.push(*qval)
173
+ end
151
174
  end
152
175
 
153
176
  radio_groups = {}
154
177
  radiobuttons().each do |f|
155
- radio_groups[f.name] ||= []
156
- radio_groups[f.name] << f
178
+ fname = from_native_charset(f.name)
179
+ radio_groups[fname] ||= []
180
+ radio_groups[fname] << f
157
181
  end
158
182
 
159
183
  # take one radio button from each group
@@ -162,16 +186,17 @@ module WWW
162
186
 
163
187
  if checked.size == 1
164
188
  f = checked.first
165
- query.push(*f.query_value)
189
+ qval = proc_query(f)
190
+ query.push(*qval)
166
191
  elsif checked.size > 1
167
192
  raise "multiple radiobuttons are checked in the same group!"
168
193
  end
169
194
  end
170
195
 
171
196
  @clicked_buttons.each { |b|
172
- query.push(*b.query_value)
197
+ qval = proc_query(b)
198
+ query.push(*qval)
173
199
  }
174
-
175
200
  query
176
201
  end
177
202
 
@@ -191,7 +216,7 @@ module WWW
191
216
  boundary = rand_string(20)
192
217
  @enctype = "multipart/form-data; boundary=#{boundary}"
193
218
  params = []
194
- query_params.each { |k,v| params << param_to_multipart(k, v) }
219
+ query_params.each { |k,v| params << param_to_multipart(k, v) unless k.nil? }
195
220
  @file_uploads.each { |f| params << file_to_multipart(f) }
196
221
  params.collect { |p| "--#{boundary}\r\n#{p}" }.join('') +
197
222
  "--#{boundary}--\r\n"
@@ -21,9 +21,11 @@ module WWW
21
21
  extend Forwardable
22
22
 
23
23
  attr_accessor :mech
24
+ attr_accessor :encoding
24
25
 
25
26
  def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
26
27
  super(uri, response, body, code)
28
+ @encoding = Util.detect_charset(body)
27
29
  @mech ||= mech
28
30
 
29
31
  raise Mechanize::ContentTypeError.new(response['content-type']) unless
@@ -43,7 +45,11 @@ module WWW
43
45
 
44
46
  if body && response
45
47
  html_body = body.length > 0 ? body : '<html></html>'
46
- @parser = Mechanize.html_parser.parse(html_body)
48
+ if WWW::Mechanize.html_parser == Nokogiri::HTML
49
+ @parser = Mechanize.html_parser.parse(html_body, nil, @encoding)
50
+ else
51
+ @parser = Mechanize.html_parser.parse(html_body)
52
+ end
47
53
  end
48
54
 
49
55
  @parser
@@ -37,7 +37,7 @@ module WWW
37
37
  end
38
38
 
39
39
  def uri
40
- URI.parse(@href)
40
+ @href && URI.parse(@href)
41
41
  end
42
42
 
43
43
  # Click on this link
@@ -45,6 +45,7 @@ module WWW
45
45
  class PluggableParser
46
46
  CONTENT_TYPES = {
47
47
  :html => 'text/html',
48
+ :wap => 'application/vnd.wap.xhtml+xml',
48
49
  :xhtml => 'application/xhtml+xml',
49
50
  :pdf => 'application/pdf',
50
51
  :csv => 'text/csv',
@@ -54,8 +55,10 @@ module WWW
54
55
  attr_accessor :default
55
56
 
56
57
  def initialize
57
- @parsers = { CONTENT_TYPES[:html] => Page,
58
- CONTENT_TYPES[:xhtml] => Page }
58
+ @parsers = { CONTENT_TYPES[:html] => Page,
59
+ CONTENT_TYPES[:xhtml] => Page,
60
+ CONTENT_TYPES[:wap] => Page,
61
+ }
59
62
  @default = File
60
63
  end
61
64
 
@@ -1,15 +1,46 @@
1
1
  module WWW
2
2
  class Mechanize
3
3
  class Util
4
+ CODE_DIC = {
5
+ :JIS => "ISO-2022-JP",
6
+ :EUC => "EUC-JP",
7
+ :SJIS => "SHIFT_JIS",
8
+ :UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"}
9
+
4
10
  class << self
5
- def build_query_string(parameters)
11
+ def build_query_string(parameters, enc=nil)
6
12
  parameters.map { |k,v|
7
- k &&
13
+ if k
14
+ # WEBrick::HTTPUtils.escape* has some problems with m17n on ruby-1.9.*.
15
+ [URI.escape(k.to_s), URI.escape(v.to_s)].join("=")
16
+ =begin
8
17
  [WEBrick::HTTPUtils.escape_form(k.to_s),
9
18
  WEBrick::HTTPUtils.escape_form(v.to_s)].join("=")
19
+ =end
20
+
21
+ end
10
22
  }.compact.join('&')
11
23
  end
12
24
 
25
+ def to_native_charset(s, code=nil)
26
+ if Mechanize.html_parser == Nokogiri::HTML
27
+ return unless s
28
+ code ||= detect_charset(s)
29
+ Iconv.iconv("UTF-8", code, s).join("")
30
+ else
31
+ s
32
+ end
33
+ end
34
+
35
+ def from_native_charset(s, code)
36
+ if Mechanize.html_parser == Nokogiri::HTML
37
+ return unless s
38
+ Iconv.iconv(code, "UTF-8", s).join("")
39
+ else
40
+ return s
41
+ end
42
+ end
43
+
13
44
  def html_unescape(s)
14
45
  return s unless s
15
46
  s.gsub(/&(\w+|#[0-9]+);/) { |match|
@@ -23,6 +54,20 @@ module WWW
23
54
  number ? ([number].pack('U') rescue match) : match
24
55
  }
25
56
  end
57
+
58
+ def detect_charset(src)
59
+ tmp = NKF.guess(src || "<html></html>")
60
+ if RUBY_VERSION >= "1.9.0"
61
+ enc = tmp.to_s.upcase
62
+ else
63
+ enc = NKF.constants.find{|c|
64
+ NKF.const_get(c) == tmp
65
+ }
66
+ enc = CODE_DIC[enc.intern]
67
+ end
68
+ enc || "ISO-8859-1"
69
+ end
70
+
26
71
  end
27
72
  end
28
73
  end
@@ -145,7 +145,20 @@
145
145
  </table><br />
146
146
  <input type="submit" value="Submit" />
147
147
  </form>
148
- <h1>Get Form 3</h1>
148
+
149
+ <!-- Post form with multipart/form-data -->
150
+ <h1>Post Form 4 - Multipart</h1>
151
+ <form name="post_form4_multipart" enctype="multipart/form-data" method="post" action="/form_post">
152
+ <table>
153
+ <tr>
154
+ <td>First Name</td>
155
+ <td><input type="text" name="first_name" /></td>
156
+ </tr>
157
+ </table><br />
158
+ <input type="submit" value="Submit" />
159
+ </form>
160
+
161
+ <h1>Get Form 3</h1>
149
162
  <form name="get_form3" method="get" action="/form_post?great day=yes&one=two">
150
163
  <table>
151
164
  <tr>
@@ -149,6 +149,21 @@ class FormsMechTest < Test::Unit::TestCase
149
149
  )
150
150
  end
151
151
 
152
+ def test_post_multipart
153
+ page = @agent.get("http://localhost/form_test.html")
154
+ post_form = page.forms.find { |f| f.name == "post_form4_multipart" }
155
+ assert_not_nil(post_form, "Post form is null")
156
+ assert_equal("post", post_form.method.downcase)
157
+ assert_equal("/form_post", post_form.action)
158
+
159
+ assert_equal(1, post_form.fields.size)
160
+ assert_equal(1, post_form.buttons.size)
161
+
162
+ page = @agent.submit(post_form, post_form.buttons.first)
163
+
164
+ assert_not_nil(page)
165
+ end
166
+
152
167
  def test_select_box
153
168
  page = @agent.get("http://localhost/form_test.html")
154
169
  post_form = page.forms.find { |f| f.name == "post_form1" }
@@ -447,7 +462,6 @@ class FormsMechTest < Test::Unit::TestCase
447
462
  }.checked = true
448
463
  get_form.checkboxes.find { |f| f.name == "likes ham" }.checked = true
449
464
  page = @agent.submit(get_form, get_form.buttons.first)
450
-
451
465
  # Check that the submitted fields exist
452
466
  assert_equal(3, page.links.size, "Not enough links")
453
467
  assert_not_nil(
@@ -5,6 +5,12 @@ class TestPage < Test::Unit::TestCase
5
5
  @agent = WWW::Mechanize.new
6
6
  end
7
7
 
8
+ def test_set_encoding
9
+ page = @agent.get("http://localhost/file_upload.html")
10
+ page.encoding = 'UTF-8'
11
+ assert_equal 'UTF-8', page.parser.encoding
12
+ end
13
+
8
14
  def test_page_gets_yielded
9
15
  pages = nil
10
16
  @agent.get("http://localhost/file_upload.html") { |page|
@@ -0,0 +1,13 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
+
3
+ class RequestTest < Test::Unit::TestCase
4
+ def setup
5
+ @agent = WWW::Mechanize.new
6
+ end
7
+
8
+ def test_uri_is_not_polluted
9
+ uri = URI.parse('http://localhost/')
10
+ @agent.get(uri, {'q' => 'Ruby'})
11
+ assert_equal 'http://localhost/', uri.to_s
12
+ end
13
+ end
@@ -7,16 +7,19 @@ class VerbsTest < Test::Unit::TestCase
7
7
 
8
8
  def test_put
9
9
  page = @agent.put('http://localhost/verb', { 'q' => 'foo' })
10
+ assert_equal 1, @agent.history.length
10
11
  assert_equal('method: PUT', page.body)
11
12
  end
12
13
 
13
14
  def test_delete
14
15
  page = @agent.delete('http://localhost/verb', { 'q' => 'foo' })
16
+ assert_equal 1, @agent.history.length
15
17
  assert_equal('method: DELETE', page.body)
16
18
  end
17
19
 
18
20
  def test_head
19
21
  page = @agent.head('http://localhost/verb', { 'q' => 'foo' })
22
+ assert_equal 0, @agent.history.length
20
23
  assert_equal('method: HEAD', page.body)
21
24
  end
22
25
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mechanize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aaron Patterson
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2008-12-21 00:00:00 -08:00
13
+ date: 2009-02-23 00:00:00 -08:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -21,7 +21,7 @@ dependencies:
21
21
  requirements:
22
22
  - - ">="
23
23
  - !ruby/object:Gem::Version
24
- version: 1.0.7
24
+ version: 1.2.1
25
25
  version:
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: hoe
@@ -31,7 +31,7 @@ dependencies:
31
31
  requirements:
32
32
  - - ">="
33
33
  - !ruby/object:Gem::Version
34
- version: 1.8.2
34
+ version: 1.9.0
35
35
  version:
36
36
  description: The Mechanize library is used for automating interaction with websites. Mechanize automatically stores and sends cookies, follows redirects, can follow links, and submit forms. Form fields can be populated and submitted. Mechanize also keeps track of the sites that you have visited as a history.
37
37
  email:
@@ -205,6 +205,7 @@ files:
205
205
  - test/test_redirect_verb_handling.rb
206
206
  - test/test_referer.rb
207
207
  - test/test_relative_links.rb
208
+ - test/test_request.rb
208
209
  - test/test_response_code.rb
209
210
  - test/test_save_file.rb
210
211
  - test/test_scheme.rb
@@ -291,6 +292,7 @@ test_files:
291
292
  - test/test_redirect_verb_handling.rb
292
293
  - test/test_referer.rb
293
294
  - test/test_relative_links.rb
295
+ - test/test_request.rb
294
296
  - test/test_response_code.rb
295
297
  - test/test_save_file.rb
296
298
  - test/test_scheme.rb