mechanize 0.9.2 → 0.9.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

@@ -1,5 +1,21 @@
1
1
  = Mechanize CHANGELOG
2
2
 
3
+ === HEAD
4
+
5
+ * Bug Fixes:
6
+
7
+ * Do not apply encoding if encoding equals 'none'. Thanks Akinori MUSHA!
8
+ * Custom request headers may be supplied via WWW::Mechanize#request_headers
9
+ RF #24516
10
+ * HTML Parser may be set on a per-instance level via WWW::Mechanize#html_parser
11
+ RF #24693
12
+ * Fixed string encoding in ruby 1.9. RF #2433
13
+ * Rescuing Zlib::DataErrors (Thanks Kelley Reynolds)
14
+ * Fixing a problem with frozen SSL objects. RF #24950
15
+ * Do not send a referer on meta refresh. RF #24945
16
+ * Fixed a bug with double semi-colons in Content-Disposition headers
17
+ * Properly handling cookies that specify a path. RF #25259
18
+
3
19
  === 0.9.2 / 2009/03/05
4
20
 
5
21
  * New Features:
@@ -1,7 +1,7 @@
1
1
  = WWW::Mechanize
2
2
 
3
- http://mechanize.rubyforge.org/
4
- http://github.com/tenderlove/mechanize/tree/master
3
+ * http://mechanize.rubyforge.org/
4
+ * http://github.com/tenderlove/mechanize/tree/master
5
5
 
6
6
  == DESCRIPTION
7
7
 
@@ -28,8 +28,8 @@ The bug tracker is available here:
28
28
 
29
29
  == Examples
30
30
 
31
- If you are just starting, check out the GUIDE[link://files/GUIDE_rdoc.html].
32
- Also, check out the EXAMPLES[link://files/EXAMPLES_rdoc.html] file.
31
+ If you are just starting, check out the GUIDE.
32
+ Also, check out the EXAMPLES file.
33
33
 
34
34
  == Authors
35
35
 
@@ -56,5 +56,5 @@ library!
56
56
 
57
57
  == License
58
58
 
59
- This library is distributed under the GPL. Please see the LICENSE[link://files/LICENSE_rdoc.html] file.
59
+ This library is distributed under the GPL. Please see the LICENSE file.
60
60
 
data/Rakefile CHANGED
@@ -5,7 +5,6 @@ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), "lib")
5
5
  require 'mechanize'
6
6
 
7
7
  HOE = Hoe.new('mechanize', WWW::Mechanize::VERSION) do |p|
8
- p.rubyforge_name = 'mechanize'
9
8
  p.developer('Aaron Patterson','aaronp@rubyforge.org')
10
9
  p.developer('Mike Dalessio','mike.dalessio@gmail.com')
11
10
  p.readme_file = 'README.rdoc'
@@ -37,7 +37,7 @@ module WWW
37
37
  # require 'rubygems'
38
38
  # require 'mechanize'
39
39
  # require 'logger'
40
- #
40
+ #
41
41
  # agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
42
42
  # agent.user_agent_alias = 'Mac Safari'
43
43
  # page = agent.get("http://www.google.com/")
@@ -48,8 +48,8 @@ module WWW
48
48
  class Mechanize
49
49
  ##
50
50
  # The version of Mechanize you are using.
51
- VERSION = '0.9.2'
52
-
51
+ VERSION = '0.9.3'
52
+
53
53
  ##
54
54
  # User Agent aliases
55
55
  AGENT_ALIASES = {
@@ -64,7 +64,7 @@ module WWW
64
64
  'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
65
65
  'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
66
66
  }
67
-
67
+
68
68
  attr_accessor :cookie_jar
69
69
  attr_accessor :open_timeout, :read_timeout
70
70
  attr_accessor :user_agent
@@ -82,15 +82,21 @@ module WWW
82
82
  attr_accessor :history_added
83
83
  attr_accessor :scheme_handlers
84
84
  attr_accessor :redirection_limit
85
-
85
+
86
+ # A hash of custom request headers
87
+ attr_accessor :request_headers
88
+
89
+ # The HTML parser to be used when parsing documents
90
+ attr_accessor :html_parser
91
+
86
92
  attr_reader :history
87
93
  attr_reader :pluggable_parser
88
-
94
+
89
95
  alias :follow_redirect? :redirect_ok
90
-
96
+
91
97
  @html_parser = Nokogiri::HTML
92
98
  class << self; attr_accessor :html_parser, :log end
93
-
99
+
94
100
  def initialize
95
101
  # attr_accessors
96
102
  @cookie_jar = CookieJar.new
@@ -110,28 +116,29 @@ module WWW
110
116
  @key = nil # OpenSSL Private Key
111
117
  @pass = nil # OpenSSL Password
112
118
  @redirect_ok = true # Should we follow redirects?
113
-
119
+
114
120
  # attr_readers
115
121
  @history = WWW::Mechanize::History.new
116
122
  @pluggable_parser = PluggableParser.new
117
-
123
+
118
124
  # Auth variables
119
125
  @user = nil # Auth User
120
126
  @password = nil # Auth Password
121
127
  @digest = nil # DigestAuth Digest
122
128
  @auth_hash = {} # Keep track of urls for sending auth
123
-
129
+ @request_headers= {} # A hash of request headers to be used
130
+
124
131
  # Proxy settings
125
132
  @proxy_addr = nil
126
133
  @proxy_pass = nil
127
134
  @proxy_port = nil
128
135
  @proxy_user = nil
129
-
136
+
130
137
  @conditional_requests = true
131
-
138
+
132
139
  @follow_meta_refresh = false
133
140
  @redirection_limit = 20
134
-
141
+
135
142
  # Connection Cache & Keep alive
136
143
  @connection_cache = {}
137
144
  @keep_alive_time = 300
@@ -149,7 +156,9 @@ module WWW
149
156
 
150
157
  @pre_connect_hook = Chain::PreConnectHook.new
151
158
  @post_connect_hook = Chain::PostConnectHook.new
152
-
159
+
160
+ @html_parser = self.class.html_parser
161
+
153
162
  yield self if block_given?
154
163
  end
155
164
 
@@ -165,31 +174,31 @@ module WWW
165
174
  def post_connect_hooks
166
175
  @post_connect_hook.hooks
167
176
  end
168
-
177
+
169
178
  # Sets the proxy address, port, user, and password
170
179
  # +addr+ should be a host, with no "http://"
171
180
  def set_proxy(addr, port, user = nil, pass = nil)
172
181
  @proxy_addr, @proxy_port, @proxy_user, @proxy_pass = addr, port, user, pass
173
182
  end
174
-
183
+
175
184
  # Set the user agent for the Mechanize object.
176
185
  # See AGENT_ALIASES
177
186
  def user_agent_alias=(al)
178
187
  self.user_agent = AGENT_ALIASES[al] || raise("unknown agent alias")
179
188
  end
180
-
189
+
181
190
  # Returns a list of cookies stored in the cookie jar.
182
191
  def cookies
183
192
  @cookie_jar.to_a
184
193
  end
185
-
194
+
186
195
  # Sets the user and password to be used for authentication.
187
196
  def auth(user, password)
188
197
  @user = user
189
198
  @password = password
190
199
  end
191
200
  alias :basic_auth :auth
192
-
201
+
193
202
  # Fetches the URL passed in and returns a page.
194
203
  def get(options, parameters = [], referer = nil)
195
204
  unless options.is_a? Hash
@@ -272,12 +281,12 @@ module WWW
272
281
  yield page if block_given?
273
282
  page
274
283
  end
275
-
284
+
276
285
  # Fetch a file and return the contents of the file.
277
286
  def get_file(url)
278
287
  get(url).body
279
288
  end
280
-
289
+
281
290
  # Clicks the WWW::Mechanize::Link object passed in and returns the
282
291
  # page fetched.
283
292
  def click(link)
@@ -286,13 +295,13 @@ module WWW
286
295
  (link['href'] || link['src'])
287
296
  get(:url => href, :referer => (referer || current_page()))
288
297
  end
289
-
298
+
290
299
  # Equivalent to the browser back button. Returns the most recent page
291
300
  # visited.
292
301
  def back
293
302
  @history.pop
294
303
  end
295
-
304
+
296
305
  # Posts to the given URL with the query parameters passed in. Query
297
306
  # parameters can be passed as a hash, or as an array of arrays.
298
307
  # Example:
@@ -307,7 +316,7 @@ module WWW
307
316
  end
308
317
  node['method'] = 'POST'
309
318
  node['enctype'] = 'application/x-www-form-urlencoded'
310
-
319
+
311
320
  form = Form.new(node)
312
321
  query.each { |k,v|
313
322
  if v.is_a?(IO)
@@ -321,7 +330,7 @@ module WWW
321
330
  }
322
331
  post_form(url, form)
323
332
  end
324
-
333
+
325
334
  # Submit a form with an optional button.
326
335
  # Without a button:
327
336
  # page = agent.get('http://example.com')
@@ -343,17 +352,17 @@ module WWW
343
352
  raise "unsupported method: #{form.method.upcase}"
344
353
  end
345
354
  end
346
-
355
+
347
356
  # Returns the current page loaded by Mechanize
348
357
  def current_page
349
358
  @history.last
350
359
  end
351
-
360
+
352
361
  # Returns whether or not a url has been visited
353
362
  def visited?(url)
354
363
  ! visited_page(url).nil?
355
364
  end
356
-
365
+
357
366
  # Returns a visited page for the url passed in, otherwise nil
358
367
  def visited_page(url)
359
368
  if url.respond_to? :href
@@ -361,7 +370,7 @@ module WWW
361
370
  end
362
371
  @history.visited_page(resolve(url))
363
372
  end
364
-
373
+
365
374
  # Runs given block, then resets the page history as it was before. self is
366
375
  # given as a parameter to the block. Returns the value of the block.
367
376
  def transact
@@ -372,11 +381,11 @@ module WWW
372
381
  @history = history_backup
373
382
  end
374
383
  end
375
-
384
+
376
385
  alias :page :current_page
377
386
 
378
387
  private
379
-
388
+
380
389
  def resolve(url, referer = current_page())
381
390
  hash = { :uri => url, :referer => referer }
382
391
  chain = Chain.new([
@@ -384,15 +393,15 @@ module WWW
384
393
  ]).handle(hash)
385
394
  hash[:uri].to_s
386
395
  end
387
-
396
+
388
397
  def post_form(url, form, headers = {})
389
398
  cur_page = form.page || current_page ||
390
399
  Page.new( nil, {'content-type'=>'text/html'})
391
-
400
+
392
401
  request_data = form.request_data
393
-
402
+
394
403
  log.debug("query: #{ request_data.inspect }") if log
395
-
404
+
396
405
  # fetch the page
397
406
  page = fetch_page( :uri => url,
398
407
  :referer => cur_page,
@@ -402,10 +411,10 @@ module WWW
402
411
  'Content-Type' => form.enctype,
403
412
  'Content-Length' => request_data.size.to_s,
404
413
  }.merge(headers))
405
- add_to_history(page)
414
+ add_to_history(page)
406
415
  page
407
416
  end
408
-
417
+
409
418
  # uri is an absolute URI
410
419
  def fetch_page(params)
411
420
  options = {
@@ -435,10 +444,13 @@ module WWW
435
444
  ),
436
445
  Chain::SSLResolver.new(@ca_file, @verify_callback, @cert, @key, @pass),
437
446
  Chain::AuthHeaders.new(@auth_hash, @user, @password, @digest),
438
- Chain::HeaderResolver.new( @keep_alive,
439
- @keep_alive_time,
440
- @cookie_jar,
441
- @user_agent),
447
+ Chain::HeaderResolver.new(
448
+ @keep_alive,
449
+ @keep_alive_time,
450
+ @cookie_jar,
451
+ @user_agent,
452
+ {}
453
+ ),
442
454
  Chain::CustomHeaders.new,
443
455
  @pre_connect_hook,
444
456
  ])
@@ -499,37 +511,36 @@ module WWW
499
511
  page = options[:page]
500
512
 
501
513
  log.info("status: #{ page.code }") if log
502
-
514
+
503
515
  if follow_meta_refresh
504
- redirect_uri = nil
516
+ redirect_uri = nil
517
+ referer = page
505
518
  if (page.respond_to?(:meta) && (redirect = page.meta.first))
506
519
  redirect_uri = redirect.uri.to_s
520
+ sleep redirect.node['delay'].to_f
521
+ referer = Page.new(nil, {'content-type'=>'text/html'})
507
522
  elsif refresh = response['refresh']
508
- parsed_refresh = refresh.match(/^\s*(\d+\.?\d*);\s*(url|URL)=(\S*)\s*$/)
509
- raise StandardError, "Invalid refresh http header" unless parsed_refresh
510
- delay = parsed_refresh[1]
511
- location = parsed_refresh[3]
512
- location = "http://#{uri.host}#{location}" unless location.include?("http")
523
+ delay, redirect_uri = Page::Meta.parse(refresh, uri)
524
+ raise StandardError, "Invalid refresh http header" unless delay
513
525
  if redirects + 1 > redirection_limit
514
526
  raise RedirectLimitReachedError.new(page, redirects)
515
527
  end
516
- sleep delay.to_i
517
- redirect_uri = location
528
+ sleep delay.to_f
518
529
  end
519
530
  if redirect_uri
520
531
  @history.push(page, page.uri)
521
532
  return fetch_page(
522
533
  :uri => redirect_uri,
523
- :referer => page,
534
+ :referer => referer,
524
535
  :params => [],
525
536
  :verb => :get,
526
537
  :redirects => redirects + 1
527
538
  )
528
539
  end
529
540
  end
530
-
541
+
531
542
  return page if res_klass <= Net::HTTPSuccess
532
-
543
+
533
544
  if res_klass == Net::HTTPNotModified
534
545
  log.debug("Got cached page") if log
535
546
  return visited_page(uri) || page
@@ -566,10 +577,10 @@ module WWW
566
577
  :headers => options[:headers]
567
578
  )
568
579
  end
569
-
580
+
570
581
  raise ResponseCodeError.new(page), "Unhandled response", caller
571
582
  end
572
-
583
+
573
584
  def add_to_history(page)
574
585
  @history.push(page, resolve(page.uri))
575
586
  history_added.call(page) if history_added
@@ -23,6 +23,11 @@ module WWW
23
23
  body.rewind
24
24
  body.read(10)
25
25
  Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
26
+ rescue Zlib::DataError
27
+ if Mechanize.log
28
+ Mechanize.log.error("Caught a Zlib::DataError, unable to decode page: #{$!.to_s}")
29
+ end
30
+ ''
26
31
  end
27
32
  else
28
33
  ''
@@ -8,11 +8,11 @@ module WWW
8
8
  request = params[:request]
9
9
  params[:headers].each do |k,v|
10
10
  case k
11
- when :etag then request.add_field("ETag", v)
12
- when :if_modified_since then request.add_field("If-Modified-Since", v)
11
+ when :etag then request["ETag"] = v
12
+ when :if_modified_since then request["If-Modified-Since"] = v
13
13
  else
14
14
  raise ArgumentError.new("unknown header symbol #{k}") if k.is_a? Symbol
15
- request.add_field(k,v)
15
+ request[k] = v
16
16
  end
17
17
  end
18
18
  super
@@ -3,11 +3,12 @@ module WWW
3
3
  class Chain
4
4
  class HeaderResolver
5
5
  include WWW::Handler
6
- def initialize(keep_alive, keep_alive_time, cookie_jar, user_agent)
6
+ def initialize(keep_alive, keep_alive_time, cookie_jar, user_agent, headers)
7
7
  @keep_alive = keep_alive
8
8
  @keep_alive_time = keep_alive_time
9
9
  @cookie_jar = cookie_jar
10
10
  @user_agent = user_agent
11
+ @headers = headers
11
12
  end
12
13
 
13
14
  def handle(ctx, params)
@@ -40,6 +41,10 @@ module WWW
40
41
 
41
42
  # Add User-Agent header to request
42
43
  request['User-Agent'] = @user_agent if @user_agent
44
+
45
+ @headers.each do |k,v|
46
+ request[k] = v
47
+ end if request
43
48
  super
44
49
  end
45
50
  end
@@ -27,7 +27,6 @@ module WWW
27
27
  end
28
28
  end
29
29
 
30
-
31
30
  if page.is_a?(Page) && page.body =~ /Set-Cookie/n
32
31
  page.search('//meta[@http-equiv="Set-Cookie"]').each do |meta|
33
32
  Cookie::parse(uri, meta['content']) { |c|