mechanize 0.6.8 → 0.6.9
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data/CHANGELOG.txt +12 -0
- data/Manifest.txt +2 -0
- data/lib/mechanize.rb +89 -9
- data/lib/mechanize/page.rb +6 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/tc_follow_meta.rb +25 -0
- data/test/tc_links.rb +1 -1
- data/test/tc_mech.rb +11 -0
- data/test/tc_pluggable_parser.rb +2 -2
- data/test/test_all.rb +1 -0
- metadata +16 -5
data/CHANGELOG.txt
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
= Mechanize CHANGELOG
|
2
2
|
|
3
|
+
== 0.6.9
|
4
|
+
|
5
|
+
* Updating UTF-8 support for urls
|
6
|
+
* Adding AREA tags to the links list.
|
7
|
+
http://rubyforge.org/pipermail/mechanize-users/2007-May/000140.html
|
8
|
+
* WWW::Mechanize#follow_meta_refresh will allow you to automatically follow
|
9
|
+
meta refresh tags. [#10032]
|
10
|
+
* Adding x-gzip to accepted content-encoding. Thanks Simon Strandgaard
|
11
|
+
http://rubyforge.org/tracker/index.php?func=detail&aid=11167&group_id=1453&atid=5711
|
12
|
+
* Added Digest Authentication support. Thanks to Ryan Davis and Eric Hodel,
|
13
|
+
you get a gold star!
|
14
|
+
|
3
15
|
== 0.6.8
|
4
16
|
|
5
17
|
* Keep alive can be shut off now with WWW::Mechanize#keep_alive
|
data/Manifest.txt
CHANGED
@@ -60,6 +60,7 @@ test/htdocs/relative/tc_relative_links.html
|
|
60
60
|
test/htdocs/tc_bad_links.html
|
61
61
|
test/htdocs/tc_checkboxes.html
|
62
62
|
test/htdocs/tc_encoded_links.html
|
63
|
+
test/htdocs/tc_follow_meta.html
|
63
64
|
test/htdocs/tc_form_action.html
|
64
65
|
test/htdocs/tc_links.html
|
65
66
|
test/htdocs/tc_no_attributes.html
|
@@ -77,6 +78,7 @@ test/tc_cookie_jar.rb
|
|
77
78
|
test/tc_cookies.rb
|
78
79
|
test/tc_encoded_links.rb
|
79
80
|
test/tc_errors.rb
|
81
|
+
test/tc_follow_meta.rb
|
80
82
|
test/tc_form_action.rb
|
81
83
|
test/tc_form_as_hash.rb
|
82
84
|
test/tc_form_button.rb
|
data/lib/mechanize.rb
CHANGED
@@ -28,6 +28,7 @@ require 'uri'
|
|
28
28
|
require 'webrick/httputils'
|
29
29
|
require 'zlib'
|
30
30
|
require 'stringio'
|
31
|
+
require 'digest/md5'
|
31
32
|
require 'mechanize/monkey_patch'
|
32
33
|
require 'mechanize/cookie'
|
33
34
|
require 'mechanize/errors'
|
@@ -63,7 +64,7 @@ class Mechanize
|
|
63
64
|
##
|
64
65
|
# The version of Mechanize you are using.
|
65
66
|
|
66
|
-
VERSION = '0.6.8'
|
67
|
+
VERSION = '0.6.9'
|
67
68
|
|
68
69
|
##
|
69
70
|
# User Agent aliases
|
@@ -92,12 +93,16 @@ class Mechanize
|
|
92
93
|
attr_accessor :keep_alive_time
|
93
94
|
attr_accessor :keep_alive
|
94
95
|
attr_accessor :conditional_requests
|
96
|
+
attr_accessor :follow_meta_refresh
|
95
97
|
|
96
98
|
attr_reader :history
|
97
99
|
attr_reader :pluggable_parser
|
98
100
|
|
99
101
|
alias :follow_redirect? :redirect_ok
|
100
102
|
|
103
|
+
@@nonce_count = -1
|
104
|
+
CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))
|
105
|
+
|
101
106
|
def initialize
|
102
107
|
# attr_accessors
|
103
108
|
@cookie_jar = CookieJar.new
|
@@ -116,9 +121,11 @@ class Mechanize
|
|
116
121
|
@history = WWW::Mechanize::History.new
|
117
122
|
@pluggable_parser = PluggableParser.new
|
118
123
|
|
119
|
-
#
|
120
|
-
@user = nil #
|
121
|
-
@password = nil #
|
124
|
+
# Auth variables
|
125
|
+
@user = nil # Auth User
|
126
|
+
@password = nil # Auth Password
|
127
|
+
@digest = nil # DigestAuth Digest
|
128
|
+
@auth_hash = {} # Keep track of urls for sending auth
|
122
129
|
|
123
130
|
# Proxy settings
|
124
131
|
@proxy_addr = nil
|
@@ -128,6 +135,8 @@ class Mechanize
|
|
128
135
|
|
129
136
|
@conditional_requests = true
|
130
137
|
|
138
|
+
@follow_meta_refresh = false
|
139
|
+
|
131
140
|
# Connection Cache & Keep alive
|
132
141
|
@connection_cache = {}
|
133
142
|
@keep_alive_time = 300
|
@@ -157,8 +166,12 @@ class Mechanize
|
|
157
166
|
|
158
167
|
# Sets the user and password to be used for basic authentication.
|
159
168
|
def basic_auth(user, password)
|
160
|
-
|
161
|
-
|
169
|
+
auth(user, password)
|
170
|
+
end
|
171
|
+
|
172
|
+
def auth(user, password)
|
173
|
+
@user = user
|
174
|
+
@password = password
|
162
175
|
end
|
163
176
|
|
164
177
|
# Fetches the URL passed in and returns a page.
|
@@ -309,15 +322,67 @@ class Mechanize
|
|
309
322
|
end
|
310
323
|
end
|
311
324
|
|
312
|
-
|
325
|
+
if( @auth_hash[uri.to_s] )
|
326
|
+
raise 'Please provide username and password' unless @user || @password
|
327
|
+
case @auth_hash[uri.to_s]
|
328
|
+
when :basic
|
329
|
+
request.basic_auth(@user, @password)
|
330
|
+
when :digest
|
331
|
+
@digest_response ||= nil
|
332
|
+
@digest_response = self.gen_auth_header(uri, @digest) if @digest
|
333
|
+
request.add_field('Authorization', @digest_response) if @digest_response
|
334
|
+
end
|
335
|
+
end
|
336
|
+
|
313
337
|
request
|
314
338
|
end
|
315
339
|
|
340
|
+
def gen_auth_header(uri, auth_header, is_IIS = false)
|
341
|
+
@@nonce_count += 1
|
342
|
+
|
343
|
+
user = @digest_user
|
344
|
+
password = @digest_password
|
345
|
+
|
346
|
+
auth_header =~ /^(\w+) (.*)/
|
347
|
+
|
348
|
+
params = {}
|
349
|
+
$2.gsub(/(\w+)="(.*?)"/) { params[$1] = $2 }
|
350
|
+
|
351
|
+
a_1 = "#{@user}:#{params['realm']}:#{@password}"
|
352
|
+
a_2 = "GET:#{uri.path}"
|
353
|
+
request_digest = ''
|
354
|
+
request_digest << Digest::MD5.hexdigest(a_1)
|
355
|
+
request_digest << ':' << params['nonce']
|
356
|
+
request_digest << ':' << ('%08x' % @@nonce_count)
|
357
|
+
request_digest << ':' << CNONCE
|
358
|
+
request_digest << ':' << params['qop']
|
359
|
+
request_digest << ':' << Digest::MD5.hexdigest(a_2)
|
360
|
+
|
361
|
+
header = ''
|
362
|
+
header << "Digest username=\"#{@user}\", "
|
363
|
+
header << "realm=\"#{params['realm']}\", "
|
364
|
+
if is_IIS then
|
365
|
+
header << "qop=\"#{params['qop']}\", "
|
366
|
+
else
|
367
|
+
header << "qop=#{params['qop']}, "
|
368
|
+
end
|
369
|
+
header << "uri=\"#{uri.path}\", "
|
370
|
+
header << "nonce=\"#{params['nonce']}\", "
|
371
|
+
header << "nc=#{'%08x' % @@nonce_count}, "
|
372
|
+
header << "cnonce=\"#{CNONCE}\", "
|
373
|
+
header << "response=\"#{Digest::MD5.hexdigest(request_digest)}\""
|
374
|
+
|
375
|
+
return header
|
376
|
+
end
|
377
|
+
|
316
378
|
private
|
317
379
|
|
318
380
|
def to_absolute_uri(url, cur_page=current_page())
|
319
381
|
unless url.is_a? URI
|
320
|
-
url = url.to_s.strip
|
382
|
+
url = url.to_s.strip.gsub(/[^#{0.chr}-#{125.chr}]/) { |match|
|
383
|
+
sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'c')[0])
|
384
|
+
}
|
385
|
+
|
321
386
|
url = URI.parse(
|
322
387
|
Util.html_unescape(
|
323
388
|
url.split(/%[0-9A-Fa-f]{2}/).zip(
|
@@ -467,6 +532,8 @@ class Mechanize
|
|
467
532
|
when 'gzip'
|
468
533
|
log.debug('gunzip body') if log
|
469
534
|
Zlib::GzipReader.new(body).read
|
535
|
+
when 'x-gzip'
|
536
|
+
body.read
|
470
537
|
else
|
471
538
|
raise 'Unsupported content encoding'
|
472
539
|
end
|
@@ -509,6 +576,10 @@ class Mechanize
|
|
509
576
|
|
510
577
|
res_klass = Net::HTTPResponse::CODE_TO_OBJ[page.code.to_s]
|
511
578
|
|
579
|
+
if follow_meta_refresh && (redirect = page.meta.first)
|
580
|
+
return redirect.click
|
581
|
+
end
|
582
|
+
|
512
583
|
return page if res_klass <= Net::HTTPSuccess
|
513
584
|
|
514
585
|
if res_klass == Net::HTTPNotModified
|
@@ -522,6 +593,15 @@ class Mechanize
|
|
522
593
|
page = fetch_page(abs_uri, fetch_request(abs_uri), page)
|
523
594
|
@history.push(page, from_uri)
|
524
595
|
return page
|
596
|
+
elsif res_klass <= Net::HTTPUnauthorized
|
597
|
+
if response['www-authenticate'] =~ /Digest/i
|
598
|
+
@auth_hash[uri.to_s] = :digest
|
599
|
+
@digest = response['www-authenticate']
|
600
|
+
return fetch_page(uri, fetch_request(uri), cur_page, request_data)
|
601
|
+
else
|
602
|
+
@auth_hash[uri.to_s] = :basic
|
603
|
+
return fetch_page(uri, fetch_request(uri), cur_page, request_data)
|
604
|
+
end
|
525
605
|
end
|
526
606
|
|
527
607
|
raise ResponseCodeError.new(page), "Unhandled response", caller
|
@@ -555,7 +635,7 @@ class Mechanize
|
|
555
635
|
$1.to_i
|
556
636
|
end
|
557
637
|
|
558
|
-
number ? (number.
|
638
|
+
number ? ([number].pack('U') rescue match) : match
|
559
639
|
}
|
560
640
|
end
|
561
641
|
end
|
data/lib/mechanize/page.rb
CHANGED
@@ -54,6 +54,7 @@ module WWW
|
|
54
54
|
parse_html if @body && @watch_for_set
|
55
55
|
end
|
56
56
|
|
57
|
+
# Find a form with +name+. Form will be yielded if a block is given.
|
57
58
|
def form(name)
|
58
59
|
f = forms.name(name).first
|
59
60
|
yield f if block_given?
|
@@ -87,6 +88,11 @@ module WWW
|
|
87
88
|
@links << Link.new(node, @mech, self)
|
88
89
|
end
|
89
90
|
|
91
|
+
# Find all the 'area' tags
|
92
|
+
(@parser/'area').each do |node|
|
93
|
+
@links << Link.new(node, @mech, self)
|
94
|
+
end
|
95
|
+
|
90
96
|
# Find all 'meta' tags
|
91
97
|
(@parser/'meta').each do |node|
|
92
98
|
next unless node['http-equiv']
|
@@ -0,0 +1,25 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'rubygems'
|
5
|
+
require 'mechanize'
|
6
|
+
require 'test_includes'
|
7
|
+
|
8
|
+
class FollowMetaTest < Test::Unit::TestCase
|
9
|
+
include TestMethods
|
10
|
+
|
11
|
+
def setup
|
12
|
+
@agent = WWW::Mechanize.new
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_follow_meta
|
16
|
+
page = @agent.get('http://localhost/tc_follow_meta.html')
|
17
|
+
assert_equal('http://localhost/tc_follow_meta.html', page.uri.to_s)
|
18
|
+
assert_equal(1, page.meta.length)
|
19
|
+
|
20
|
+
@agent.follow_meta_refresh = true
|
21
|
+
page = @agent.get('http://localhost/tc_follow_meta.html')
|
22
|
+
assert_equal('http://localhost/index.html', page.uri.to_s)
|
23
|
+
assert_equal(3, @agent.history.length)
|
24
|
+
end
|
25
|
+
end
|
data/test/tc_links.rb
CHANGED
data/test/tc_mech.rb
CHANGED
@@ -20,6 +20,17 @@ class TestMechMethods < Test::Unit::TestCase
|
|
20
20
|
assert_nothing_raised {
|
21
21
|
@agent.get('http://localhost/?a=b&b=c&c=d')
|
22
22
|
}
|
23
|
+
assert_nothing_raised {
|
24
|
+
@agent.get("http://localhost/?a=#{[0xd6].pack('U')}")
|
25
|
+
}
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_kcode_url
|
29
|
+
$KCODE = 'u'
|
30
|
+
page = @agent.get("http://localhost/?a=#{[0xd6].pack('U')}")
|
31
|
+
assert_not_nil(page)
|
32
|
+
assert_equal('http://localhost/?a=%D6', page.uri.to_s)
|
33
|
+
$KCODE = 'NONE'
|
23
34
|
end
|
24
35
|
|
25
36
|
def test_history
|
data/test/tc_pluggable_parser.rb
CHANGED
@@ -63,7 +63,7 @@ class PluggableParserTest < Test::Unit::TestCase
|
|
63
63
|
@agent.pluggable_parser.html = Filter
|
64
64
|
page = @agent.get("http://localhost:#{PORT}/find_link.html")
|
65
65
|
assert_kind_of(Filter, page)
|
66
|
-
assert_equal(
|
66
|
+
assert_equal(19, page.links.length)
|
67
67
|
assert_not_nil(page.links.text('Net::DAAP::Client').first)
|
68
68
|
assert_equal(1, page.links.text('Net::DAAP::Client').length)
|
69
69
|
end
|
@@ -74,7 +74,7 @@ class PluggableParserTest < Test::Unit::TestCase
|
|
74
74
|
assert_kind_of(Class, @agent.pluggable_parser['text/html'])
|
75
75
|
assert_equal(Filter, @agent.pluggable_parser['text/html'])
|
76
76
|
assert_kind_of(Filter, page)
|
77
|
-
assert_equal(
|
77
|
+
assert_equal(19, page.links.length)
|
78
78
|
assert_not_nil(page.links.text('Net::DAAP::Client').first)
|
79
79
|
assert_equal(1, page.links.text('Net::DAAP::Client').length)
|
80
80
|
end
|
data/test/test_all.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: mechanize
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.6.8
|
7
|
-
date: 2007-
|
6
|
+
version: 0.6.9
|
7
|
+
date: 2007-06-24 00:00:00 -07:00
|
8
8
|
summary: Mechanize provides automated web-browsing
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -95,6 +95,7 @@ files:
|
|
95
95
|
- test/htdocs/tc_bad_links.html
|
96
96
|
- test/htdocs/tc_checkboxes.html
|
97
97
|
- test/htdocs/tc_encoded_links.html
|
98
|
+
- test/htdocs/tc_follow_meta.html
|
98
99
|
- test/htdocs/tc_form_action.html
|
99
100
|
- test/htdocs/tc_links.html
|
100
101
|
- test/htdocs/tc_no_attributes.html
|
@@ -112,6 +113,7 @@ files:
|
|
112
113
|
- test/tc_cookies.rb
|
113
114
|
- test/tc_encoded_links.rb
|
114
115
|
- test/tc_errors.rb
|
116
|
+
- test/tc_follow_meta.rb
|
115
117
|
- test/tc_form_action.rb
|
116
118
|
- test/tc_form_as_hash.rb
|
117
119
|
- test/tc_form_button.rb
|
@@ -153,8 +155,17 @@ files:
|
|
153
155
|
- test/test_servlets.rb
|
154
156
|
test_files:
|
155
157
|
- test/test_all.rb
|
156
|
-
rdoc_options:
|
157
|
-
|
158
|
+
rdoc_options:
|
159
|
+
- "--main"
|
160
|
+
- README.txt
|
161
|
+
extra_rdoc_files:
|
162
|
+
- CHANGELOG.txt
|
163
|
+
- EXAMPLES.txt
|
164
|
+
- GUIDE.txt
|
165
|
+
- LICENSE.txt
|
166
|
+
- Manifest.txt
|
167
|
+
- NOTES.txt
|
168
|
+
- README.txt
|
158
169
|
executables: []
|
159
170
|
extensions: []
|
160
171
|
requirements: []
|
@@ -177,5 +188,5 @@ dependencies:
|
|
177
188
|
-
|
178
189
|
- ">="
|
179
190
|
- !ruby/object:Gem::Version
|
180
|
-
version: 1.2.
|
191
|
+
version: 1.2.1
|
181
192
|
version:
|