mechanize 1.0.1.beta.20110107104205 → 2.0.pre.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (89)
  1. data.tar.gz.sig +2 -0
  2. data/{lib/mechanize/chain/post_connect_hook.rb → .gemtest} +0 -0
  3. data/CHANGELOG.rdoc +51 -6
  4. data/EXAMPLES.rdoc +5 -3
  5. data/GUIDE.rdoc +72 -32
  6. data/LICENSE.rdoc +20 -340
  7. data/Manifest.txt +20 -27
  8. data/README.rdoc +12 -9
  9. data/Rakefile +5 -2
  10. data/examples/spider.rb +13 -2
  11. data/lib/mechanize.rb +545 -267
  12. data/lib/mechanize/content_type_error.rb +1 -1
  13. data/lib/mechanize/cookie.rb +72 -65
  14. data/lib/mechanize/cookie_jar.rb +197 -148
  15. data/lib/mechanize/element_matcher.rb +35 -0
  16. data/lib/mechanize/file.rb +3 -1
  17. data/lib/mechanize/file_connection.rb +17 -0
  18. data/lib/mechanize/file_request.rb +26 -0
  19. data/lib/mechanize/file_response.rb +61 -47
  20. data/lib/mechanize/form.rb +57 -58
  21. data/lib/mechanize/form/image_button.rb +2 -3
  22. data/lib/mechanize/form/multi_select_list.rb +71 -55
  23. data/lib/mechanize/form/select_list.rb +34 -62
  24. data/lib/mechanize/monkey_patch.rb +13 -11
  25. data/lib/mechanize/page.rb +277 -270
  26. data/lib/mechanize/page/image.rb +6 -2
  27. data/lib/mechanize/redirect_limit_reached_error.rb +1 -1
  28. data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -1
  29. data/lib/mechanize/response_code_error.rb +3 -3
  30. data/lib/mechanize/unsupported_scheme_error.rb +1 -1
  31. data/lib/mechanize/uri_resolver.rb +82 -0
  32. data/lib/mechanize/util.rb +76 -60
  33. data/test/helper.rb +35 -5
  34. data/test/htdocs/dir with spaces/foo.html +1 -0
  35. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  36. data/test/htdocs/tc_base_images.html +10 -0
  37. data/test/htdocs/tc_images.html +8 -0
  38. data/test/htdocs/test_click.html +11 -0
  39. data/test/servlets.rb +3 -2
  40. data/test/test_authenticate.rb +5 -5
  41. data/test/test_errors.rb +8 -8
  42. data/test/test_follow_meta.rb +4 -4
  43. data/test/test_form_as_hash.rb +4 -4
  44. data/test/test_forms.rb +3 -7
  45. data/test/test_hash_api.rb +2 -2
  46. data/test/test_headers.rb +1 -1
  47. data/test/test_images.rb +19 -0
  48. data/test/test_mech.rb +6 -6
  49. data/test/test_mechanize.rb +687 -0
  50. data/test/{test_cookie_class.rb → test_mechanize_cookie.rb} +52 -45
  51. data/test/test_mechanize_cookie_jar.rb +400 -0
  52. data/test/test_mechanize_file.rb +7 -1
  53. data/test/test_mechanize_file_request.rb +19 -0
  54. data/test/test_mechanize_file_response.rb +21 -0
  55. data/test/test_mechanize_form_image_button.rb +12 -0
  56. data/test/test_mechanize_page.rb +165 -0
  57. data/test/test_mechanize_uri_resolver.rb +29 -0
  58. data/test/{test_util.rb → test_mechanize_util.rb} +1 -1
  59. data/test/test_multi_select.rb +12 -0
  60. data/test/test_post_form.rb +7 -0
  61. data/test/test_redirect_verb_handling.rb +6 -6
  62. data/test/test_scheme.rb +0 -7
  63. data/test/test_verbs.rb +3 -3
  64. metadata +106 -72
  65. metadata.gz.sig +0 -0
  66. data/lib/mechanize/chain.rb +0 -36
  67. data/lib/mechanize/chain/auth_headers.rb +0 -78
  68. data/lib/mechanize/chain/body_decoding_handler.rb +0 -50
  69. data/lib/mechanize/chain/connection_resolver.rb +0 -28
  70. data/lib/mechanize/chain/custom_headers.rb +0 -21
  71. data/lib/mechanize/chain/handler.rb +0 -9
  72. data/lib/mechanize/chain/header_resolver.rb +0 -48
  73. data/lib/mechanize/chain/parameter_resolver.rb +0 -22
  74. data/lib/mechanize/chain/pre_connect_hook.rb +0 -20
  75. data/lib/mechanize/chain/request_resolver.rb +0 -31
  76. data/lib/mechanize/chain/response_body_parser.rb +0 -36
  77. data/lib/mechanize/chain/response_header_handler.rb +0 -34
  78. data/lib/mechanize/chain/response_reader.rb +0 -39
  79. data/lib/mechanize/chain/ssl_resolver.rb +0 -40
  80. data/lib/mechanize/chain/uri_resolver.rb +0 -75
  81. data/test/chain/test_argument_validator.rb +0 -14
  82. data/test/chain/test_auth_headers.rb +0 -25
  83. data/test/chain/test_custom_headers.rb +0 -18
  84. data/test/chain/test_header_resolver.rb +0 -27
  85. data/test/chain/test_parameter_resolver.rb +0 -35
  86. data/test/chain/test_request_resolver.rb +0 -29
  87. data/test/chain/test_response_reader.rb +0 -24
  88. data/test/test_cookie_jar.rb +0 -324
  89. data/test/test_page.rb +0 -124
Binary file
@@ -1,36 +0,0 @@
1
- require 'mechanize/chain/handler'
2
- require 'mechanize/chain/uri_resolver'
3
- require 'mechanize/chain/parameter_resolver'
4
- require 'mechanize/chain/request_resolver'
5
- require 'mechanize/chain/custom_headers'
6
- require 'mechanize/chain/connection_resolver'
7
- require 'mechanize/chain/ssl_resolver'
8
- require 'mechanize/chain/pre_connect_hook'
9
- require 'mechanize/chain/auth_headers'
10
- require 'mechanize/chain/header_resolver'
11
- require 'mechanize/chain/response_body_parser'
12
- require 'mechanize/chain/response_header_handler'
13
- require 'mechanize/chain/response_reader'
14
- require 'mechanize/chain/body_decoding_handler'
15
-
16
- class Mechanize
17
- class Chain
18
- attr_accessor :http
19
-
20
- def initialize(list, http = nil)
21
- @http = http
22
- @list = list
23
- @list.each { |l| l.chain = self }
24
- end
25
-
26
- def handle(request)
27
- @list.first.handle(self, request)
28
- end
29
-
30
- def pass(obj, request)
31
- next_link = @list[@list.index(obj) + 1]
32
- next_link.handle(self, request) if next_link
33
- end
34
- end
35
- end
36
-
@@ -1,78 +0,0 @@
1
- class Mechanize
2
- class Chain
3
- class AuthHeaders
4
- include Mechanize::Handler
5
-
6
- @@nonce_count = Hash.new(0)
7
- CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))
8
-
9
- def initialize(auth_hash, user, password, digest)
10
- @auth_hash = auth_hash
11
- @user = user
12
- @password = password
13
- @digest = digest
14
- end
15
-
16
- def handle(ctx, params)
17
- uri = params[:uri]
18
- request = params[:request]
19
-
20
- if( @auth_hash[uri.host] )
21
- case @auth_hash[uri.host]
22
- when :basic
23
- request.basic_auth(@user, @password)
24
- when :iis_digest
25
- digest_response = self.gen_auth_header(uri,request, @digest, true)
26
- request['Authorization'] = digest_response
27
- when :digest
28
- if @digest
29
- digest_response = self.gen_auth_header(uri,request, @digest)
30
- request['Authorization'] = digest_response
31
- end
32
- end
33
- end
34
- super
35
- end
36
-
37
- def gen_auth_header(uri, request, auth_header, is_IIS = false)
38
- auth_header =~ /^(\w+) (.*)/
39
-
40
- params = {}
41
- $2.gsub(/(\w+)=("[^"]*"|[^,]*)/) {
42
- params[$1] = $2.gsub(/^"/, '').gsub(/"$/, '')
43
- }
44
-
45
- @@nonce_count[params['nonce']] += 1
46
-
47
- a_1 = "#{@user}:#{params['realm']}:#{@password}"
48
- a_2 = "#{request.method}:#{uri.path}"
49
- request_digest = ''
50
- request_digest << Digest::MD5.hexdigest(a_1)
51
- request_digest << ':' << params['nonce']
52
- request_digest << ':' << ('%08x' % @@nonce_count[params['nonce']])
53
- request_digest << ':' << CNONCE
54
- request_digest << ':' << params['qop']
55
- request_digest << ':' << Digest::MD5.hexdigest(a_2)
56
-
57
- header = ''
58
- header << "Digest username=\"#{@user}\", "
59
- if is_IIS then
60
- header << "qop=\"#{params['qop']}\", "
61
- else
62
- header << "qop=#{params['qop']}, "
63
- end
64
- header << "uri=\"#{uri.path}\", "
65
- header << %w{ algorithm opaque nonce realm }.map { |field|
66
- next unless params[field]
67
- "#{field}=\"#{params[field]}\""
68
- }.compact.join(', ')
69
-
70
- header << ", nc=#{'%08x' % @@nonce_count[params['nonce']]}, "
71
- header << "cnonce=\"#{CNONCE}\", "
72
- header << "response=\"#{Digest::MD5.hexdigest(request_digest)}\""
73
-
74
- return header
75
- end
76
- end
77
- end
78
- end
@@ -1,50 +0,0 @@
1
- class Mechanize
2
- class Chain
3
- class BodyDecodingHandler
4
- include Mechanize::Handler
5
-
6
- def handle(ctx, options)
7
- body = options[:response_body]
8
- response = options[:response]
9
-
10
- options[:response_body] =
11
- if encoding = response['Content-Encoding']
12
- case encoding.downcase
13
- when 'gzip'
14
- Mechanize.log.debug('gunzip body') if Mechanize.log
15
- if response['Content-Length'].to_i > 0 || body.length > 0
16
- begin
17
- Zlib::GzipReader.new(body).read
18
- rescue Zlib::BufError, Zlib::GzipFile::Error
19
- if Mechanize.log
20
- Mechanize.log.error('Caught a Zlib::BufError')
21
- end
22
- body.rewind
23
- body.read(10)
24
- Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
25
- rescue Zlib::DataError
26
- if Mechanize.log
27
- Mechanize.log.error("Caught a Zlib::DataError, unable to decode page: #{$!.to_s}")
28
- end
29
- ''
30
- end
31
- else
32
- ''
33
- end
34
- when 'x-gzip'
35
- body.read
36
- when '7bit'
37
- body.read
38
- when 'none'
39
- body.read
40
- else
41
- raise 'Unsupported content encoding'
42
- end
43
- else
44
- body.read
45
- end
46
- super
47
- end
48
- end
49
- end
50
- end
@@ -1,28 +0,0 @@
1
- class Mechanize
2
- class Chain
3
- class ConnectionResolver
4
- include Mechanize::Handler
5
-
6
- def handle(ctx, params)
7
- uri = params[:uri]
8
- http_obj = nil
9
-
10
- case uri.scheme.downcase
11
- when 'http', 'https' then
12
- http_obj = ctx.http
13
- when 'file' then
14
- http_obj = Object.new
15
- class << http_obj
16
- def request(uri, request)
17
- yield FileResponse.new(CGI.unescape(uri.path))
18
- end
19
- end
20
- end
21
-
22
- params[:connection] = http_obj
23
-
24
- super
25
- end
26
- end
27
- end
28
- end
@@ -1,21 +0,0 @@
1
- class Mechanize
2
- class Chain
3
- class CustomHeaders
4
- include Mechanize::Handler
5
-
6
- def handle(ctx, params)
7
- request = params[:request]
8
- params[:headers].each do |k,v|
9
- case k
10
- when :etag then request["ETag"] = v
11
- when :if_modified_since then request["If-Modified-Since"] = v
12
- else
13
- raise ArgumentError.new("unknown header symbol #{k}") if k.is_a? Symbol
14
- request[k] = v
15
- end
16
- end
17
- super
18
- end
19
- end
20
- end
21
- end
@@ -1,9 +0,0 @@
1
- class Mechanize
2
- module Handler
3
- attr_accessor :chain
4
-
5
- def handle(ctx, request)
6
- chain.pass(self, request)
7
- end
8
- end
9
- end
@@ -1,48 +0,0 @@
1
- class Mechanize
2
- class Chain
3
- class HeaderResolver
4
- include Mechanize::Handler
5
- def initialize(cookie_jar, user_agent, gzip_enabled, headers)
6
- @cookie_jar = cookie_jar
7
- @user_agent = user_agent
8
- @gzip_enabled = gzip_enabled
9
- @headers = headers
10
- end
11
-
12
- def handle(ctx, params)
13
- uri = params[:uri]
14
- referer = params[:referer]
15
- request = params[:request]
16
-
17
- if @gzip_enabled
18
- request['Accept-Encoding'] = 'gzip,identity'
19
- else
20
- request['Accept-Encoding'] = 'identity'
21
- end
22
- request['Accept-Language'] = 'en-us,en;q=0.5'
23
- host = "#{uri.host}#{[80, 443].include?(uri.port.to_i) ? '' : ':' + uri.port.to_s}"
24
- request['Host'] = host
25
- request['Accept-Charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'
26
-
27
- unless @cookie_jar.empty?(uri)
28
- cookies = @cookie_jar.cookies(uri)
29
- cookie = cookies.length > 0 ? cookies.join("; ") : nil
30
- request.add_field('Cookie', cookie)
31
- end
32
-
33
- # Add Referer header to request except https => http
34
- if referer && referer.uri && (!(URI::HTTPS === referer.uri) or URI::HTTPS === uri)
35
- request['Referer'] = referer.uri.to_s
36
- end
37
-
38
- # Add User-Agent header to request
39
- request['User-Agent'] = @user_agent if @user_agent
40
-
41
- @headers.each do |k,v|
42
- request[k] = v
43
- end if request
44
- super
45
- end
46
- end
47
- end
48
- end
@@ -1,22 +0,0 @@
1
- class Mechanize
2
- class Chain
3
- class ParameterResolver
4
- include Mechanize::Handler
5
-
6
- def handle(ctx, params)
7
- parameters = params[:params]
8
- uri = params[:uri]
9
- case params[:verb]
10
- when :head, :get, :delete, :trace
11
- if parameters and parameters.length > 0
12
- uri.query ||= ''
13
- uri.query << '&' if uri.query.length > 0
14
- uri.query << Util.build_query_string(parameters)
15
- end
16
- params[:params] = nil
17
- end
18
- super
19
- end
20
- end
21
- end
22
- end
@@ -1,20 +0,0 @@
1
- class Mechanize
2
- class Chain
3
- class PreConnectHook
4
- include Mechanize::Handler
5
-
6
- attr_accessor :hooks
7
- def initialize
8
- @hooks = []
9
- end
10
-
11
- def handle(ctx, params)
12
- @hooks.each { |hook| hook.call(params) }
13
- super
14
- end
15
- end
16
-
17
- class PostConnectHook < PreConnectHook
18
- end
19
- end
20
- end
@@ -1,31 +0,0 @@
1
- class Mechanize
2
- class Chain
3
- class RequestResolver
4
- include Mechanize::Handler
5
-
6
- def handle(ctx, params)
7
- uri = params[:uri]
8
- if %w{ http https }.include?(uri.scheme.downcase)
9
- klass = Net::HTTP.const_get(params[:verb].to_s.capitalize)
10
- params[:request] ||= klass.new(uri.request_uri)
11
- params[:request].body = params[:params].first if params[:params]
12
- end
13
-
14
- if %w{ file }.include?(uri.scheme.downcase)
15
- o = Struct.new(:uri).new(uri)
16
- class << o
17
- def add_field(*args); end
18
- alias :[]= :add_field
19
- def path
20
- uri.path
21
- end
22
- def each_header; end
23
- end
24
- params[:request] ||= o
25
- end
26
-
27
- super
28
- end
29
- end
30
- end
31
- end
@@ -1,36 +0,0 @@
1
- class Mechanize
2
- class Chain
3
- class ResponseBodyParser
4
- include Mechanize::Handler
5
-
6
- def initialize(pluggable_parser, watch_for_set)
7
- @pluggable_parser = pluggable_parser
8
- @watch_for_set = watch_for_set
9
- end
10
-
11
- def handle(ctx, params)
12
- response = params[:response]
13
- response_body = params[:response_body]
14
- uri = params[:uri]
15
-
16
- content_type = nil
17
- unless response['Content-Type'].nil?
18
- data = response['Content-Type'].match(/^([^;]*)/)
19
- content_type = data[1].downcase.split(',')[0] unless data.nil?
20
- end
21
-
22
- # Find our pluggable parser
23
- parser_klass = @pluggable_parser.parser(content_type)
24
- params[:page] = parser_klass.new(uri, response, response_body,
25
- response.code) { |parser|
26
- parser.mech = params[:agent] if parser.respond_to? :mech=
27
- if parser.respond_to?(:watch_for_set=) && @watch_for_set
28
- parser.watch_for_set = @watch_for_set
29
- end
30
- }
31
-
32
- super
33
- end
34
- end
35
- end
36
- end
@@ -1,34 +0,0 @@
1
- class Mechanize
2
- class Chain
3
- class ResponseHeaderHandler
4
- include Mechanize::Handler
5
-
6
- def initialize(cookie_jar)
7
- @cookie_jar = cookie_jar
8
- end
9
-
10
- def handle(ctx, params)
11
- response = params[:response]
12
- uri = params[:uri]
13
- page = params[:page]
14
-
15
- if page.is_a?(Page) && page.body =~ /Set-Cookie/n
16
- page.search('//head/meta[@http-equiv="Set-Cookie"]').each do |meta|
17
- Cookie::parse(uri, meta['content']) { |c|
18
- Mechanize.log.debug("saved cookie: #{c}") if Mechanize.log
19
- @cookie_jar.add(uri, c)
20
- }
21
- end
22
- end
23
-
24
- (response.get_fields('Set-Cookie')||[]).each do |cookie|
25
- Cookie::parse(uri, cookie) { |c|
26
- Mechanize.log.debug("saved cookie: #{c}") if Mechanize.log
27
- @cookie_jar.add(uri, c)
28
- }
29
- end
30
- super
31
- end
32
- end
33
- end
34
- end
@@ -1,39 +0,0 @@
1
- class Mechanize
2
- class Chain
3
- class ResponseReader
4
- include Mechanize::Handler
5
-
6
- def initialize(response)
7
- @response = response
8
- end
9
-
10
- def handle(ctx, params)
11
- params[:response] = @response
12
- body = StringIO.new
13
- total = 0
14
- @response.read_body { |part|
15
- total += part.length
16
- body.write(part)
17
- Mechanize.log.debug("Read #{total} bytes") if Mechanize.log
18
- }
19
- body.rewind
20
-
21
- res_klass = Net::HTTPResponse::CODE_TO_OBJ[@response.code.to_s]
22
- raise ResponseCodeError.new(@response) unless res_klass
23
-
24
- # Net::HTTP ignores EOFError if Content-length is given, so we emulate it here.
25
- unless res_klass <= Net::HTTPRedirection
26
- raise EOFError if (!params[:request].is_a?(Net::HTTP::Head)) && @response.content_length() && @response.content_length() != total
27
- end
28
-
29
- @response.each_header { |k,v|
30
- Mechanize.log.debug("response-header: #{ k } => #{ v }")
31
- } if Mechanize.log
32
-
33
- params[:response_body] = body
34
- params[:res_klass] = res_klass
35
- super
36
- end
37
- end
38
- end
39
- end