mechanize 0.7.8 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (96) hide show
  1. data/History.txt +14 -0
  2. data/Manifest.txt +30 -5
  3. data/README.txt +5 -5
  4. data/Rakefile +6 -0
  5. data/{eg → examples}/flickr_upload.rb +0 -0
  6. data/{eg → examples}/mech-dump.rb +0 -0
  7. data/{eg → examples}/proxy_req.rb +0 -0
  8. data/{eg → examples}/rubyforge.rb +0 -0
  9. data/{eg → examples}/spider.rb +0 -0
  10. data/lib/www/mechanize.rb +183 -404
  11. data/lib/www/mechanize/chain.rb +34 -0
  12. data/lib/www/mechanize/chain/auth_headers.rb +79 -0
  13. data/lib/www/mechanize/chain/body_decoding_handler.rb +43 -0
  14. data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
  15. data/lib/www/mechanize/chain/custom_headers.rb +23 -0
  16. data/lib/www/mechanize/chain/handler.rb +9 -0
  17. data/lib/www/mechanize/chain/header_resolver.rb +47 -0
  18. data/lib/www/mechanize/chain/parameter_resolver.rb +23 -0
  19. data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
  20. data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
  21. data/lib/www/mechanize/chain/request_resolver.rb +28 -0
  22. data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
  23. data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
  24. data/lib/www/mechanize/chain/response_reader.rb +41 -0
  25. data/lib/www/mechanize/chain/ssl_resolver.rb +36 -0
  26. data/lib/www/mechanize/chain/uri_resolver.rb +56 -0
  27. data/lib/www/mechanize/cookie.rb +1 -1
  28. data/lib/www/mechanize/file_response.rb +60 -0
  29. data/lib/www/mechanize/form.rb +12 -4
  30. data/lib/www/mechanize/form/field.rb +2 -2
  31. data/lib/www/mechanize/form/file_upload.rb +1 -1
  32. data/lib/www/mechanize/form/option.rb +1 -1
  33. data/lib/www/mechanize/list.rb +4 -0
  34. data/lib/www/mechanize/page.rb +20 -10
  35. data/lib/www/mechanize/util.rb +29 -0
  36. data/mechanize.gemspec +4 -4
  37. data/test/chain/test_argument_validator.rb +14 -0
  38. data/test/chain/test_custom_headers.rb +18 -0
  39. data/test/chain/test_parameter_resolver.rb +35 -0
  40. data/test/chain/test_request_resolver.rb +29 -0
  41. data/test/chain/test_response_reader.rb +24 -0
  42. data/test/helper.rb +3 -1
  43. data/test/servlets.rb +43 -0
  44. data/test/test_authenticate.rb +13 -12
  45. data/test/test_bad_links.rb +1 -1
  46. data/test/test_blank_form.rb +1 -1
  47. data/test/test_checkboxes.rb +1 -1
  48. data/test/test_content_type.rb +1 -1
  49. data/test/test_cookie_class.rb +1 -1
  50. data/test/test_cookie_jar.rb +1 -1
  51. data/test/test_cookies.rb +1 -1
  52. data/test/test_encoded_links.rb +1 -1
  53. data/test/test_errors.rb +1 -1
  54. data/test/test_follow_meta.rb +1 -1
  55. data/test/test_form_action.rb +1 -1
  56. data/test/test_form_as_hash.rb +1 -1
  57. data/test/test_form_button.rb +22 -17
  58. data/test/test_form_no_inputname.rb +1 -1
  59. data/test/test_forms.rb +2 -1
  60. data/test/test_frames.rb +1 -1
  61. data/test/test_get_headers.rb +1 -1
  62. data/test/test_gzipping.rb +1 -1
  63. data/test/test_hash_api.rb +17 -14
  64. data/test/test_history.rb +1 -1
  65. data/test/test_history_added.rb +1 -1
  66. data/test/test_html_unscape_forms.rb +1 -1
  67. data/test/test_if_modified_since.rb +1 -1
  68. data/test/test_keep_alive.rb +1 -1
  69. data/test/test_links.rb +1 -1
  70. data/test/test_mech.rb +18 -11
  71. data/test/test_mechanize_file.rb +1 -1
  72. data/test/test_multi_select.rb +1 -1
  73. data/test/test_no_attributes.rb +1 -1
  74. data/test/test_option.rb +2 -1
  75. data/test/test_page.rb +1 -1
  76. data/test/test_pluggable_parser.rb +1 -1
  77. data/test/test_post_form.rb +1 -1
  78. data/test/test_pretty_print.rb +1 -1
  79. data/test/test_radiobutton.rb +1 -1
  80. data/test/test_redirect_limit_reached.rb +1 -1
  81. data/test/test_referer.rb +1 -1
  82. data/test/test_relative_links.rb +1 -1
  83. data/test/test_response_code.rb +7 -1
  84. data/test/test_save_file.rb +1 -1
  85. data/test/test_scheme.rb +44 -0
  86. data/test/test_select.rb +1 -1
  87. data/test/test_select_all.rb +1 -1
  88. data/test/test_select_none.rb +1 -1
  89. data/test/test_select_noopts.rb +1 -1
  90. data/test/test_set_fields.rb +1 -1
  91. data/test/test_ssl_server.rb +1 -1
  92. data/test/test_subclass.rb +4 -11
  93. data/test/test_textarea.rb +1 -1
  94. data/test/test_upload.rb +1 -1
  95. data/test/test_verbs.rb +22 -0
  96. metadata +39 -7
@@ -0,0 +1,34 @@
1
+ require 'www/mechanize/chain/handler'
2
+ require 'www/mechanize/chain/uri_resolver'
3
+ require 'www/mechanize/chain/parameter_resolver'
4
+ require 'www/mechanize/chain/request_resolver'
5
+ require 'www/mechanize/chain/custom_headers'
6
+ require 'www/mechanize/chain/connection_resolver'
7
+ require 'www/mechanize/chain/ssl_resolver'
8
+ require 'www/mechanize/chain/pre_connect_hook'
9
+ require 'www/mechanize/chain/auth_headers'
10
+ require 'www/mechanize/chain/header_resolver'
11
+ require 'www/mechanize/chain/response_body_parser'
12
+ require 'www/mechanize/chain/response_header_handler'
13
+ require 'www/mechanize/chain/response_reader'
14
+ require 'www/mechanize/chain/body_decoding_handler'
15
+
16
module WWW
  class Mechanize
    # An ordered pipeline of handler objects. A request enters at the first
    # handler; each handler hands it on by calling #pass with itself.
    class Chain
      # list - ordered collection of handlers. Every element is given a
      #        back-reference to this chain through its +chain=+ writer.
      def initialize(list)
        @list = list
        @list.each do |link|
          link.chain = self
        end
      end

      # Starts processing by invoking +handle+ on the first handler, with
      # this chain as the context argument. Returns whatever that call
      # returns.
      def handle(request)
        head = @list.first
        head.handle(self, request)
      end

      # Invokes the handler positioned directly after +obj+ in the list.
      # Returns nil without calling anything when +obj+ is the last handler.
      def pass(obj, request)
        successor = @list[@list.index(obj) + 1]
        return unless successor

        successor.handle(self, request)
      end
    end
  end
end
@@ -0,0 +1,79 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain link that adds authentication credentials to the outgoing
      # request, based on the scheme registered for the request's host.
      class AuthHeaders
        include WWW::Handler

        # Request counter shared by every instance; rendered as the "nc"
        # field of digest responses.
        @@nonce_count = -1
        # Client nonce, computed once when this class is loaded.
        CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))

        # auth_hash - maps a host name to :basic, :iis_digest or :digest.
        # user      - user name sent to the server.
        # password  - password for +user+.
        # digest    - digest challenge string of the form
        #             "<scheme> key=value, key=value"; may be nil.
        #             (presumably the server's WWW-Authenticate value --
        #             confirm against the caller)
        def initialize(auth_hash, user, password, digest)
          @auth_hash = auth_hash
          @user      = user
          @password  = password
          @digest    = digest
        end

        # Adds credentials to params[:request] according to the scheme
        # registered for params[:uri].host, then passes control down the
        # chain. Hosts without a registered scheme are left untouched.
        def handle(ctx, params)
          uri     = params[:uri]
          request = params[:request]

          case @auth_hash[uri.host]
          when :basic
            request.basic_auth(@user, @password)
          when :iis_digest
            add_digest_authorization(uri, request, true)
          when :digest
            add_digest_authorization(uri, request, false)
          end
          super
        end

        # Builds the value of a Digest "Authorization" header.
        #
        # uri         - request URI; its path is used as the digest URI.
        # request     - request object; its +method+ feeds the digest.
        # auth_header - challenge string, "<scheme> key=value, ...".
        # is_IIS      - when true the qop value is emitted quoted.
        #
        # Returns the header value as a String.
        # Raises ArgumentError when the challenge cannot be parsed or does
        # not carry a nonce.
        def gen_auth_header(uri, request, auth_header, is_IIS = false)
          @@nonce_count += 1

          challenge = auth_header.to_s[/^(\w+) (.*)/, 2]
          if challenge.nil?
            raise ArgumentError,
                  "malformed digest challenge: #{auth_header.inspect}"
          end

          params = {}
          challenge.scan(/(\w+)=("[^"]*"|[^,]*)/) do |key, value|
            params[key] = value.gsub(/^"/, '').gsub(/"$/, '')
          end
          unless params['nonce']
            raise ArgumentError, 'digest challenge does not contain a nonce'
          end

          qop = params['qop']
          nc  = '%08x' % @@nonce_count

          # NOTE(review): the digest URI is the path only, without the query
          # string -- confirm that the servers in use expect this.
          ha1 = Digest::MD5.hexdigest("#{@user}:#{params['realm']}:#{@password}")
          ha2 = Digest::MD5.hexdigest("#{request.method}:#{uri.path}")

          # Without a qop directive the response is computed from
          # HA1:nonce:HA2 only, and nc/cnonce/qop are not sent.
          digest_input =
            if qop
              [ha1, params['nonce'], nc, CNONCE, qop, ha2]
            else
              [ha1, params['nonce'], ha2]
            end
          response = Digest::MD5.hexdigest(digest_input.join(':'))

          fields = []
          fields << "username=\"#{@user}\""
          fields << "realm=\"#{params['realm']}\""
          if qop
            fields << (is_IIS ? "qop=\"#{qop}\"" : "qop=#{qop}")
          end
          fields << "uri=\"#{uri.path}\""
          fields << "algorithm=\"#{params['algorithm']}\""
          fields << "opaque=\"#{params['opaque']}\"" if params['opaque']
          fields << "nonce=\"#{params['nonce']}\""
          if qop
            fields << "nc=#{nc}"
            fields << "cnonce=\"#{CNONCE}\""
          end
          fields << "response=\"#{response}\""

          'Digest ' + fields.join(', ')
        end

        private

        # Adds a Digest Authorization header when a challenge is available.
        # Does nothing when no challenge has been recorded, so both digest
        # variants are safe to call before the server has challenged us.
        def add_digest_authorization(uri, request, is_iis)
          return unless @digest

          request.add_field('Authorization',
                            gen_auth_header(uri, request, @digest, is_iis))
        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain link that replaces options[:response_body] (an IO-like object)
      # with the decoded body String, undoing the response's
      # Content-Encoding.
      class BodyDecodingHandler
        include WWW::Handler

        # Reads options[:response_body], decodes it according to the
        # Content-Encoding header of options[:response], stores the
        # resulting String back into options[:response_body] and passes
        # control down the chain.
        #
        # Raises RuntimeError for content encodings it cannot decode.
        def handle(ctx, options)
          body     = options[:response_body]
          response = options[:response]
          encoding = response['Content-Encoding']

          options[:response_body] =
            case encoding && encoding.downcase
            when nil, 'identity'
              # No header, or an explicit "identity": nothing to undo.
              body.read
            when 'gzip', 'x-gzip'
              # "x-gzip" is the legacy name for the gzip coding.
              gunzip(body, response)
            else
              raise 'Unsupported content encoding'
            end
          super
        end

        private

        # Decompresses a gzip body. Returns '' when both the Content-Length
        # header and the body itself report zero length. When the gzip
        # reader rejects the data, skips the first 10 bytes and inflates
        # the remainder as a raw deflate stream.
        def gunzip(body, response)
          Mechanize.log.debug('gunzip body') if Mechanize.log
          return '' unless response['Content-Length'].to_i > 0 || body.length > 0

          begin
            Zlib::GzipReader.new(body).read
          rescue Zlib::BufError, Zlib::GzipFile::Error => e
            # Report the error that was actually caught.
            Mechanize.log.error("Caught a #{e.class}") if Mechanize.log
            body.rewind
            body.read(10)
            Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain link that chooses the connection object used to perform the
      # request and stores it in params[:connection].
      class ConnectionResolver
        include WWW::Handler

        # connection_cache - Hash keyed by "host:port"; each value is a Hash
        #                    holding :connection, :keep_alive_options and
        #                    :last_request_time.
        # keep_alive       - whether stale cached connections are checked.
        # proxy_addr, proxy_port, proxy_user, proxy_pass
        #                  - forwarded unchanged to Net::HTTP.new.
        def initialize( connection_cache,
                        keep_alive,
                        proxy_addr,
                        proxy_port,
                        proxy_user,
                        proxy_pass )

          @connection_cache = connection_cache
          @keep_alive       = keep_alive
          @proxy_addr       = proxy_addr
          @proxy_port       = proxy_port
          @proxy_user       = proxy_user
          @proxy_pass       = proxy_pass
        end

        # Sets params[:connection] based on the scheme of params[:uri]:
        # a cached or new Net::HTTP object for http/https, a stand-in
        # object for file, nil for anything else. Then passes control down
        # the chain.
        def handle(ctx, params)
          uri = params[:uri]

          params[:connection] =
            case uri.scheme.downcase
            when 'http', 'https' then http_connection(uri)
            when 'file'          then file_connection
            end
          super
        end

        private

        # Returns the Net::HTTP object for uri's host and port, creating one
        # when none is cached or the cached one is not started, and records
        # the current time as the last request time.
        def http_connection(uri)
          cache_obj = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
            :connection         => nil,
            :keep_alive_options => {},
          })

          http_obj = cache_obj[:connection]
          if http_obj.nil? || !http_obj.started?
            http_obj = cache_obj[:connection] =
              Net::HTTP.new( uri.host,
                             uri.port,
                             @proxy_addr,
                             @proxy_port,
                             @proxy_user,
                             @proxy_pass
                           )
            cache_obj[:keep_alive_options] = {}
          end

          # If we're keeping connections alive and the last request time is
          # too long ago, stop the connection. Or, if the max requests left
          # is 1, reset the connection.
          if @keep_alive && http_obj.started? && stale?(cache_obj)
            # Use the Mechanize logger; this class has no #log of its own.
            Mechanize.log.debug('Finishing stale connection') if Mechanize.log
            http_obj.finish
          end

          cache_obj[:last_request_time] = Time.now.to_i
          http_obj
        end

        # True when the cached keep-alive options say the connection timed
        # out or has only one request left.
        def stale?(cache_obj)
          opts = cache_obj[:keep_alive_options]
          (opts[:timeout] &&
            Time.now.to_i - cache_obj[:last_request_time] > opts[:timeout].to_i) ||
            (opts[:max] && opts[:max].to_i == 1)
        end

        # Returns an object that imitates the parts of a started connection
        # used for file URIs: #started? is always true and #request yields
        # a FileResponse for the request's URI path.
        def file_connection
          http_obj = Object.new
          class << http_obj
            def started?; true; end
            def request(request, *args, &block)
              response = FileResponse.new(request.uri.path)
              yield response
            end
          end
          http_obj
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain link that copies caller-supplied headers onto the request.
      class CustomHeaders
        include WWW::Handler

        # Adds every pair in params[:headers] to params[:request] through
        # +add_field+, then passes control down the chain.
        #
        # The symbols :etag and :if_modified_since are translated to their
        # header names; any other Symbol key raises ArgumentError. Keys that
        # are not Symbols are used as the field name unchanged.
        def handle(ctx, params)
          request = params[:request]

          params[:headers].each do |name, value|
            field =
              case name
              when :etag              then "ETag"
              when :if_modified_since then "If-Modified-Since"
              when Symbol
                raise ArgumentError.new("unknown header symbol #{name}")
              else
                name
              end
            request.add_field(field, value)
          end

          super
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module WWW
  # Mixin for objects that take part in a handler chain.
  module Handler
    # The chain this handler belongs to; assigned from outside through
    # the +chain=+ writer.
    attr_accessor :chain

    # Default behavior: do nothing to the request and hand it to the chain
    # so the following handler can process it. The +ctx+ argument is not
    # used here. Returns whatever the chain's +pass+ returns.
    def handle(ctx, request)
      owner = chain
      owner.pass(self, request)
    end
  end
end
@@ -0,0 +1,47 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain link that sets the standard request headers: connection
      # handling, accepted encodings/languages/charsets, Host, Cookie,
      # Referer and User-Agent.
      class HeaderResolver
        include WWW::Handler

        # keep_alive      - when truthy, ask for a persistent connection.
        # keep_alive_time - value sent in the Keep-Alive header.
        # cookie_jar      - object answering empty?(uri) and cookies(uri).
        # user_agent      - User-Agent string, or nil to send none.
        def initialize(keep_alive, keep_alive_time, cookie_jar, user_agent)
          @keep_alive      = keep_alive
          @keep_alive_time = keep_alive_time
          @cookie_jar      = cookie_jar
          @user_agent      = user_agent
        end

        # Populates headers on params[:request] using params[:uri] and
        # params[:referer], then passes control down the chain.
        def handle(ctx, params)
          uri     = params[:uri]
          referer = params[:referer]
          request = params[:request]

          if @keep_alive
            request['Connection'] = 'keep-alive'
            request['Keep-Alive'] = @keep_alive_time.to_s
          else
            request['Connection'] = 'close'
          end
          request['Accept-Encoding'] = 'gzip,identity'
          request['Accept-Language'] = 'en-us,en;q=0.5'
          request['Host']            = host_header(uri)
          request['Accept-Charset']  = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'

          unless @cookie_jar.empty?(uri)
            cookies = @cookie_jar.cookies(uri)
            # Never add a Cookie field without a value.
            request.add_field('Cookie', cookies.join("; ")) if cookies.length > 0
          end

          # Add Referer header to request
          if referer && referer.uri
            request['Referer'] = referer.uri.to_s
          end

          # Add User-Agent header to request
          request['User-Agent'] = @user_agent if @user_agent
          super
        end

        private

        # Value for the Host header: the host alone when the URI uses its
        # scheme's default port (or has no port), otherwise "host:port".
        def host_header(uri)
          port = uri.port
          return uri.host if port.nil? || port == uri.default_port

          "#{uri.host}:#{port}"
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain link that moves request parameters into the URI's query
      # string for GET requests.
      class ParameterResolver
        include WWW::Handler

        # For a :get verb, appends params[:params] (encoded by
        # Util.build_query_string) to the query of params[:uri] and then
        # resets params[:params] to an empty Array. Other verbs are left
        # untouched. Finally passes control down the chain.
        def handle(ctx, params)
          parameters = params[:params]
          uri        = params[:uri]

          if params[:verb] == :get
            if parameters.length > 0
              # Build a new query and assign it through URI#query= rather
              # than appending to the String returned by uri.query, so the
              # URI object sees the change and frozen strings are not an
              # issue.
              pieces = []
              existing = uri.query
              pieces << existing if existing && existing.length > 0
              pieces << Util.build_query_string(parameters)
              uri.query = pieces.join('&')
            end
            params[:params] = []
          end
          super
        end
      end
    end
  end
end
File without changes
@@ -0,0 +1,22 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain link that runs a list of user-supplied callbacks.
      class PreConnectHook
        include WWW::Handler

        # The callbacks, in the order they are invoked. Each one must
        # respond to +call+ and is given the params object.
        attr_accessor :hooks

        # Starts with no callbacks registered.
        def initialize
          @hooks = Array.new
        end

        # Calls every registered hook with +params+, then passes control
        # down the chain.
        def handle(ctx, params)
          @hooks.each do |callback|
            callback.call(params)
          end
          super
        end
      end

      # Same behavior as PreConnectHook under a different class name.
      class PostConnectHook < PreConnectHook
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain link that creates the request object for the URI's scheme and
      # stores it in params[:request], unless one is already present.
      class RequestResolver
        include WWW::Handler

        # Stand-in request used for file URIs. It carries the URI and
        # silently ignores header assignments made through #add_field or
        # #[]=. Defined once here instead of building a new anonymous
        # Struct class on every request.
        FileRequest = Struct.new(:uri)
        class FileRequest
          def add_field(*args); end
          alias :[]= :add_field
        end

        # For http/https, instantiates the Net::HTTP request class named
        # after params[:verb] (e.g. :get => Net::HTTP::Get) with the URI's
        # request_uri. For file, uses a FileRequest wrapping the URI. Any
        # request already in params[:request] is kept. Then passes control
        # down the chain.
        def handle(ctx, params)
          uri = params[:uri]

          case uri.scheme.downcase
          when 'http', 'https'
            klass = Net::HTTP.const_get(params[:verb].to_s.capitalize)
            params[:request] ||= klass.new(uri.request_uri)
          when 'file'
            params[:request] ||= FileRequest.new(uri)
          end

          super
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain link that turns the response into a page object using the
      # parser class registered for the response's content type, and stores
      # it in params[:page].
      class ResponseBodyParser
        include WWW::Handler

        # pluggable_parser - object whose #parser(content_type) returns the
        #                    class to instantiate.
        # watch_for_set    - forwarded to parsers that accept
        #                    +watch_for_set=+; skipped when falsy.
        def initialize(pluggable_parser, watch_for_set)
          @pluggable_parser = pluggable_parser
          @watch_for_set    = watch_for_set
        end

        # Builds params[:page] from params[:uri], params[:response] and
        # params[:response_body], then passes control down the chain.
        def handle(ctx, params)
          response      = params[:response]
          response_body = params[:response_body]
          uri           = params[:uri]

          # Find our pluggable parser
          params[:page] = @pluggable_parser.parser(media_type(response)).new(
            uri,
            response,
            response_body,
            response.code
          ) { |parser|
            parser.mech = params[:agent] if parser.respond_to? :mech=
            if parser.respond_to?(:watch_for_set=) && @watch_for_set
              parser.watch_for_set = @watch_for_set
            end
          }
          super
        end

        private

        # Returns the lower-cased media type from the Content-Type header
        # with any parameters (";charset=...") removed, or nil when the
        # header is absent. Surrounding whitespace is stripped so that
        # "text/html ; charset=utf-8" yields "text/html".
        def media_type(response)
          header = response['Content-Type']
          return nil if header.nil?

          data = header.match(/^([^;]*)/)
          data.nil? ? nil : data[1].downcase.strip
        end
      end
    end
  end
end