tenderlove-mechanize 0.9.3.20090623142847 → 0.9.3.20090911221705

Sign up to get free protection for your applications and to get access to all the features.
Files changed (165) hide show
  1. data/Manifest.txt +55 -48
  2. data/Rakefile +12 -22
  3. data/lib/mechanize.rb +618 -4
  4. data/lib/mechanize/chain.rb +33 -0
  5. data/lib/mechanize/chain/auth_headers.rb +78 -0
  6. data/lib/mechanize/chain/body_decoding_handler.rb +46 -0
  7. data/lib/mechanize/chain/connection_resolver.rb +76 -0
  8. data/lib/mechanize/chain/custom_headers.rb +21 -0
  9. data/lib/{www/mechanize → mechanize}/chain/handler.rb +1 -1
  10. data/lib/mechanize/chain/header_resolver.rb +51 -0
  11. data/lib/mechanize/chain/parameter_resolver.rb +22 -0
  12. data/lib/{www/mechanize → mechanize}/chain/post_connect_hook.rb +0 -0
  13. data/lib/mechanize/chain/pre_connect_hook.rb +20 -0
  14. data/lib/mechanize/chain/request_resolver.rb +30 -0
  15. data/lib/mechanize/chain/response_body_parser.rb +38 -0
  16. data/lib/mechanize/chain/response_header_handler.rb +48 -0
  17. data/lib/mechanize/chain/response_reader.rb +39 -0
  18. data/lib/mechanize/chain/ssl_resolver.rb +40 -0
  19. data/lib/mechanize/chain/uri_resolver.rb +75 -0
  20. data/lib/mechanize/content_type_error.rb +14 -0
  21. data/lib/mechanize/cookie.rb +70 -0
  22. data/lib/mechanize/cookie_jar.rb +188 -0
  23. data/lib/mechanize/file.rb +71 -0
  24. data/lib/mechanize/file_response.rb +60 -0
  25. data/lib/mechanize/file_saver.rb +37 -0
  26. data/lib/mechanize/form.rb +378 -0
  27. data/lib/mechanize/form/button.rb +9 -0
  28. data/lib/mechanize/form/check_box.rb +11 -0
  29. data/lib/mechanize/form/field.rb +30 -0
  30. data/lib/mechanize/form/file_upload.rb +22 -0
  31. data/lib/mechanize/form/image_button.rb +21 -0
  32. data/lib/mechanize/form/multi_select_list.rb +67 -0
  33. data/lib/mechanize/form/option.rb +49 -0
  34. data/lib/mechanize/form/radio_button.rb +49 -0
  35. data/lib/mechanize/form/select_list.rb +43 -0
  36. data/lib/mechanize/headers.rb +11 -0
  37. data/lib/mechanize/history.rb +65 -0
  38. data/lib/mechanize/inspect.rb +88 -0
  39. data/lib/{www/mechanize → mechanize}/monkey_patch.rb +4 -6
  40. data/lib/mechanize/page.rb +206 -0
  41. data/lib/mechanize/page/base.rb +8 -0
  42. data/lib/mechanize/page/frame.rb +20 -0
  43. data/lib/mechanize/page/image.rb +26 -0
  44. data/lib/mechanize/page/label.rb +20 -0
  45. data/lib/mechanize/page/link.rb +48 -0
  46. data/lib/mechanize/page/meta.rb +50 -0
  47. data/lib/mechanize/pluggable_parsers.rb +101 -0
  48. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  49. data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -0
  50. data/lib/mechanize/response_code_error.rb +22 -0
  51. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  52. data/lib/mechanize/util.rb +67 -0
  53. data/mechanize.gemspec +8 -8
  54. data/test/chain/test_argument_validator.rb +2 -2
  55. data/test/chain/test_auth_headers.rb +2 -2
  56. data/test/chain/test_custom_headers.rb +2 -2
  57. data/test/chain/test_header_resolver.rb +3 -3
  58. data/test/chain/test_parameter_resolver.rb +4 -4
  59. data/test/chain/test_request_resolver.rb +4 -4
  60. data/test/chain/test_response_reader.rb +3 -3
  61. data/test/helper.rb +1 -1
  62. data/test/htdocs/tc_bad_charset.html +9 -0
  63. data/test/htdocs/tc_charset.html +6 -0
  64. data/test/htdocs/test_bad_encoding.html +52 -0
  65. data/test/test_authenticate.rb +3 -3
  66. data/test/test_bad_links.rb +1 -1
  67. data/test/test_blank_form.rb +1 -1
  68. data/test/test_checkboxes.rb +1 -1
  69. data/test/test_content_type.rb +2 -2
  70. data/test/test_cookie_class.rb +12 -12
  71. data/test/test_cookie_jar.rb +13 -13
  72. data/test/test_cookies.rb +1 -1
  73. data/test/test_encoded_links.rb +1 -1
  74. data/test/test_errors.rb +2 -2
  75. data/test/test_follow_meta.rb +3 -3
  76. data/test/test_form_action.rb +1 -1
  77. data/test/test_form_as_hash.rb +1 -1
  78. data/test/test_form_button.rb +2 -2
  79. data/test/test_form_no_inputname.rb +1 -1
  80. data/test/test_forms.rb +1 -1
  81. data/test/test_frames.rb +1 -1
  82. data/test/test_get_headers.rb +1 -1
  83. data/test/test_gzipping.rb +2 -2
  84. data/test/test_hash_api.rb +1 -1
  85. data/test/test_history.rb +7 -7
  86. data/test/test_history_added.rb +1 -1
  87. data/test/test_html_unscape_forms.rb +7 -7
  88. data/test/test_if_modified_since.rb +1 -1
  89. data/test/test_keep_alive.rb +1 -1
  90. data/test/test_links.rb +2 -2
  91. data/test/test_mech.rb +2 -2
  92. data/test/test_mechanize_file.rb +7 -7
  93. data/test/test_meta.rb +2 -2
  94. data/test/test_multi_select.rb +1 -1
  95. data/test/test_no_attributes.rb +1 -1
  96. data/test/test_option.rb +1 -1
  97. data/test/test_page.rb +3 -3
  98. data/test/test_pluggable_parser.rb +14 -14
  99. data/test/test_post_form.rb +1 -1
  100. data/test/test_pretty_print.rb +2 -2
  101. data/test/test_radiobutton.rb +1 -1
  102. data/test/test_redirect_limit_reached.rb +1 -3
  103. data/test/test_redirect_verb_handling.rb +1 -3
  104. data/test/test_referer.rb +1 -1
  105. data/test/test_relative_links.rb +1 -1
  106. data/test/test_request.rb +1 -1
  107. data/test/test_response_code.rb +3 -3
  108. data/test/test_save_file.rb +3 -3
  109. data/test/test_scheme.rb +3 -3
  110. data/test/test_select.rb +2 -2
  111. data/test/test_select_all.rb +1 -1
  112. data/test/test_select_none.rb +1 -1
  113. data/test/test_select_noopts.rb +1 -1
  114. data/test/test_set_fields.rb +1 -1
  115. data/test/test_ssl_server.rb +1 -1
  116. data/test/test_subclass.rb +1 -1
  117. data/test/test_textarea.rb +1 -1
  118. data/test/test_upload.rb +1 -1
  119. data/test/test_verbs.rb +1 -1
  120. metadata +61 -56
  121. data/lib/www/mechanize.rb +0 -619
  122. data/lib/www/mechanize/chain.rb +0 -34
  123. data/lib/www/mechanize/chain/auth_headers.rb +0 -80
  124. data/lib/www/mechanize/chain/body_decoding_handler.rb +0 -48
  125. data/lib/www/mechanize/chain/connection_resolver.rb +0 -78
  126. data/lib/www/mechanize/chain/custom_headers.rb +0 -23
  127. data/lib/www/mechanize/chain/header_resolver.rb +0 -53
  128. data/lib/www/mechanize/chain/parameter_resolver.rb +0 -24
  129. data/lib/www/mechanize/chain/pre_connect_hook.rb +0 -22
  130. data/lib/www/mechanize/chain/request_resolver.rb +0 -32
  131. data/lib/www/mechanize/chain/response_body_parser.rb +0 -40
  132. data/lib/www/mechanize/chain/response_header_handler.rb +0 -50
  133. data/lib/www/mechanize/chain/response_reader.rb +0 -41
  134. data/lib/www/mechanize/chain/ssl_resolver.rb +0 -42
  135. data/lib/www/mechanize/chain/uri_resolver.rb +0 -77
  136. data/lib/www/mechanize/content_type_error.rb +0 -16
  137. data/lib/www/mechanize/cookie.rb +0 -72
  138. data/lib/www/mechanize/cookie_jar.rb +0 -191
  139. data/lib/www/mechanize/file.rb +0 -73
  140. data/lib/www/mechanize/file_response.rb +0 -62
  141. data/lib/www/mechanize/file_saver.rb +0 -39
  142. data/lib/www/mechanize/form.rb +0 -360
  143. data/lib/www/mechanize/form/button.rb +0 -8
  144. data/lib/www/mechanize/form/check_box.rb +0 -13
  145. data/lib/www/mechanize/form/field.rb +0 -28
  146. data/lib/www/mechanize/form/file_upload.rb +0 -24
  147. data/lib/www/mechanize/form/image_button.rb +0 -23
  148. data/lib/www/mechanize/form/multi_select_list.rb +0 -69
  149. data/lib/www/mechanize/form/option.rb +0 -51
  150. data/lib/www/mechanize/form/radio_button.rb +0 -38
  151. data/lib/www/mechanize/form/select_list.rb +0 -45
  152. data/lib/www/mechanize/headers.rb +0 -12
  153. data/lib/www/mechanize/history.rb +0 -67
  154. data/lib/www/mechanize/inspect.rb +0 -90
  155. data/lib/www/mechanize/page.rb +0 -181
  156. data/lib/www/mechanize/page/base.rb +0 -10
  157. data/lib/www/mechanize/page/frame.rb +0 -22
  158. data/lib/www/mechanize/page/link.rb +0 -50
  159. data/lib/www/mechanize/page/meta.rb +0 -51
  160. data/lib/www/mechanize/pluggable_parsers.rb +0 -103
  161. data/lib/www/mechanize/redirect_limit_reached_error.rb +0 -18
  162. data/lib/www/mechanize/redirect_not_get_or_head_error.rb +0 -20
  163. data/lib/www/mechanize/response_code_error.rb +0 -25
  164. data/lib/www/mechanize/unsupported_scheme_error.rb +0 -10
  165. data/lib/www/mechanize/util.rb +0 -76
@@ -1,34 +0,0 @@
1
- require 'www/mechanize/chain/handler'
2
- require 'www/mechanize/chain/uri_resolver'
3
- require 'www/mechanize/chain/parameter_resolver'
4
- require 'www/mechanize/chain/request_resolver'
5
- require 'www/mechanize/chain/custom_headers'
6
- require 'www/mechanize/chain/connection_resolver'
7
- require 'www/mechanize/chain/ssl_resolver'
8
- require 'www/mechanize/chain/pre_connect_hook'
9
- require 'www/mechanize/chain/auth_headers'
10
- require 'www/mechanize/chain/header_resolver'
11
- require 'www/mechanize/chain/response_body_parser'
12
- require 'www/mechanize/chain/response_header_handler'
13
- require 'www/mechanize/chain/response_reader'
14
- require 'www/mechanize/chain/body_decoding_handler'
15
-
16
- module WWW
17
- class Mechanize
18
- class Chain
19
- def initialize(list)
20
- @list = list
21
- @list.each { |l| l.chain = self }
22
- end
23
-
24
- def handle(request)
25
- @list.first.handle(self, request)
26
- end
27
-
28
- def pass(obj, request)
29
- next_link = @list[@list.index(obj) + 1]
30
- next_link.handle(self, request) if next_link
31
- end
32
- end
33
- end
34
- end
@@ -1,80 +0,0 @@
1
- module WWW
2
- class Mechanize
3
- class Chain
4
- class AuthHeaders
5
- include WWW::Handler
6
-
7
- @@nonce_count = Hash.new(0)
8
- CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))
9
-
10
- def initialize(auth_hash, user, password, digest)
11
- @auth_hash = auth_hash
12
- @user = user
13
- @password = password
14
- @digest = digest
15
- end
16
-
17
- def handle(ctx, params)
18
- uri = params[:uri]
19
- request = params[:request]
20
-
21
- if( @auth_hash[uri.host] )
22
- case @auth_hash[uri.host]
23
- when :basic
24
- request.basic_auth(@user, @password)
25
- when :iis_digest
26
- digest_response = self.gen_auth_header(uri,request, @digest, true)
27
- request['Authorization'] = digest_response
28
- when :digest
29
- if @digest
30
- digest_response = self.gen_auth_header(uri,request, @digest)
31
- request['Authorization'] = digest_response
32
- end
33
- end
34
- end
35
- super
36
- end
37
-
38
- def gen_auth_header(uri, request, auth_header, is_IIS = false)
39
- auth_header =~ /^(\w+) (.*)/
40
-
41
- params = {}
42
- $2.gsub(/(\w+)=("[^"]*"|[^,]*)/) {
43
- params[$1] = $2.gsub(/^"/, '').gsub(/"$/, '')
44
- }
45
-
46
- @@nonce_count[params['nonce']] += 1
47
-
48
- a_1 = "#{@user}:#{params['realm']}:#{@password}"
49
- a_2 = "#{request.method}:#{uri.path}"
50
- request_digest = ''
51
- request_digest << Digest::MD5.hexdigest(a_1)
52
- request_digest << ':' << params['nonce']
53
- request_digest << ':' << ('%08x' % @@nonce_count[params['nonce']])
54
- request_digest << ':' << CNONCE
55
- request_digest << ':' << params['qop']
56
- request_digest << ':' << Digest::MD5.hexdigest(a_2)
57
-
58
- header = ''
59
- header << "Digest username=\"#{@user}\", "
60
- if is_IIS then
61
- header << "qop=\"#{params['qop']}\", "
62
- else
63
- header << "qop=#{params['qop']}, "
64
- end
65
- header << "uri=\"#{uri.path}\", "
66
- header << %w{ algorithm opaque nonce realm }.map { |field|
67
- next unless params[field]
68
- "#{field}=\"#{params[field]}\""
69
- }.compact.join(', ')
70
-
71
- header << ", nc=#{'%08x' % @@nonce_count[params['nonce']]}, "
72
- header << "cnonce=\"#{CNONCE}\", "
73
- header << "response=\"#{Digest::MD5.hexdigest(request_digest)}\""
74
-
75
- return header
76
- end
77
- end
78
- end
79
- end
80
- end
@@ -1,48 +0,0 @@
1
- module WWW
2
- class Mechanize
3
- class Chain
4
- class BodyDecodingHandler
5
- include WWW::Handler
6
-
7
- def handle(ctx, options)
8
- body = options[:response_body]
9
- response = options[:response]
10
-
11
- options[:response_body] =
12
- if encoding = response['Content-Encoding']
13
- case encoding.downcase
14
- when 'gzip'
15
- Mechanize.log.debug('gunzip body') if Mechanize.log
16
- if response['Content-Length'].to_i > 0 || body.length > 0
17
- begin
18
- Zlib::GzipReader.new(body).read
19
- rescue Zlib::BufError, Zlib::GzipFile::Error
20
- if Mechanize.log
21
- Mechanize.log.error('Caught a Zlib::BufError')
22
- end
23
- body.rewind
24
- body.read(10)
25
- Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
26
- rescue Zlib::DataError
27
- if Mechanize.log
28
- Mechanize.log.error("Caught a Zlib::DataError, unable to decode page: #{$!.to_s}")
29
- end
30
- ''
31
- end
32
- else
33
- ''
34
- end
35
- when 'x-gzip'
36
- body.read
37
- else
38
- raise 'Unsupported content encoding'
39
- end
40
- else
41
- body.read
42
- end
43
- super
44
- end
45
- end
46
- end
47
- end
48
- end
@@ -1,78 +0,0 @@
1
- module WWW
2
- class Mechanize
3
- class Chain
4
- class ConnectionResolver
5
- include WWW::Handler
6
-
7
- def initialize( connection_cache,
8
- keep_alive,
9
- proxy_addr,
10
- proxy_port,
11
- proxy_user,
12
- proxy_pass )
13
-
14
- @connection_cache = connection_cache
15
- @keep_alive = keep_alive
16
- @proxy_addr = proxy_addr
17
- @proxy_port = proxy_port
18
- @proxy_user = proxy_user
19
- @proxy_pass = proxy_pass
20
- end
21
-
22
- def handle(ctx, params)
23
- uri = params[:uri]
24
- http_obj = nil
25
-
26
- case uri.scheme.downcase
27
- when 'http', 'https'
28
- cache_obj = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
29
- :connection => nil,
30
- :keep_alive_options => {},
31
- })
32
- http_obj = cache_obj[:connection]
33
- if http_obj.nil? || ! http_obj.started?
34
- http_obj = cache_obj[:connection] =
35
- Net::HTTP.new( uri.host,
36
- uri.port,
37
- @proxy_addr,
38
- @proxy_port,
39
- @proxy_user,
40
- @proxy_pass
41
- )
42
- cache_obj[:keep_alive_options] = {}
43
- end
44
-
45
- # If we're keeping connections alive and the last request time is too
46
- # long ago, stop the connection. Or, if the max requests left is 1,
47
- # reset the connection.
48
- if @keep_alive && http_obj.started?
49
- opts = cache_obj[:keep_alive_options]
50
- if((opts[:timeout] &&
51
- Time.now.to_i - cache_obj[:last_request_time] > opts[:timeout].to_i) ||
52
- opts[:max] && opts[:max].to_i == 1)
53
-
54
- Mechanize.log.debug('Finishing stale connection') if Mechanize.log
55
- http_obj.finish
56
-
57
- end
58
- end
59
-
60
- cache_obj[:last_request_time] = Time.now.to_i
61
- when 'file'
62
- http_obj = Object.new
63
- class << http_obj
64
- def started?; true; end
65
- def request(request, *args, &block)
66
- response = FileResponse.new(request.uri.path)
67
- yield response
68
- end
69
- end
70
- end
71
-
72
- params[:connection] = http_obj
73
- super
74
- end
75
- end
76
- end
77
- end
78
- end
@@ -1,23 +0,0 @@
1
- module WWW
2
- class Mechanize
3
- class Chain
4
- class CustomHeaders
5
- include WWW::Handler
6
-
7
- def handle(ctx, params)
8
- request = params[:request]
9
- params[:headers].each do |k,v|
10
- case k
11
- when :etag then request["ETag"] = v
12
- when :if_modified_since then request["If-Modified-Since"] = v
13
- else
14
- raise ArgumentError.new("unknown header symbol #{k}") if k.is_a? Symbol
15
- request[k] = v
16
- end
17
- end
18
- super
19
- end
20
- end
21
- end
22
- end
23
- end
@@ -1,53 +0,0 @@
1
- module WWW
2
- class Mechanize
3
- class Chain
4
- class HeaderResolver
5
- include WWW::Handler
6
- def initialize(keep_alive, keep_alive_time, cookie_jar, user_agent, headers)
7
- @keep_alive = keep_alive
8
- @keep_alive_time = keep_alive_time
9
- @cookie_jar = cookie_jar
10
- @user_agent = user_agent
11
- @headers = headers
12
- end
13
-
14
- def handle(ctx, params)
15
- uri = params[:uri]
16
- referer = params[:referer]
17
- request = params[:request]
18
-
19
- if @keep_alive
20
- request['Connection'] = 'keep-alive'
21
- request['Keep-Alive'] = @keep_alive_time.to_s
22
- else
23
- request['Connection'] = 'close'
24
- end
25
- request['Accept-Encoding'] = 'gzip,identity'
26
- request['Accept-Language'] = 'en-us,en;q=0.5'
27
- host = "#{uri.host}#{[80, 443].include?(uri.port.to_i) ? '' : ':' + uri.port.to_s}"
28
- request['Host'] = host
29
- request['Accept-Charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'
30
-
31
- unless @cookie_jar.empty?(uri)
32
- cookies = @cookie_jar.cookies(uri)
33
- cookie = cookies.length > 0 ? cookies.join("; ") : nil
34
- request.add_field('Cookie', cookie)
35
- end
36
-
37
- # Add Referer header to request
38
- if referer && referer.uri
39
- request['Referer'] = referer.uri.to_s
40
- end
41
-
42
- # Add User-Agent header to request
43
- request['User-Agent'] = @user_agent if @user_agent
44
-
45
- @headers.each do |k,v|
46
- request[k] = v
47
- end if request
48
- super
49
- end
50
- end
51
- end
52
- end
53
- end
@@ -1,24 +0,0 @@
1
- module WWW
2
- class Mechanize
3
- class Chain
4
- class ParameterResolver
5
- include WWW::Handler
6
-
7
- def handle(ctx, params)
8
- parameters = params[:params]
9
- uri = params[:uri]
10
- case params[:verb]
11
- when :head, :get, :delete, :trace
12
- if parameters.length > 0
13
- uri.query ||= ''
14
- uri.query << '&' if uri.query.length > 0
15
- uri.query << Util.build_query_string(parameters)
16
- end
17
- params[:params] = []
18
- end
19
- super
20
- end
21
- end
22
- end
23
- end
24
- end
@@ -1,22 +0,0 @@
1
- module WWW
2
- class Mechanize
3
- class Chain
4
- class PreConnectHook
5
- include WWW::Handler
6
-
7
- attr_accessor :hooks
8
- def initialize
9
- @hooks = []
10
- end
11
-
12
- def handle(ctx, params)
13
- @hooks.each { |hook| hook.call(params) }
14
- super
15
- end
16
- end
17
-
18
- class PostConnectHook < PreConnectHook
19
- end
20
- end
21
- end
22
- end
@@ -1,32 +0,0 @@
1
- module WWW
2
- class Mechanize
3
- class Chain
4
- class RequestResolver
5
- include WWW::Handler
6
-
7
- def handle(ctx, params)
8
- uri = params[:uri]
9
- if %w{ http https }.include?(uri.scheme.downcase)
10
- klass = Net::HTTP.const_get(params[:verb].to_s.capitalize)
11
- params[:request] ||= klass.new(uri.request_uri)
12
- end
13
-
14
- if %w{ file }.include?(uri.scheme.downcase)
15
- o = Struct.new(:uri).new(uri)
16
- class << o
17
- def add_field(*args); end
18
- alias :[]= :add_field
19
- def path
20
- uri.path
21
- end
22
- def each_header; end
23
- end
24
- params[:request] ||= o
25
- end
26
-
27
- super
28
- end
29
- end
30
- end
31
- end
32
- end
@@ -1,40 +0,0 @@
1
- module WWW
2
- class Mechanize
3
- class Chain
4
- class ResponseBodyParser
5
- include WWW::Handler
6
-
7
- def initialize(pluggable_parser, watch_for_set)
8
- @pluggable_parser = pluggable_parser
9
- @watch_for_set = watch_for_set
10
- end
11
-
12
- def handle(ctx, params)
13
- response = params[:response]
14
- response_body = params[:response_body]
15
- uri = params[:uri]
16
-
17
- content_type = nil
18
- unless response['Content-Type'].nil?
19
- data = response['Content-Type'].match(/^([^;]*)/)
20
- content_type = data[1].downcase.split(',')[0] unless data.nil?
21
- end
22
-
23
- # Find our pluggable parser
24
- params[:page] = @pluggable_parser.parser(content_type).new(
25
- uri,
26
- response,
27
- response_body,
28
- response.code
29
- ) { |parser|
30
- parser.mech = params[:agent] if parser.respond_to? :mech=
31
- if parser.respond_to?(:watch_for_set=) && @watch_for_set
32
- parser.watch_for_set = @watch_for_set
33
- end
34
- }
35
- super
36
- end
37
- end
38
- end
39
- end
40
- end