mechanize 1.0.1.beta.20110107104205 → 2.0.pre.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (89) hide show
  1. data.tar.gz.sig +2 -0
  2. data/{lib/mechanize/chain/post_connect_hook.rb → .gemtest} +0 -0
  3. data/CHANGELOG.rdoc +51 -6
  4. data/EXAMPLES.rdoc +5 -3
  5. data/GUIDE.rdoc +72 -32
  6. data/LICENSE.rdoc +20 -340
  7. data/Manifest.txt +20 -27
  8. data/README.rdoc +12 -9
  9. data/Rakefile +5 -2
  10. data/examples/spider.rb +13 -2
  11. data/lib/mechanize.rb +545 -267
  12. data/lib/mechanize/content_type_error.rb +1 -1
  13. data/lib/mechanize/cookie.rb +72 -65
  14. data/lib/mechanize/cookie_jar.rb +197 -148
  15. data/lib/mechanize/element_matcher.rb +35 -0
  16. data/lib/mechanize/file.rb +3 -1
  17. data/lib/mechanize/file_connection.rb +17 -0
  18. data/lib/mechanize/file_request.rb +26 -0
  19. data/lib/mechanize/file_response.rb +61 -47
  20. data/lib/mechanize/form.rb +57 -58
  21. data/lib/mechanize/form/image_button.rb +2 -3
  22. data/lib/mechanize/form/multi_select_list.rb +71 -55
  23. data/lib/mechanize/form/select_list.rb +34 -62
  24. data/lib/mechanize/monkey_patch.rb +13 -11
  25. data/lib/mechanize/page.rb +277 -270
  26. data/lib/mechanize/page/image.rb +6 -2
  27. data/lib/mechanize/redirect_limit_reached_error.rb +1 -1
  28. data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -1
  29. data/lib/mechanize/response_code_error.rb +3 -3
  30. data/lib/mechanize/unsupported_scheme_error.rb +1 -1
  31. data/lib/mechanize/uri_resolver.rb +82 -0
  32. data/lib/mechanize/util.rb +76 -60
  33. data/test/helper.rb +35 -5
  34. data/test/htdocs/dir with spaces/foo.html +1 -0
  35. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  36. data/test/htdocs/tc_base_images.html +10 -0
  37. data/test/htdocs/tc_images.html +8 -0
  38. data/test/htdocs/test_click.html +11 -0
  39. data/test/servlets.rb +3 -2
  40. data/test/test_authenticate.rb +5 -5
  41. data/test/test_errors.rb +8 -8
  42. data/test/test_follow_meta.rb +4 -4
  43. data/test/test_form_as_hash.rb +4 -4
  44. data/test/test_forms.rb +3 -7
  45. data/test/test_hash_api.rb +2 -2
  46. data/test/test_headers.rb +1 -1
  47. data/test/test_images.rb +19 -0
  48. data/test/test_mech.rb +6 -6
  49. data/test/test_mechanize.rb +687 -0
  50. data/test/{test_cookie_class.rb → test_mechanize_cookie.rb} +52 -45
  51. data/test/test_mechanize_cookie_jar.rb +400 -0
  52. data/test/test_mechanize_file.rb +7 -1
  53. data/test/test_mechanize_file_request.rb +19 -0
  54. data/test/test_mechanize_file_response.rb +21 -0
  55. data/test/test_mechanize_form_image_button.rb +12 -0
  56. data/test/test_mechanize_page.rb +165 -0
  57. data/test/test_mechanize_uri_resolver.rb +29 -0
  58. data/test/{test_util.rb → test_mechanize_util.rb} +1 -1
  59. data/test/test_multi_select.rb +12 -0
  60. data/test/test_post_form.rb +7 -0
  61. data/test/test_redirect_verb_handling.rb +6 -6
  62. data/test/test_scheme.rb +0 -7
  63. data/test/test_verbs.rb +3 -3
  64. metadata +106 -72
  65. metadata.gz.sig +0 -0
  66. data/lib/mechanize/chain.rb +0 -36
  67. data/lib/mechanize/chain/auth_headers.rb +0 -78
  68. data/lib/mechanize/chain/body_decoding_handler.rb +0 -50
  69. data/lib/mechanize/chain/connection_resolver.rb +0 -28
  70. data/lib/mechanize/chain/custom_headers.rb +0 -21
  71. data/lib/mechanize/chain/handler.rb +0 -9
  72. data/lib/mechanize/chain/header_resolver.rb +0 -48
  73. data/lib/mechanize/chain/parameter_resolver.rb +0 -22
  74. data/lib/mechanize/chain/pre_connect_hook.rb +0 -20
  75. data/lib/mechanize/chain/request_resolver.rb +0 -31
  76. data/lib/mechanize/chain/response_body_parser.rb +0 -36
  77. data/lib/mechanize/chain/response_header_handler.rb +0 -34
  78. data/lib/mechanize/chain/response_reader.rb +0 -39
  79. data/lib/mechanize/chain/ssl_resolver.rb +0 -40
  80. data/lib/mechanize/chain/uri_resolver.rb +0 -75
  81. data/test/chain/test_argument_validator.rb +0 -14
  82. data/test/chain/test_auth_headers.rb +0 -25
  83. data/test/chain/test_custom_headers.rb +0 -18
  84. data/test/chain/test_header_resolver.rb +0 -27
  85. data/test/chain/test_parameter_resolver.rb +0 -35
  86. data/test/chain/test_request_resolver.rb +0 -29
  87. data/test/chain/test_response_reader.rb +0 -24
  88. data/test/test_cookie_jar.rb +0 -324
  89. data/test/test_page.rb +0 -124
@@ -4,7 +4,7 @@ class Mechanize
4
4
  # parse a content type that it does not know how to handle. For example
5
5
  # if Mechanize::Page were to try to parse a PDF, a ContentTypeError
6
6
  # would be thrown.
7
- class ContentTypeError < RuntimeError
7
+ class ContentTypeError < Mechanize::Error
8
8
  attr_reader :content_type
9
9
 
10
10
  def initialize(content_type)
@@ -1,78 +1,85 @@
1
1
  require 'time'
2
2
  require 'webrick/cookie'
3
3
 
4
- class Mechanize
5
- # This class is used to represent an HTTP Cookie.
6
- class Cookie < WEBrick::Cookie
7
- def self.parse(uri, str, log = Mechanize.log)
8
- return str.split(/,(?=[^;,]*=)|,$/).collect { |c|
9
- cookie_elem = c.split(/;+/)
10
- first_elem = cookie_elem.shift
11
- first_elem.strip!
12
- key, value = first_elem.split(/\=/, 2)
4
+ # This class is used to represent an HTTP Cookie.
5
+ class Mechanize::Cookie < WEBrick::Cookie
13
6
 
14
- cookie = nil
15
- begin
16
- cookie = new(key, value.dup)
17
- rescue
18
- log.warn("Couldn't parse key/value: #{first_elem}") if log
19
- end
20
- next unless cookie
7
+ attr_accessor :session
8
+
9
+ def self.parse(uri, str, log = Mechanize.log)
10
+ return str.split(/,(?=[^;,]*=)|,$/).map { |c|
11
+ cookie_elem = c.split(/;+/)
12
+ first_elem = cookie_elem.shift
13
+ first_elem.strip!
14
+ key, value = first_elem.split(/\=/, 2)
15
+
16
+ cookie = nil
17
+ begin
18
+ cookie = new(key, value.dup)
19
+ rescue
20
+ log.warn("Couldn't parse key/value: #{first_elem}") if log
21
+ end
22
+
23
+ next unless cookie
24
+
25
+ cookie_elem.each do |pair|
26
+ pair.strip!
27
+ key, value = pair.split(/\=/, 2)
28
+ next unless key
29
+ value = WEBrick::HTTPUtils.dequote(value.strip) if value
30
+
31
+ case key.downcase
32
+ when "domain" then
33
+ value = ".#{value}" unless value =~ /^\./
34
+ cookie.domain = value
35
+ when "path" then
36
+ cookie.path = value
37
+ when 'expires'
38
+ if value.empty? then
39
+ cookie.session = true
40
+ next
41
+ end
21
42
 
22
- cookie_elem.each do |pair|
23
- pair.strip!
24
- key, value = pair.split(/\=/, 2)
25
- if value
26
- value = WEBrick::HTTPUtils.dequote(value.strip)
43
+ begin
44
+ cookie.expires = Time::parse(value)
45
+ rescue
46
+ log.warn("Couldn't parse expires: #{value}") if log
47
+ end
48
+ when "max-age" then
49
+ begin
50
+ cookie.max_age = Integer(value)
51
+ rescue
52
+ log.warn("Couldn't parse max age '#{value}'") if log
53
+ cookie.max_age = nil
27
54
  end
28
- case key.downcase
29
- when "domain" then cookie.domain = value.sub(/^\./, '')
30
- when "path" then cookie.path = value
31
- when 'expires'
32
- begin
33
- cookie.expires = Time::parse(value)
34
- rescue
35
- if log
36
- log.warn("Couldn't parse expires: #{value}")
37
- end
38
- end
39
- when "max-age" then
40
- begin
41
- cookie.max_age = Integer(value)
42
- rescue
43
- log.warn("Couldn't parse max age '#{value}'") if log
44
- cookie.max_age = nil
45
- end
46
- when "comment" then cookie.comment = value
47
- when "version" then
48
- begin
49
- cookie.version = Integer(value)
50
- rescue
51
- log.warn("Couldn't parse version '#{value}'") if log
52
- cookie.version = nil
53
- end
54
- when "secure" then cookie.secure = true
55
+ when "comment" then cookie.comment = value
56
+ when "version" then
57
+ begin
58
+ cookie.version = Integer(value)
59
+ rescue
60
+ log.warn("Couldn't parse version '#{value}'") if log
61
+ cookie.version = nil
55
62
  end
63
+ when "secure" then cookie.secure = true
56
64
  end
65
+ end
57
66
 
58
- cookie.path ||= uri.path.to_s.sub(%r%[^/]*$%, '')
59
- cookie.secure ||= false
60
- cookie.domain ||= uri.host
61
- # Move this in to the cookie jar
62
- yield cookie if block_given?
63
- }
64
- end
67
+ cookie.path ||= uri.path.to_s.sub(%r%[^/]*$%, '')
68
+ cookie.secure ||= false
69
+ cookie.domain ||= uri.host
70
+ # Move this in to the cookie jar
71
+ yield cookie if block_given?
65
72
 
66
- def expired?
67
- if expires.nil?
68
- false
69
- else
70
- Time.now > expires
71
- end
72
- end
73
+ cookie
74
+ }
75
+ end
76
+
77
+ def expired?
78
+ return false unless expires
79
+ Time.now > expires
80
+ end
73
81
 
74
- def to_s
75
- "#{@name}=#{@value}"
76
- end
82
+ def to_s
83
+ "#{@name}=#{@value}"
77
84
  end
78
85
  end
@@ -1,186 +1,235 @@
1
- require 'yaml'
1
+ ##
2
+ # This class is used to manage the Cookies that have been returned from
3
+ # any particular website.
2
4
 
3
- class Mechanize
4
- # This class is used to manage the Cookies that have been returned from
5
- # any particular website.
6
- class CookieJar
7
- attr_reader :jar
5
+ class Mechanize::CookieJar
8
6
 
9
- def initialize
10
- @jar = {}
11
- end
12
-
13
- # Add a cookie to the Jar.
14
- def add(uri, cookie)
15
- return unless uri.host =~ /#{CookieJar.strip_port(cookie.domain)}$/i
7
+ # add_cookie wants something resembling a URI.
16
8
 
17
- normal_domain = cookie.domain.downcase
9
+ FakeURI = Struct.new(:host) # :nodoc:
18
10
 
19
- unless @jar.has_key?(normal_domain)
20
- @jar[normal_domain] = Hash.new { |h,k| h[k] = {} }
21
- end
11
+ attr_reader :jar
22
12
 
23
- @jar[normal_domain][cookie.path] ||= {}
24
- @jar[normal_domain][cookie.path][cookie.name] = cookie
25
- cleanup
26
- cookie
27
- end
13
+ def initialize
14
+ @jar = {}
15
+ end
28
16
 
29
- # Fetch the cookies that should be used for the URI object passed in.
30
- def cookies(url)
31
- cleanup
32
- url.path = '/' if url.path.empty?
17
+ def initialize_copy other # :nodoc:
18
+ @jar = Marshal.load Marshal.dump other.jar
19
+ end
33
20
 
34
- domains = @jar.find_all { |domain, _|
35
- url.host =~ /#{CookieJar.strip_port(domain)}$/i
36
- }
21
+ # Add a cookie to the Jar.
22
+ def add(uri, cookie)
23
+ return unless valid_cookie_for_uri?(uri, cookie)
37
24
 
38
- return [] unless domains.length > 0
25
+ normal_domain = cookie.domain.downcase
39
26
 
40
- cookies = domains.map { |_,paths|
41
- paths.find_all { |path, _|
42
- url.path =~ /^#{Regexp.escape(path)}/
43
- }.map { |_,cookie| cookie.values }
44
- }.flatten
27
+ @jar[normal_domain] ||= {} unless @jar.has_key?(normal_domain)
45
28
 
46
- cookies.find_all { |cookie| ! cookie.expired? }
47
- end
29
+ @jar[normal_domain][cookie.path] ||= {}
30
+ @jar[normal_domain][cookie.path][cookie.name] = cookie
48
31
 
49
- def empty?(url)
50
- cookies(url).length > 0 ? false : true
51
- end
32
+ cookie
33
+ end
52
34
 
53
- def to_a
54
- cookies = []
55
- @jar.each do |domain, paths|
56
- paths.each do |path, names|
57
- cookies << names.values
58
- end
35
+ # Fetch the cookies that should be used for the URI object passed in.
36
+ def cookies(url)
37
+ cleanup
38
+ url.path = '/' if url.path.empty?
39
+
40
+ domains = @jar.find_all { |domain, _|
41
+ cookie_domain = self.class.strip_port(domain)
42
+ if cookie_domain.start_with?('.')
43
+ url.host =~ /#{Regexp.escape cookie_domain}$/i
44
+ else
45
+ url.host =~ /^#{Regexp.escape cookie_domain}$/i
59
46
  end
60
- cookies.flatten
61
- end
47
+ }
62
48
 
63
- # Save the cookie jar to a file in the format specified.
64
- #
65
- # Available formats:
66
- # :yaml <- YAML structure
67
- # :cookiestxt <- Mozilla's cookies.txt format
68
- def save_as(file, format = :yaml)
69
- ::File.open(file, "w") { |f|
70
- case format
71
- when :yaml then
72
- YAML::dump(@jar, f)
73
- when :cookiestxt then
74
- dump_cookiestxt(f)
75
- else
76
- raise "Unknown cookie jar file format"
77
- end
78
- }
79
- end
49
+ return [] unless domains.length > 0
80
50
 
81
- # Load cookie jar from a file in the format specified.
82
- #
83
- # Available formats:
84
- # :yaml <- YAML structure.
85
- # :cookiestxt <- Mozilla's cookies.txt format
86
- def load(file, format = :yaml)
87
- @jar = ::File.open(file) { |f|
88
- case format
89
- when :yaml then
90
- YAML::load(f)
91
- when :cookiestxt then
92
- load_cookiestxt(f)
93
- else
94
- raise "Unknown cookie jar file format"
95
- end
96
- }
97
- end
51
+ cookies = domains.map { |_,paths|
52
+ paths.find_all { |path, _|
53
+ url.path =~ /^#{Regexp.escape(path)}/
54
+ }.map { |_,cookie| cookie.values }
55
+ }.flatten
98
56
 
99
- # Clear the cookie jar
100
- def clear!
101
- @jar = {}
102
- end
57
+ cookies.find_all { |cookie| ! cookie.expired? }
58
+ end
103
59
 
104
- # Read cookies from Mozilla cookies.txt-style IO stream
105
- def load_cookiestxt(io)
106
- now = Time.now
107
- fakeuri = Struct.new(:host) # add_cookie wants something resembling a URI.
60
+ def empty?(url)
61
+ cookies(url).length > 0 ? false : true
62
+ end
108
63
 
109
- io.each_line do |line|
110
- line.chomp!
111
- line.gsub!(/#.+/, '')
112
- fields = line.split("\t")
64
+ def to_a
65
+ cleanup
113
66
 
114
- next if fields.length != 7
67
+ @jar.map do |domain, paths|
68
+ paths.map do |path, names|
69
+ names.values
70
+ end
71
+ end.flatten
72
+ end
115
73
 
116
- expires_seconds = fields[4].to_i
74
+ # Save the cookie jar to a file in the format specified.
75
+ #
76
+ # Available formats:
77
+ # :yaml <- YAML structure
78
+ # :cookiestxt <- Mozilla's cookies.txt format
79
+ def save_as(file, format = :yaml)
80
+ jar = dup
81
+ jar.cleanup true
82
+
83
+ open(file, 'w') { |f|
84
+ case format
85
+ when :yaml then
117
86
  begin
118
- expires = (expires_seconds == 0) ? nil : Time.at(expires_seconds)
119
- rescue
120
- next
121
- # Just in case we ever decide to support DateTime...
122
- # expires = DateTime.new(1970,1,1) + ((expires_seconds + 1) / (60*60*24.0))
87
+ require 'psych'
88
+ rescue LoadError
123
89
  end
124
- next if (expires_seconds != 0) && (expires < now)
125
-
126
- c = Mechanize::Cookie.new(fields[5], fields[6])
127
- c.domain = fields[0]
128
- # Field 1 indicates whether the cookie can be read by other machines at the same domain.
129
- # This is computed by the cookie implementation, based on the domain value.
130
- c.path = fields[2] # Path for which the cookie is relevant
131
- c.secure = (fields[3] == "TRUE") # Requires a secure connection
132
- c.expires = expires # Time the cookie expires.
133
- c.version = 0 # Conforms to Netscape cookie spec.
134
-
135
- add(fakeuri.new(c.domain), c)
90
+
91
+ require 'yaml'
92
+
93
+ YAML.dump(jar.jar, f)
94
+ when :cookiestxt then
95
+ jar.dump_cookiestxt(f)
96
+ else
97
+ raise ArgumentError, "Unknown cookie jar file format"
136
98
  end
137
- @jar
138
- end
99
+ }
139
100
 
140
- # Write cookies to Mozilla cookies.txt-style IO stream
141
- def dump_cookiestxt(io)
142
- to_a.each do |cookie|
143
- fields = []
144
- fields[0] = cookie.domain
101
+ self
102
+ end
145
103
 
146
- if cookie.domain =~ /^\./
147
- fields[1] = "TRUE"
148
- else
149
- fields[1] = "FALSE"
150
- end
104
+ # Load cookie jar from a file in the format specified.
105
+ #
106
+ # Available formats:
107
+ # :yaml <- YAML structure.
108
+ # :cookiestxt <- Mozilla's cookies.txt format
109
+ def load(file, format = :yaml)
110
+ @jar = open(file) { |f|
111
+ case format
112
+ when :yaml then
113
+ YAML::load(f)
114
+ when :cookiestxt then
115
+ load_cookiestxt(f)
116
+ else
117
+ raise ArgumentError, "Unknown cookie jar file format"
118
+ end
119
+ }
151
120
 
152
- fields[2] = cookie.path
121
+ cleanup
153
122
 
154
- if cookie.secure == true
155
- fields[3] = "TRUE"
156
- else
157
- fields[3] = "FALSE"
158
- end
123
+ self
124
+ end
125
+
126
+ # Clear the cookie jar
127
+ def clear!
128
+ @jar = {}
129
+ end
130
+
131
+ # Read cookies from Mozilla cookies.txt-style IO stream
132
+ def load_cookiestxt(io)
133
+ now = Time.now
134
+
135
+ io.each_line do |line|
136
+ line.chomp!
137
+ line.gsub!(/#.+/, '')
138
+ fields = line.split("\t")
139
+
140
+ next if fields.length != 7
141
+
142
+ expires_seconds = fields[4].to_i
143
+ expires = (expires_seconds == 0) ? nil : Time.at(expires_seconds)
144
+ next if expires and (expires < now)
145
+
146
+ c = Mechanize::Cookie.new(fields[5], fields[6])
147
+ c.domain = fields[0]
148
+ # Field 1 indicates whether the cookie can be read by other machines at
149
+ # the same domain. This is computed by the cookie implementation, based
150
+ # on the domain value.
151
+ c.path = fields[2] # Path for which the cookie is relevant
152
+ c.secure = (fields[3] == "TRUE") # Requires a secure connection
153
+ c.expires = expires # Time the cookie expires.
154
+ c.version = 0 # Conforms to Netscape cookie spec.
155
+
156
+ add(FakeURI.new(c.domain), c)
157
+ end
158
+
159
+ @jar
160
+ end
159
161
 
160
- fields[4] = cookie.expires.to_i.to_s
162
+ # Write cookies to Mozilla cookies.txt-style IO stream
163
+ def dump_cookiestxt(io)
164
+ to_a.each do |cookie|
165
+ fields = []
166
+ fields[0] = cookie.domain
161
167
 
162
- fields[5] = cookie.name
163
- fields[6] = cookie.value
164
- io.puts(fields.join("\t"))
168
+ if cookie.domain =~ /^\./
169
+ fields[1] = "TRUE"
170
+ else
171
+ fields[1] = "FALSE"
165
172
  end
173
+
174
+ fields[2] = cookie.path
175
+
176
+ if cookie.secure == true
177
+ fields[3] = "TRUE"
178
+ else
179
+ fields[3] = "FALSE"
180
+ end
181
+
182
+ fields[4] = cookie.expires.to_i.to_s
183
+
184
+ fields[5] = cookie.name
185
+ fields[6] = cookie.value
186
+ io.puts(fields.join("\t"))
166
187
  end
188
+ end
167
189
 
168
- private
169
- # Remove expired cookies
170
- def cleanup
171
- @jar.each do |domain, paths|
172
- paths.each do |path, names|
173
- names.each do |cookie_name, cookie|
174
- if cookie.expired?
175
- paths[path].delete(cookie_name)
176
- end
177
- end
190
+ private
191
+ # Determine if the cookie's domain and path are valid for
192
+ # the uri.host based on the rules in RFC 2965
193
+ def valid_cookie_for_uri?(uri, cookie)
194
+ cookie_domain = self.class.strip_port(cookie.domain)
195
+
196
+ # reject cookies whose domains do not contain an embedded dot
197
+ # cookies for localhost and .local. are exempt from this rule
198
+ return false if
199
+ cookie_domain !~ /.\../ && cookie_domain !~ /(localhost|\.?local)\.?$/
200
+
201
+ cookie_domain = if cookie_domain.start_with? '.' then
202
+ ".?#{Regexp.escape cookie_domain[1..-1]}"
203
+ else
204
+ Regexp.escape cookie_domain
205
+ end
206
+
207
+ # Permitted: A Set-Cookie for x.foo.com for Domain=.foo.com
208
+ # Not Permitted: A Set-Cookie for y.x.foo.com for Domain=.foo.com because
209
+ # y.x contains a dot
210
+ # Not Permitted: A Set-Cookie for foo.com for Domain=.bar.com
211
+ match = uri.host.match(/#{cookie_domain}/i)
212
+ return false if match.nil? || match.pre_match =~ /.\../
213
+
214
+ true
215
+ end
216
+
217
+ protected
218
+
219
+ # Remove expired cookies
220
+ def cleanup session = false
221
+ @jar.each do |domain, paths|
222
+ paths.each do |path, names|
223
+ names.each do |cookie_name, cookie|
224
+ paths[path].delete(cookie_name) if
225
+ cookie.expired? or (session and cookie.session)
178
226
  end
179
227
  end
180
228
  end
229
+ end
181
230
 
182
- def self.strip_port(host)
183
- host.gsub(/:[0-9]+$/,'')
184
- end
231
+ def self.strip_port(host)
232
+ host.gsub(/:[0-9]+$/,'')
185
233
  end
186
234
  end
235
+