aai10-mechanize 2.0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. data/.autotest +6 -0
  2. data/.gitignore +9 -0
  3. data/CHANGELOG.rdoc +652 -0
  4. data/EXAMPLES.rdoc +187 -0
  5. data/FAQ.rdoc +11 -0
  6. data/GUIDE.rdoc +163 -0
  7. data/LICENSE.rdoc +20 -0
  8. data/Manifest.txt +172 -0
  9. data/README.rdoc +63 -0
  10. data/Rakefile +36 -0
  11. data/aai10-mechanize.gemspec +20 -0
  12. data/examples/flickr_upload.rb +22 -0
  13. data/examples/mech-dump.rb +5 -0
  14. data/examples/proxy_req.rb +7 -0
  15. data/examples/rubyforge.rb +20 -0
  16. data/examples/spider.rb +21 -0
  17. data/lib/mechanize.rb +664 -0
  18. data/lib/mechanize/content_type_error.rb +14 -0
  19. data/lib/mechanize/cookie.rb +116 -0
  20. data/lib/mechanize/cookie_jar.rb +202 -0
  21. data/lib/mechanize/element_matcher.rb +35 -0
  22. data/lib/mechanize/file.rb +80 -0
  23. data/lib/mechanize/file_connection.rb +17 -0
  24. data/lib/mechanize/file_request.rb +26 -0
  25. data/lib/mechanize/file_response.rb +74 -0
  26. data/lib/mechanize/file_saver.rb +37 -0
  27. data/lib/mechanize/form.rb +478 -0
  28. data/lib/mechanize/form/button.rb +9 -0
  29. data/lib/mechanize/form/check_box.rb +11 -0
  30. data/lib/mechanize/form/field.rb +44 -0
  31. data/lib/mechanize/form/file_upload.rb +23 -0
  32. data/lib/mechanize/form/image_button.rb +20 -0
  33. data/lib/mechanize/form/multi_select_list.rb +83 -0
  34. data/lib/mechanize/form/option.rb +49 -0
  35. data/lib/mechanize/form/radio_button.rb +48 -0
  36. data/lib/mechanize/form/select_list.rb +40 -0
  37. data/lib/mechanize/headers.rb +25 -0
  38. data/lib/mechanize/history.rb +83 -0
  39. data/lib/mechanize/http.rb +3 -0
  40. data/lib/mechanize/http/agent.rb +738 -0
  41. data/lib/mechanize/inspect.rb +88 -0
  42. data/lib/mechanize/monkey_patch.rb +37 -0
  43. data/lib/mechanize/page.rb +408 -0
  44. data/lib/mechanize/page/base.rb +8 -0
  45. data/lib/mechanize/page/frame.rb +27 -0
  46. data/lib/mechanize/page/image.rb +30 -0
  47. data/lib/mechanize/page/label.rb +20 -0
  48. data/lib/mechanize/page/link.rb +82 -0
  49. data/lib/mechanize/page/meta_refresh.rb +56 -0
  50. data/lib/mechanize/pluggable_parsers.rb +101 -0
  51. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  52. data/lib/mechanize/redirect_not_get_or_head_error.rb +19 -0
  53. data/lib/mechanize/response_code_error.rb +22 -0
  54. data/lib/mechanize/response_read_error.rb +27 -0
  55. data/lib/mechanize/robots_disallowed_error.rb +29 -0
  56. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  57. data/lib/mechanize/util.rb +113 -0
  58. data/test/data/htpasswd +1 -0
  59. data/test/data/server.crt +16 -0
  60. data/test/data/server.csr +12 -0
  61. data/test/data/server.key +15 -0
  62. data/test/data/server.pem +15 -0
  63. data/test/helper.rb +175 -0
  64. data/test/htdocs/alt_text.html +10 -0
  65. data/test/htdocs/bad_form_test.html +9 -0
  66. data/test/htdocs/button.jpg +0 -0
  67. data/test/htdocs/canonical_uri.html +9 -0
  68. data/test/htdocs/dir with spaces/foo.html +1 -0
  69. data/test/htdocs/empty_form.html +6 -0
  70. data/test/htdocs/file_upload.html +26 -0
  71. data/test/htdocs/find_link.html +41 -0
  72. data/test/htdocs/form_multi_select.html +16 -0
  73. data/test/htdocs/form_multival.html +37 -0
  74. data/test/htdocs/form_no_action.html +18 -0
  75. data/test/htdocs/form_no_input_name.html +16 -0
  76. data/test/htdocs/form_select.html +16 -0
  77. data/test/htdocs/form_select_all.html +16 -0
  78. data/test/htdocs/form_select_none.html +17 -0
  79. data/test/htdocs/form_select_noopts.html +10 -0
  80. data/test/htdocs/form_set_fields.html +14 -0
  81. data/test/htdocs/form_test.html +188 -0
  82. data/test/htdocs/frame_referer_test.html +10 -0
  83. data/test/htdocs/frame_test.html +30 -0
  84. data/test/htdocs/google.html +13 -0
  85. data/test/htdocs/iframe_test.html +16 -0
  86. data/test/htdocs/index.html +6 -0
  87. data/test/htdocs/link with space.html +5 -0
  88. data/test/htdocs/meta_cookie.html +11 -0
  89. data/test/htdocs/no_title_test.html +6 -0
  90. data/test/htdocs/nofollow.html +9 -0
  91. data/test/htdocs/noindex.html +9 -0
  92. data/test/htdocs/norobots.html +8 -0
  93. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  94. data/test/htdocs/rel_nofollow.html +8 -0
  95. data/test/htdocs/relative/tc_relative_links.html +21 -0
  96. data/test/htdocs/robots.html +8 -0
  97. data/test/htdocs/robots.txt +2 -0
  98. data/test/htdocs/tc_bad_charset.html +9 -0
  99. data/test/htdocs/tc_bad_links.html +5 -0
  100. data/test/htdocs/tc_base_images.html +10 -0
  101. data/test/htdocs/tc_base_link.html +8 -0
  102. data/test/htdocs/tc_blank_form.html +11 -0
  103. data/test/htdocs/tc_charset.html +6 -0
  104. data/test/htdocs/tc_checkboxes.html +19 -0
  105. data/test/htdocs/tc_encoded_links.html +5 -0
  106. data/test/htdocs/tc_field_precedence.html +11 -0
  107. data/test/htdocs/tc_follow_meta.html +8 -0
  108. data/test/htdocs/tc_form_action.html +48 -0
  109. data/test/htdocs/tc_images.html +8 -0
  110. data/test/htdocs/tc_links.html +18 -0
  111. data/test/htdocs/tc_meta_in_body.html +9 -0
  112. data/test/htdocs/tc_no_attributes.html +16 -0
  113. data/test/htdocs/tc_pretty_print.html +17 -0
  114. data/test/htdocs/tc_radiobuttons.html +17 -0
  115. data/test/htdocs/tc_referer.html +16 -0
  116. data/test/htdocs/tc_relative_links.html +19 -0
  117. data/test/htdocs/tc_textarea.html +23 -0
  118. data/test/htdocs/test_bad_encoding.html +52 -0
  119. data/test/htdocs/test_click.html +11 -0
  120. data/test/htdocs/unusual______.html +5 -0
  121. data/test/servlets.rb +402 -0
  122. data/test/ssl_server.rb +48 -0
  123. data/test/test_cookies.rb +129 -0
  124. data/test/test_form_action.rb +52 -0
  125. data/test/test_form_as_hash.rb +59 -0
  126. data/test/test_form_button.rb +46 -0
  127. data/test/test_frames.rb +34 -0
  128. data/test/test_headers.rb +33 -0
  129. data/test/test_history.rb +118 -0
  130. data/test/test_history_added.rb +16 -0
  131. data/test/test_html_unscape_forms.rb +46 -0
  132. data/test/test_if_modified_since.rb +20 -0
  133. data/test/test_images.rb +19 -0
  134. data/test/test_mechanize.rb +852 -0
  135. data/test/test_mechanize_cookie.rb +345 -0
  136. data/test/test_mechanize_cookie_jar.rb +433 -0
  137. data/test/test_mechanize_file.rb +53 -0
  138. data/test/test_mechanize_file_request.rb +19 -0
  139. data/test/test_mechanize_file_response.rb +21 -0
  140. data/test/test_mechanize_form.rb +576 -0
  141. data/test/test_mechanize_form_check_box.rb +37 -0
  142. data/test/test_mechanize_form_encoding.rb +120 -0
  143. data/test/test_mechanize_form_field.rb +21 -0
  144. data/test/test_mechanize_form_image_button.rb +12 -0
  145. data/test/test_mechanize_form_textarea.rb +51 -0
  146. data/test/test_mechanize_http_agent.rb +697 -0
  147. data/test/test_mechanize_link.rb +84 -0
  148. data/test/test_mechanize_page_encoding.rb +147 -0
  149. data/test/test_mechanize_page_link.rb +382 -0
  150. data/test/test_mechanize_page_meta_refresh.rb +115 -0
  151. data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
  152. data/test/test_mechanize_subclass.rb +22 -0
  153. data/test/test_mechanize_util.rb +92 -0
  154. data/test/test_multi_select.rb +118 -0
  155. data/test/test_no_attributes.rb +13 -0
  156. data/test/test_option.rb +18 -0
  157. data/test/test_pluggable_parser.rb +136 -0
  158. data/test/test_post_form.rb +37 -0
  159. data/test/test_pretty_print.rb +22 -0
  160. data/test/test_radiobutton.rb +75 -0
  161. data/test/test_redirect_limit_reached.rb +39 -0
  162. data/test/test_redirect_ok.rb +25 -0
  163. data/test/test_referer.rb +81 -0
  164. data/test/test_relative_links.rb +40 -0
  165. data/test/test_request.rb +13 -0
  166. data/test/test_response_code.rb +53 -0
  167. data/test/test_robots.rb +72 -0
  168. data/test/test_save_file.rb +48 -0
  169. data/test/test_scheme.rb +48 -0
  170. data/test/test_select.rb +119 -0
  171. data/test/test_select_all.rb +15 -0
  172. data/test/test_select_none.rb +15 -0
  173. data/test/test_select_noopts.rb +18 -0
  174. data/test/test_set_fields.rb +44 -0
  175. data/test/test_ssl_server.rb +20 -0
  176. metadata +360 -0
@@ -0,0 +1,14 @@
1
class Mechanize
  # = Synopsis
  # Error raised when a pluggable parser is handed a content type it does
  # not know how to handle. For example, asking Mechanize::Page to parse a
  # PDF results in a ContentTypeError.
  class ContentTypeError < Mechanize::Error
    # The content type that was given to this error when it was created.
    attr_reader :content_type

    # Creates the error, remembering the offending +content_type+ so that
    # callers can inspect it through #content_type.
    def initialize(content_type)
      @content_type = content_type
    end
  end
end
@@ -0,0 +1,116 @@
1
+ require 'time'
2
+ require 'webrick/cookie'
3
+
4
# This class is used to represent an HTTP Cookie.
class Mechanize::Cookie < WEBrick::Cookie

  # Set to true by ::parse when the cookie carries an +expires+ attribute
  # with no (or an empty) value.
  attr_accessor :session

  class << self
    ##
    # Parses the cookie header value +str+ received from +uri+.
    #
    # +str+ may hold several comma-separated cookies.  Each cookie that
    # could be built is yielded to the block (when one is given).  Problems
    # are reported through +log+ (when it is not nil) instead of being
    # raised.
    #
    # Returns an Array with one entry per comma-separated element; the
    # entry is nil for an element whose name/value pair could not be
    # parsed.
    def parse(uri, str, log = Mechanize.log)
      return str.split(/,(?=[^;,]*=)|,$/).map { |c|
        cookie_elem = c.split(/;+/)
        first_elem = cookie_elem.shift

        # An empty element (e.g. produced by a leading comma) has no
        # name/value pair at all; skip it instead of calling strip! on nil.
        next unless first_elem

        first_elem.strip!
        key, value = first_elem.split(/\=/, 2)

        cookie = nil
        begin
          # value is nil when there is no "=", which raises here and is
          # reported as an unparseable pair below.
          cookie = new(key, value.dup)
        rescue
          log.warn("Couldn't parse key/value: #{first_elem}") if log
        end

        next unless cookie

        cookie_elem.each do |pair|
          pair.strip!
          key, value = pair.split(/\=/, 2)
          next unless key
          value = WEBrick::HTTPUtils.dequote(value.strip) if value

          case key.downcase
          when "domain" then
            # normalize_domain tolerates nil, so a bare "domain" attribute
            # simply leaves the domain to be defaulted from the URI below.
            cookie.domain = value
          when "path" then
            cookie.path = value
          when 'expires'
            # A bare "expires" (no "=") has a nil value; treat it the same
            # way as an empty one.
            if value.nil? || value.empty? then
              cookie.session = true
              next
            end

            begin
              cookie.expires = Time::parse(value)
            rescue
              log.warn("Couldn't parse expires: #{value}") if log
            end
          when "max-age" then
            begin
              cookie.max_age = Integer(value)
            rescue
              log.warn("Couldn't parse max age '#{value}'") if log
              cookie.max_age = nil
            end
          when "comment" then cookie.comment = value
          when "version" then
            begin
              cookie.version = Integer(value)
            rescue
              log.warn("Couldn't parse version '#{value}'") if log
              cookie.version = nil
            end
          when "secure" then cookie.secure = true
          end
        end

        # Fill in whatever the header did not specify from the request URI.
        cookie.path   ||= (uri + './').path
        cookie.secure ||= false
        cookie.domain ||= uri.host
        # Move this in to the cookie jar
        yield cookie if block_given?

        cookie
      }
    end

    ##
    # Returns +domain+ downcased, with a trailing ":port" and a leading "."
    # removed.  Returns nil when +domain+ is nil or ends with a "."
    # (RFC 6265 #4.1.2.3).
    def normalize_domain(domain)
      return nil if domain.nil?
      # RFC 6265 #4.1.2.3
      return nil if domain.end_with?('.')
      domain.downcase.tap { |dom|
        dom.sub!(/:[0-9]+$/,'')
        dom.sub!(/^\./,'')
      }
    end
  end

  alias set_domain domain=

  # Stores +domain+ after running it through ::normalize_domain.
  def domain=(domain)
    set_domain(self.class.normalize_domain(domain))
  end

  # True when the cookie has an expiry time and that time has passed.
  def expired?
    return false unless expires
    Time.now > expires
  end

  alias secure? secure

  ##
  # True when this cookie's domain is the host of +uri+ or a parent domain
  # of it.  A single-label domain (other than one starting with "local")
  # is only accepted on an exact host match.  Returns false when either
  # the cookie domain or the normalized host is missing.
  def acceptable_from_uri?(uri)
    dom = domain or return false
    host = self.class.normalize_domain(uri.host) or return false

    return true if host == dom
    return false if dom.match(/^(?!local)[^.]+$/)
    # Interpolate rather than appending to (and mutating) a string literal.
    return host.end_with?(".#{dom}")
  end

  ##
  # True when the cookie may be sent to +uri+: the scheme must be https for
  # secure cookies, the domain must be acceptable and the URI path must
  # start with the cookie path.
  def valid_for_uri?(uri)
    return false if secure? && uri.scheme != 'https'
    acceptable_from_uri?(uri) && uri.path.start_with?(path)
  end

  # The cookie as a "name=value" string.
  def to_s
    "#{@name}=#{@value}"
  end
end
@@ -0,0 +1,202 @@
1
##
# This class is used to manage the Cookies that have been returned from
# any particular website.
#
# Cookies are kept in a nested Hash: domain => path => name => cookie.

class Mechanize::CookieJar

  # add_cookie wants something resembling a URI.

  FakeURI = Struct.new(:host) # :nodoc:

  # The nested Hash (domain => path => name => cookie) backing this jar.
  attr_reader :jar

  def initialize
    @jar = {}
  end

  def initialize_copy other # :nodoc:
    # Deep copy so the duplicate can be cleaned up without touching +other+.
    @jar = Marshal.load Marshal.dump other.jar
  end

  # Add a cookie to the Jar.
  #
  # Returns the cookie, or nil when the cookie is not acceptable from +uri+.
  def add(uri, cookie)
    return unless cookie.acceptable_from_uri?(uri)

    normal_domain = cookie.domain.downcase

    @jar[normal_domain] ||= {}
    @jar[normal_domain][cookie.path] ||= {}
    @jar[normal_domain][cookie.path][cookie.name] = cookie

    cookie
  end

  # Fetch the cookies that should be used for the URI object passed in.
  #
  # Expired cookies are removed from the jar first.  Note that an empty
  # path on +url+ is replaced with '/' on the object that was passed in.
  def cookies(url)
    cleanup
    url.path = '/' if url.path.empty?

    [].tap { |cookies|
      @jar.each { |domain, paths|
        paths.each { |path, hash|
          hash.each_value { |cookie|
            next if cookie.expired? || !cookie.valid_for_uri?(url)
            cookies << cookie
          }
        }
      }
    }
  end

  # True when no cookie in the jar applies to +url+.
  def empty?(url)
    cookies(url).empty?
  end

  # All unexpired cookies in the jar as a flat Array.
  def to_a
    cleanup

    @jar.map do |domain, paths|
      paths.map do |path, names|
        names.values
      end
    end.flatten
  end

  # Save the cookie jar to a file in the format specified.
  #
  # Available formats:
  # :yaml  <- YAML structure
  # :cookiestxt  <- Mozilla's cookies.txt format
  #
  # Expired and session cookies are not written.  Raises ArgumentError for
  # any other format; the check happens before the file is opened so an
  # existing file is not truncated by a bad call.  Returns self.
  def save_as(file, format = :yaml)
    unless [:yaml, :cookiestxt].include?(format)
      raise ArgumentError, "Unknown cookie jar file format"
    end

    jar = dup
    jar.cleanup true

    # ::File.open rather than Kernel#open: the latter would run a command
    # for a file name starting with "|".
    ::File.open(file, 'w') { |f|
      case format
      when :yaml then
        load_yaml

        YAML.dump(jar.jar, f)
      when :cookiestxt then
        jar.dump_cookiestxt(f)
      end
    }

    self
  end

  # Load cookie jar from a file in the format specified.
  #
  # Available formats:
  # :yaml  <- YAML structure.
  # :cookiestxt  <- Mozilla's cookies.txt format
  #
  # Raises ArgumentError for any other format.  Returns self.
  def load(file, format = :yaml)
    unless [:yaml, :cookiestxt].include?(format)
      raise ArgumentError, "Unknown cookie jar file format"
    end

    # ::File.open rather than Kernel#open: the latter would run a command
    # for a file name starting with "|".
    @jar = ::File.open(file) { |f|
      case format
      when :yaml then
        load_yaml

        # NOTE(review): YAML.load deserializes whatever objects the file
        # describes; only load cookie jars from trusted sources.
        YAML.load(f)
      when :cookiestxt then
        load_cookiestxt(f)
      end
    }

    cleanup

    self
  end

  def load_yaml # :nodoc:
    # Prefer psych when it is installed; fall back silently otherwise.
    begin
      require 'psych'
    rescue LoadError
    end

    require 'yaml'
  end

  # Clear the cookie jar
  def clear!
    @jar = {}
  end

  # Read cookies from Mozilla cookies.txt-style IO stream
  #
  # Lines that do not have exactly seven tab-separated fields, and cookies
  # that have already expired, are skipped.  Cookies are added to the
  # current jar, which is returned.
  def load_cookiestxt(io)
    now = Time.now

    io.each_line do |line|
      line.chomp!
      line.gsub!(/#.+/, '')
      fields = line.split("\t")

      next if fields.length != 7

      # An expiry of 0 means the cookie has no expiry time.
      expires_seconds = fields[4].to_i
      expires = (expires_seconds == 0) ? nil : Time.at(expires_seconds)
      next if expires and (expires < now)

      c = Mechanize::Cookie.new(fields[5], fields[6])
      c.domain = fields[0]
      # Field 1 indicates whether the cookie can be read by other machines at
      # the same domain.  This is computed by the cookie implementation, based
      # on the domain value.
      c.path = fields[2]               # Path for which the cookie is relevant
      c.secure = (fields[3] == "TRUE") # Requires a secure connection
      c.expires = expires              # Time the cookie expires.
      c.version = 0                    # Conforms to Netscape cookie spec.

      add(FakeURI.new(c.domain), c)
    end

    @jar
  end

  # Write cookies to Mozilla cookies.txt-style IO stream
  #
  # One line of seven tab-separated fields is written per cookie.
  def dump_cookiestxt(io)
    to_a.each do |cookie|
      fields = [
        cookie.domain,
        (cookie.domain =~ /^\./) ? "TRUE" : "FALSE",
        cookie.path,
        (cookie.secure == true) ? "TRUE" : "FALSE",
        cookie.expires.to_i.to_s, # "0" when the cookie has no expiry
        cookie.name,
        cookie.value,
      ]

      io.puts(fields.join("\t"))
    end
  end

  protected

  # Remove expired cookies
  #
  # When +session+ is true, cookies flagged as session cookies are removed
  # as well.
  def cleanup session = false
    @jar.each do |domain, paths|
      paths.each do |path, names|
        # delete_if is the supported way to drop entries while walking a Hash.
        names.delete_if do |cookie_name, cookie|
          cookie.expired? or (session and cookie.session)
        end
      end
    end
  end
end
202
+
@@ -0,0 +1,35 @@
1
module Mechanize::ElementMatcher

  ##
  # Generates three finder methods on the receiver via class_eval, named
  # after +singular+ and +plural+ (+plural+ defaults to +singular+ with an
  # "s" appended):
  #
  # * <plural>_with(criteria) returns every element of <plural> for which
  #   each criterion matches (value === element.send(key)).  A String
  #   criteria is shorthand for matching on :name, and an :id key is
  #   looked up as :dom_id.
  # * <singular>_with(criteria) returns the first such element.
  # * <singular> is an alias for <singular>_with.
  #
  # Both finders yield their result when a block is given.

  def elements_with singular, plural = "#{singular}s"
    class_eval <<-CODE
      def #{plural}_with criteria = {}
        criteria =
          if String === criteria then
            {:name => criteria}
          else
            criteria.map do |key, expected|
              key = :dom_id if key.to_sym == :id
              [key, expected]
            end
          end

        found = #{plural}.find_all do |element|
          criteria.all? do |key, expected|
            expected === element.send(key)
          end
        end

        yield found if block_given?
        found
      end

      def #{singular}_with criteria = {}
        found = #{plural}_with(criteria).first
        yield found if block_given?
        found
      end

      alias :#{singular} :#{singular}_with
    CODE
  end

end
35
+
@@ -0,0 +1,80 @@
1
class Mechanize
  # = Synopsis
  # This is the default (and base) class for the Pluggable Parsers.  If
  # Mechanize cannot find an appropriate class to use for the content type,
  # this class will be used.  For example, if you download a JPG, Mechanize
  # will not know how to parse it, so this class will be instantiated.
  #
  # This is a good class to use as the base class for building your own
  # pluggable parsers.
  #
  # == Example
  #  require 'rubygems'
  #  require 'mechanize'
  #
  #  agent = Mechanize.new
  #  agent.get('http://example.com/foo.jpg').class  #=> Mechanize::File
  #
  class File
    extend Forwardable

    attr_accessor :uri, :response, :body, :code, :filename
    alias :header :response
    def_delegator :header, :[], :[]
    def_delegator :header, :[]=, :[]=
    def_delegator :header, :key?, :key?
    def_delegator :header, :each, :each
    def_delegator :header, :canonical_each, :canonical_each

    alias :content :body

    ##
    # Creates a new file for +uri+ with the given +response+ headers,
    # +body+ and response +code+.  Yields itself when a block is given.
    #
    # The suggested #filename is taken from the filename parameter of the
    # Content-Disposition header when that header is present, otherwise
    # from the last segment of the URI path (with ".html" appended when it
    # has no dot).  It defaults to "index.html".
    def initialize(uri=nil, response=nil, body=nil, code=nil)
      @uri      = uri
      @body     = body
      @code     = code
      @response = Headers.new

      # Copy the headers in to a hash to prevent memory leaks
      if response
        response.each { |k,v|
          @response[k] = v
        }
      end

      @filename = 'index.html'

      # Set the filename
      if disposition = @response['content-disposition']
        disposition.split(/;\s*/).each do |pair|
          k,v = pair.split(/=/, 2)
          # A bare "filename" parameter has no value; keep the default.
          next unless k && v && k.downcase == 'filename'

          # The header is server-controlled: drop the optional surrounding
          # quotes and any directory components so that saving cannot
          # escape the current directory.
          name = ::File.basename(v.strip.sub(/\A"(.*)"\z/m, '\1'))

          next if name.empty? || name == '.' || name == '..' ||
                  name.include?('/')

          @filename = name
        end
      else
        if @uri
          @filename = @uri.path.split(/\//).last || 'index.html'
          @filename << ".html" unless @filename =~ /\./
        end
      end

      yield self if block_given?
    end

    # Use this method to save the content of this object to filename
    #
    # When +filename+ is nil the suggested #filename is used; if a file of
    # that name already exists a numeric suffix (".1", ".2", ...) is
    # appended until an unused name is found.  An explicitly given
    # +filename+ is overwritten.
    def save_as(filename = nil)
      if filename.nil?
        filename = @filename
        number = 1
        # File.exist? -- the File.exists? alias was removed in Ruby 3.2.
        while ::File.exist?(filename)
          filename = "#{@filename}.#{number}"
          number += 1
        end
      end

      ::File::open(filename, "wb") { |f|
        f.write body
      }
    end

    alias :save :save_as
  end
end