aai10-mechanize 2.0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (176) hide show
  1. data/.autotest +6 -0
  2. data/.gitignore +9 -0
  3. data/CHANGELOG.rdoc +652 -0
  4. data/EXAMPLES.rdoc +187 -0
  5. data/FAQ.rdoc +11 -0
  6. data/GUIDE.rdoc +163 -0
  7. data/LICENSE.rdoc +20 -0
  8. data/Manifest.txt +172 -0
  9. data/README.rdoc +63 -0
  10. data/Rakefile +36 -0
  11. data/aai10-mechanize.gemspec +20 -0
  12. data/examples/flickr_upload.rb +22 -0
  13. data/examples/mech-dump.rb +5 -0
  14. data/examples/proxy_req.rb +7 -0
  15. data/examples/rubyforge.rb +20 -0
  16. data/examples/spider.rb +21 -0
  17. data/lib/mechanize.rb +664 -0
  18. data/lib/mechanize/content_type_error.rb +14 -0
  19. data/lib/mechanize/cookie.rb +116 -0
  20. data/lib/mechanize/cookie_jar.rb +202 -0
  21. data/lib/mechanize/element_matcher.rb +35 -0
  22. data/lib/mechanize/file.rb +80 -0
  23. data/lib/mechanize/file_connection.rb +17 -0
  24. data/lib/mechanize/file_request.rb +26 -0
  25. data/lib/mechanize/file_response.rb +74 -0
  26. data/lib/mechanize/file_saver.rb +37 -0
  27. data/lib/mechanize/form.rb +478 -0
  28. data/lib/mechanize/form/button.rb +9 -0
  29. data/lib/mechanize/form/check_box.rb +11 -0
  30. data/lib/mechanize/form/field.rb +44 -0
  31. data/lib/mechanize/form/file_upload.rb +23 -0
  32. data/lib/mechanize/form/image_button.rb +20 -0
  33. data/lib/mechanize/form/multi_select_list.rb +83 -0
  34. data/lib/mechanize/form/option.rb +49 -0
  35. data/lib/mechanize/form/radio_button.rb +48 -0
  36. data/lib/mechanize/form/select_list.rb +40 -0
  37. data/lib/mechanize/headers.rb +25 -0
  38. data/lib/mechanize/history.rb +83 -0
  39. data/lib/mechanize/http.rb +3 -0
  40. data/lib/mechanize/http/agent.rb +738 -0
  41. data/lib/mechanize/inspect.rb +88 -0
  42. data/lib/mechanize/monkey_patch.rb +37 -0
  43. data/lib/mechanize/page.rb +408 -0
  44. data/lib/mechanize/page/base.rb +8 -0
  45. data/lib/mechanize/page/frame.rb +27 -0
  46. data/lib/mechanize/page/image.rb +30 -0
  47. data/lib/mechanize/page/label.rb +20 -0
  48. data/lib/mechanize/page/link.rb +82 -0
  49. data/lib/mechanize/page/meta_refresh.rb +56 -0
  50. data/lib/mechanize/pluggable_parsers.rb +101 -0
  51. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  52. data/lib/mechanize/redirect_not_get_or_head_error.rb +19 -0
  53. data/lib/mechanize/response_code_error.rb +22 -0
  54. data/lib/mechanize/response_read_error.rb +27 -0
  55. data/lib/mechanize/robots_disallowed_error.rb +29 -0
  56. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  57. data/lib/mechanize/util.rb +113 -0
  58. data/test/data/htpasswd +1 -0
  59. data/test/data/server.crt +16 -0
  60. data/test/data/server.csr +12 -0
  61. data/test/data/server.key +15 -0
  62. data/test/data/server.pem +15 -0
  63. data/test/helper.rb +175 -0
  64. data/test/htdocs/alt_text.html +10 -0
  65. data/test/htdocs/bad_form_test.html +9 -0
  66. data/test/htdocs/button.jpg +0 -0
  67. data/test/htdocs/canonical_uri.html +9 -0
  68. data/test/htdocs/dir with spaces/foo.html +1 -0
  69. data/test/htdocs/empty_form.html +6 -0
  70. data/test/htdocs/file_upload.html +26 -0
  71. data/test/htdocs/find_link.html +41 -0
  72. data/test/htdocs/form_multi_select.html +16 -0
  73. data/test/htdocs/form_multival.html +37 -0
  74. data/test/htdocs/form_no_action.html +18 -0
  75. data/test/htdocs/form_no_input_name.html +16 -0
  76. data/test/htdocs/form_select.html +16 -0
  77. data/test/htdocs/form_select_all.html +16 -0
  78. data/test/htdocs/form_select_none.html +17 -0
  79. data/test/htdocs/form_select_noopts.html +10 -0
  80. data/test/htdocs/form_set_fields.html +14 -0
  81. data/test/htdocs/form_test.html +188 -0
  82. data/test/htdocs/frame_referer_test.html +10 -0
  83. data/test/htdocs/frame_test.html +30 -0
  84. data/test/htdocs/google.html +13 -0
  85. data/test/htdocs/iframe_test.html +16 -0
  86. data/test/htdocs/index.html +6 -0
  87. data/test/htdocs/link with space.html +5 -0
  88. data/test/htdocs/meta_cookie.html +11 -0
  89. data/test/htdocs/no_title_test.html +6 -0
  90. data/test/htdocs/nofollow.html +9 -0
  91. data/test/htdocs/noindex.html +9 -0
  92. data/test/htdocs/norobots.html +8 -0
  93. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  94. data/test/htdocs/rel_nofollow.html +8 -0
  95. data/test/htdocs/relative/tc_relative_links.html +21 -0
  96. data/test/htdocs/robots.html +8 -0
  97. data/test/htdocs/robots.txt +2 -0
  98. data/test/htdocs/tc_bad_charset.html +9 -0
  99. data/test/htdocs/tc_bad_links.html +5 -0
  100. data/test/htdocs/tc_base_images.html +10 -0
  101. data/test/htdocs/tc_base_link.html +8 -0
  102. data/test/htdocs/tc_blank_form.html +11 -0
  103. data/test/htdocs/tc_charset.html +6 -0
  104. data/test/htdocs/tc_checkboxes.html +19 -0
  105. data/test/htdocs/tc_encoded_links.html +5 -0
  106. data/test/htdocs/tc_field_precedence.html +11 -0
  107. data/test/htdocs/tc_follow_meta.html +8 -0
  108. data/test/htdocs/tc_form_action.html +48 -0
  109. data/test/htdocs/tc_images.html +8 -0
  110. data/test/htdocs/tc_links.html +18 -0
  111. data/test/htdocs/tc_meta_in_body.html +9 -0
  112. data/test/htdocs/tc_no_attributes.html +16 -0
  113. data/test/htdocs/tc_pretty_print.html +17 -0
  114. data/test/htdocs/tc_radiobuttons.html +17 -0
  115. data/test/htdocs/tc_referer.html +16 -0
  116. data/test/htdocs/tc_relative_links.html +19 -0
  117. data/test/htdocs/tc_textarea.html +23 -0
  118. data/test/htdocs/test_bad_encoding.html +52 -0
  119. data/test/htdocs/test_click.html +11 -0
  120. data/test/htdocs/unusual______.html +5 -0
  121. data/test/servlets.rb +402 -0
  122. data/test/ssl_server.rb +48 -0
  123. data/test/test_cookies.rb +129 -0
  124. data/test/test_form_action.rb +52 -0
  125. data/test/test_form_as_hash.rb +59 -0
  126. data/test/test_form_button.rb +46 -0
  127. data/test/test_frames.rb +34 -0
  128. data/test/test_headers.rb +33 -0
  129. data/test/test_history.rb +118 -0
  130. data/test/test_history_added.rb +16 -0
  131. data/test/test_html_unscape_forms.rb +46 -0
  132. data/test/test_if_modified_since.rb +20 -0
  133. data/test/test_images.rb +19 -0
  134. data/test/test_mechanize.rb +852 -0
  135. data/test/test_mechanize_cookie.rb +345 -0
  136. data/test/test_mechanize_cookie_jar.rb +433 -0
  137. data/test/test_mechanize_file.rb +53 -0
  138. data/test/test_mechanize_file_request.rb +19 -0
  139. data/test/test_mechanize_file_response.rb +21 -0
  140. data/test/test_mechanize_form.rb +576 -0
  141. data/test/test_mechanize_form_check_box.rb +37 -0
  142. data/test/test_mechanize_form_encoding.rb +120 -0
  143. data/test/test_mechanize_form_field.rb +21 -0
  144. data/test/test_mechanize_form_image_button.rb +12 -0
  145. data/test/test_mechanize_form_textarea.rb +51 -0
  146. data/test/test_mechanize_http_agent.rb +697 -0
  147. data/test/test_mechanize_link.rb +84 -0
  148. data/test/test_mechanize_page_encoding.rb +147 -0
  149. data/test/test_mechanize_page_link.rb +382 -0
  150. data/test/test_mechanize_page_meta_refresh.rb +115 -0
  151. data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
  152. data/test/test_mechanize_subclass.rb +22 -0
  153. data/test/test_mechanize_util.rb +92 -0
  154. data/test/test_multi_select.rb +118 -0
  155. data/test/test_no_attributes.rb +13 -0
  156. data/test/test_option.rb +18 -0
  157. data/test/test_pluggable_parser.rb +136 -0
  158. data/test/test_post_form.rb +37 -0
  159. data/test/test_pretty_print.rb +22 -0
  160. data/test/test_radiobutton.rb +75 -0
  161. data/test/test_redirect_limit_reached.rb +39 -0
  162. data/test/test_redirect_ok.rb +25 -0
  163. data/test/test_referer.rb +81 -0
  164. data/test/test_relative_links.rb +40 -0
  165. data/test/test_request.rb +13 -0
  166. data/test/test_response_code.rb +53 -0
  167. data/test/test_robots.rb +72 -0
  168. data/test/test_save_file.rb +48 -0
  169. data/test/test_scheme.rb +48 -0
  170. data/test/test_select.rb +119 -0
  171. data/test/test_select_all.rb +15 -0
  172. data/test/test_select_none.rb +15 -0
  173. data/test/test_select_noopts.rb +18 -0
  174. data/test/test_set_fields.rb +44 -0
  175. data/test/test_ssl_server.rb +20 -0
  176. metadata +360 -0
@@ -0,0 +1,14 @@
class Mechanize
  # = Synopsis
  # Error raised when a pluggable parser is asked to parse a content type
  # it does not know how to handle. For example, if Mechanize::Page were
  # to try to parse a PDF, a ContentTypeError would be raised.
  class ContentTypeError < Mechanize::Error
    # The content type value handed to the constructor.
    attr_reader :content_type

    # Stores +content_type+ so rescuers can inspect it through
    # #content_type. No message is passed on to the superclass.
    def initialize(content_type)
      @content_type = content_type
    end
  end
end
@@ -0,0 +1,116 @@
1
+ require 'time'
2
+ require 'webrick/cookie'
3
+
# This class is used to represent an HTTP Cookie.
#
# It builds on WEBrick::Cookie, adding a parser for Set-Cookie header
# values, domain normalization and the predicates used to decide whether a
# cookie may be stored for, or sent to, a given URI.
class Mechanize::Cookie < WEBrick::Cookie

  # Set to true by ::parse when the cookie carries an +expires+ attribute
  # with no value.
  attr_accessor :session

  class << self
    # Parses +str+, a Set-Cookie header value received from +uri+, into an
    # Array of cookies. The header is split on commas that are followed by
    # a name=value pair, so several cookies may share one header.
    #
    # Cookies whose name/value cannot be parsed are reported through
    # +log+ (when given) and left out of the result. Each successfully
    # parsed cookie is yielded to the block, if one is given.
    #
    # Path and domain default to values derived from +uri+; secure
    # defaults to false.
    def parse(uri, str, log = Mechanize.log)
      parsed = str.split(/,(?=[^;,]*=)|,$/).map { |c|
        cookie_elem = c.split(/;+/)
        first_elem = cookie_elem.shift

        # A fragment such as ";" has no name=value part at all.
        next unless first_elem

        first_elem.strip!
        key, value = first_elem.split(/\=/, 2)

        cookie = nil
        begin
          cookie = new(key, value.dup)
        rescue
          log.warn("Couldn't parse key/value: #{first_elem}") if log
        end

        next unless cookie

        cookie_elem.each do |pair|
          pair.strip!
          key, value = pair.split(/\=/, 2)
          next unless key
          value = WEBrick::HTTPUtils.dequote(value.strip) if value

          case key.downcase
          when "domain" then
            cookie.domain = value
          when "path" then
            cookie.path = value
          when 'expires'
            # "expires" with no value at all (value is nil) is treated the
            # same as an empty value instead of raising NoMethodError.
            if value.nil? || value.empty? then
              cookie.session = true
              next
            end

            begin
              cookie.expires = Time.parse(value)
            rescue
              log.warn("Couldn't parse expires: #{value}") if log
            end
          when "max-age" then
            begin
              cookie.max_age = Integer(value)
            rescue
              log.warn("Couldn't parse max age '#{value}'") if log
              cookie.max_age = nil
            end
          when "comment" then cookie.comment = value
          when "version" then
            begin
              cookie.version = Integer(value)
            rescue
              log.warn("Couldn't parse version '#{value}'") if log
              cookie.version = nil
            end
          when "secure" then cookie.secure = true
          end
        end

        cookie.path ||= (uri + './').path
        cookie.secure ||= false
        cookie.domain ||= uri.host
        # Move this in to the cookie jar
        yield cookie if block_given?

        cookie
      }

      # Skipped (unparseable) entries produce nil in the map above.
      parsed.compact
    end

    # Returns +domain+ lower-cased with any trailing ":port" and a single
    # leading dot removed. Returns nil when +domain+ is nil or ends with a
    # dot.
    def normalize_domain(domain)
      # RFC 6265 #4.1.2.3
      return nil if domain.nil? || domain.end_with?('.')

      domain.downcase.tap { |dom|
        dom.sub!(/:[0-9]+$/, '')
        dom.sub!(/^\./, '')
      }
    end
  end

  alias set_domain domain=

  # Stores the normalized form of +domain+ (see ::normalize_domain).
  def domain=(domain)
    set_domain(self.class.normalize_domain(domain))
  end

  # True when the cookie has an expiry time that lies in the past.
  def expired?
    return false unless expires
    Time.now > expires
  end

  alias secure? secure

  # True when this cookie's domain equals the host of +uri+ or the host is
  # a subdomain of it. Dot-less domains are only accepted on an exact
  # match, unless they start with "local". Returns false when either the
  # cookie domain or the normalized host is missing.
  def acceptable_from_uri?(uri)
    dom = domain
    return false unless dom

    host = self.class.normalize_domain(uri.host)
    return false unless host

    return true if host == dom
    return false if dom.match(/^(?!local)[^.]+$/)

    host.end_with?(".#{dom}")
  end

  # True when the cookie may be sent to +uri+: secure cookies require the
  # https scheme, the domain must be acceptable and the URI path must start
  # with the cookie path.
  def valid_for_uri?(uri)
    return false if secure? && uri.scheme != 'https'
    acceptable_from_uri?(uri) && uri.path.start_with?(path)
  end

  # The cookie as a "name=value" pair.
  def to_s
    "#{@name}=#{@value}"
  end
end
@@ -0,0 +1,202 @@
class Mechanize
  ##
  # This class is used to manage the Cookies that have been returned from
  # any particular website.
  #
  # Cookies are kept in a nested Hash: domain => path => name => cookie.

  class CookieJar

    # add_cookie wants something resembling a URI.

    FakeURI = Struct.new(:host) # :nodoc:

    # The underlying domain => path => name => cookie Hash.
    attr_reader :jar

    def initialize
      @jar = {}
    end

    # Deep-copies the stored cookies so a dup does not share them.
    def initialize_copy other # :nodoc:
      @jar = Marshal.load Marshal.dump other.jar
    end

    # Add a cookie to the Jar.
    #
    # Returns +cookie+, or nil when the cookie is not acceptable from +uri+.
    def add(uri, cookie)
      return unless cookie.acceptable_from_uri?(uri)

      normal_domain = cookie.domain.downcase

      @jar[normal_domain] ||= {}
      @jar[normal_domain][cookie.path] ||= {}
      @jar[normal_domain][cookie.path][cookie.name] = cookie

      cookie
    end

    # Fetch the cookies that should be used for the URI object passed in.
    #
    # An empty path is treated as '/'. The substitution is made on a copy,
    # so the caller's URI object is left untouched.
    def cookies(url)
      cleanup

      if url.path.empty?
        url = url.dup
        url.path = '/'
      end

      matching = []
      @jar.each_value do |paths|
        paths.each_value do |names|
          names.each_value do |cookie|
            next if cookie.expired? || !cookie.valid_for_uri?(url)
            matching << cookie
          end
        end
      end
      matching
    end

    # True when no stored cookie applies to +url+.
    def empty?(url)
      cookies(url).empty?
    end

    # All unexpired cookies as a flat Array.
    def to_a
      cleanup

      @jar.map do |domain, paths|
        paths.map do |path, names|
          names.values
        end
      end.flatten
    end

    # Save the cookie jar to a file in the format specified.
    #
    # Available formats:
    # :yaml  <- YAML structure
    # :cookiestxt  <- Mozilla's cookies.txt format
    #
    # Expired and session cookies are left out of the saved copy. Raises
    # ArgumentError for any other format; the check happens before the file
    # is opened so an existing file is not truncated by a bad call.
    def save_as(file, format = :yaml)
      unless [:yaml, :cookiestxt].include?(format)
        raise ArgumentError, "Unknown cookie jar file format"
      end

      jar = dup
      jar.cleanup true

      load_yaml if format == :yaml

      # ::File.open rather than Kernel#open: a path beginning with "|" must
      # not be run as a command. The leading :: avoids Mechanize::File.
      ::File.open(file, 'w') { |f|
        case format
        when :yaml then
          YAML.dump(jar.jar, f)
        when :cookiestxt then
          jar.dump_cookiestxt(f)
        end
      }

      self
    end

    # Load cookie jar from a file in the format specified.
    #
    # Available formats:
    # :yaml  <- YAML structure.
    # :cookiestxt  <- Mozilla's cookies.txt format
    #
    # Raises ArgumentError for any other format.
    def load(file, format = :yaml)
      @jar = ::File.open(file) { |f|
        case format
        when :yaml then
          load_yaml

          # The jar is a Hash of cookie objects, which the restricted
          # YAML.load of newer Psych releases refuses to build.
          # NOTE(review): this deserializes arbitrary objects - only load
          # cookie files from a trusted source.
          if YAML.respond_to?(:unsafe_load)
            YAML.unsafe_load(f)
          else
            YAML.load(f)
          end
        when :cookiestxt then
          load_cookiestxt(f)
        else
          raise ArgumentError, "Unknown cookie jar file format"
        end
      }

      cleanup

      self
    end

    # Requires the YAML library, preferring psych when it is available.
    def load_yaml # :nodoc:
      begin
        require 'psych'
      rescue LoadError
      end

      require 'yaml'
    end

    # Clear the cookie jar
    def clear!
      @jar = {}
    end

    # Read cookies from Mozilla cookies.txt-style IO stream
    #
    # Lines that do not have exactly seven tab-separated fields, and
    # cookies that have already expired, are skipped. Returns the jar Hash.
    def load_cookiestxt(io)
      now = Time.now

      io.each_line do |line|
        line.chomp!
        line.gsub!(/#.+/, '')
        fields = line.split("\t")

        next if fields.length != 7

        # An expiry of 0 means "no expiry time".
        expires_seconds = fields[4].to_i
        expires = (expires_seconds == 0) ? nil : Time.at(expires_seconds)
        next if expires and (expires < now)

        c = Mechanize::Cookie.new(fields[5], fields[6])
        c.domain = fields[0]
        # Field 1 indicates whether the cookie can be read by other machines at
        # the same domain.  This is computed by the cookie implementation, based
        # on the domain value.
        c.path = fields[2]               # Path for which the cookie is relevant
        c.secure = (fields[3] == "TRUE") # Requires a secure connection
        c.expires = expires              # Time the cookie expires.
        c.version = 0                    # Conforms to Netscape cookie spec.

        add(FakeURI.new(c.domain), c)
      end

      @jar
    end

    # Write cookies to Mozilla cookies.txt-style IO stream
    #
    # One line per cookie: domain, subdomain flag, path, secure flag,
    # expiry (seconds since the epoch), name and value, tab-separated.
    def dump_cookiestxt(io)
      to_a.each do |cookie|
        fields = [
          cookie.domain,
          (cookie.domain =~ /^\./) ? "TRUE" : "FALSE",
          cookie.path,
          (cookie.secure == true) ? "TRUE" : "FALSE",
          cookie.expires.to_i.to_s,
          cookie.name,
          cookie.value
        ]
        io.puts(fields.join("\t"))
      end
    end

    protected

    # Remove expired cookies
    #
    # When +session+ is true, cookies flagged as session cookies are
    # removed as well.
    def cleanup session = false
      @jar.each do |domain, paths|
        paths.each do |path, names|
          # delete_if instead of deleting from the Hash while iterating it.
          names.delete_if do |cookie_name, cookie|
            cookie.expired? or (session and cookie.session)
          end
        end
      end
    end
  end
end
202
+
@@ -0,0 +1,35 @@
class Mechanize
  # Mixin that generates finder methods for collections of elements.
  #
  # A class that extends this module and exposes a collection method (for
  # example +fields+) can call <tt>elements_with :field</tt> to obtain
  # +fields_with+, +field_with+ and +field+.
  module ElementMatcher

    # Defines three instance methods on the extending class:
    #
    # <plural>_with(criteria)::   every element of +plural+ that matches
    # <singular>_with(criteria):: the first matching element, or nil
    # <singular>(criteria)::      alias for <singular>_with
    #
    # +criteria+ is either a String, which is matched against the
    # element's +name+, or a Hash mapping method names to expected values.
    # The key +id+ is looked up through +dom_id+. Values are compared with
    # ===, so a Regexp or a Class may be used. Each finder yields its
    # result when a block is given.
    #
    # +plural+ defaults to +singular+ with an "s" appended.
    def elements_with singular, plural = "#{singular}s"
      # Passing the file and line makes backtraces from the generated
      # methods point here instead of at "(eval)".
      class_eval <<-CODE, __FILE__, __LINE__ + 1
        def #{plural}_with criteria = {}
          criteria = if String === criteria then
                       {:name => criteria}
                     else
                       criteria.map do |k, v|
                         k = :dom_id if k.to_sym == :id
                         [k, v]
                       end
                     end

          f = #{plural}.find_all do |thing|
            criteria.all? do |k,v|
              v === thing.send(k)
            end
          end
          yield f if block_given?
          f
        end

        def #{singular}_with criteria = {}
          f = #{plural}_with(criteria).first
          yield f if block_given?
          f
        end

        alias :#{singular} :#{singular}_with
      CODE
    end

  end
end
35
+
@@ -0,0 +1,80 @@
class Mechanize
  # = Synopsis
  # This is the default (and base) class for the Pluggable Parsers.  If
  # Mechanize cannot find an appropriate class to use for the content type,
  # this class will be used.  For example, if you download a JPG, Mechanize
  # will not know how to parse it, so this class will be instantiated.
  #
  # This is a good class to use as the base class for building your own
  # pluggable parsers.
  #
  # == Example
  #  require 'rubygems'
  #  require 'mechanize'
  #
  #  agent = Mechanize.new
  #  agent.get('http://example.com/foo.jpg').class  #=> Mechanize::File
  #
  class File
    extend Forwardable

    attr_accessor :uri, :response, :body, :code, :filename
    alias :header :response
    def_delegator :header, :[], :[]
    def_delegator :header, :[]=, :[]=
    def_delegator :header, :key?, :key?
    def_delegator :header, :each, :each
    def_delegator :header, :canonical_each, :canonical_each

    alias :content :body

    # Builds the object from the request +uri+, the +response+ headers
    # (anything that yields key/value pairs from #each), the +body+ and the
    # response +code+. All arguments are optional.
    #
    # The filename is taken from the Content-Disposition header when one is
    # present, otherwise from the last segment of the URI path (with
    # ".html" appended when it has no dot), and falls back to
    # 'index.html'. The new object is yielded when a block is given.
    def initialize(uri=nil, response=nil, body=nil, code=nil)
      @uri = uri
      @body = body
      @code = code
      @response = Headers.new

      # Copy the headers in to a hash to prevent memory leaks
      if response
        response.each { |k,v|
          @response[k] = v
        }
      end

      @filename = 'index.html'

      # Set the filename
      if disposition = @response['content-disposition']
        disposition.split(/;\s*/).each do |pair|
          k,v = pair.split(/=/, 2)
          next unless k && k.downcase == 'filename'

          # Strip the optional surrounding double quotes; a filename
          # parameter with no usable value keeps the default name.
          # NOTE(review): the value comes from the server and is not
          # sanitized here - it may contain directory separators.
          name = v.to_s.strip.sub(/\A"(.*)"\z/m, '\1')
          @filename = name unless name.empty?
        end
      elsif @uri
        @filename = @uri.path.split(/\//).last || 'index.html'
        @filename = "#{@filename}.html" unless @filename =~ /\./
      end

      yield self if block_given?
    end

    # Use this method to save the content of this object to filename
    #
    # Without an argument the object's own filename is used; when a file
    # with that name already exists, ".1", ".2", ... is appended until an
    # unused name is found. An explicit +filename+ is written as given.
    # The body is written in binary mode.
    def save_as(filename = nil)
      if filename.nil?
        filename = @filename
        number = 1
        # File.exist? - the File.exists? alias is gone from current Ruby.
        while ::File.exist?(filename)
          filename = "#{@filename}.#{number}"
          number += 1
        end
      end

      ::File.open(filename, "wb") { |f|
        f.write body
      }
    end

    alias :save :save_as
  end
end