neocoin-mechanize 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +6 -0
- data/.gemtest +0 -0
- data/CHANGELOG.rdoc +638 -0
- data/EXAMPLES.rdoc +187 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +163 -0
- data/LICENSE.rdoc +20 -0
- data/Manifest.txt +172 -0
- data/README.rdoc +63 -0
- data/Rakefile +36 -0
- data/examples/flickr_upload.rb +22 -0
- data/examples/mech-dump.rb +5 -0
- data/examples/proxy_req.rb +7 -0
- data/examples/rubyforge.rb +20 -0
- data/examples/spider.rb +21 -0
- data/lib/mechanize.rb +662 -0
- data/lib/mechanize/content_type_error.rb +14 -0
- data/lib/mechanize/cookie.rb +85 -0
- data/lib/mechanize/cookie_jar.rb +241 -0
- data/lib/mechanize/element_matcher.rb +35 -0
- data/lib/mechanize/file.rb +80 -0
- data/lib/mechanize/file_connection.rb +17 -0
- data/lib/mechanize/file_request.rb +26 -0
- data/lib/mechanize/file_response.rb +74 -0
- data/lib/mechanize/file_saver.rb +37 -0
- data/lib/mechanize/form.rb +478 -0
- data/lib/mechanize/form/button.rb +9 -0
- data/lib/mechanize/form/check_box.rb +11 -0
- data/lib/mechanize/form/field.rb +44 -0
- data/lib/mechanize/form/file_upload.rb +23 -0
- data/lib/mechanize/form/image_button.rb +20 -0
- data/lib/mechanize/form/multi_select_list.rb +83 -0
- data/lib/mechanize/form/option.rb +49 -0
- data/lib/mechanize/form/radio_button.rb +48 -0
- data/lib/mechanize/form/select_list.rb +40 -0
- data/lib/mechanize/headers.rb +25 -0
- data/lib/mechanize/history.rb +83 -0
- data/lib/mechanize/http.rb +3 -0
- data/lib/mechanize/http/agent.rb +738 -0
- data/lib/mechanize/inspect.rb +88 -0
- data/lib/mechanize/monkey_patch.rb +37 -0
- data/lib/mechanize/page.rb +408 -0
- data/lib/mechanize/page/base.rb +8 -0
- data/lib/mechanize/page/frame.rb +27 -0
- data/lib/mechanize/page/image.rb +30 -0
- data/lib/mechanize/page/label.rb +20 -0
- data/lib/mechanize/page/link.rb +82 -0
- data/lib/mechanize/page/meta_refresh.rb +56 -0
- data/lib/mechanize/pluggable_parsers.rb +101 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +19 -0
- data/lib/mechanize/response_code_error.rb +22 -0
- data/lib/mechanize/response_read_error.rb +27 -0
- data/lib/mechanize/robots_disallowed_error.rb +29 -0
- data/lib/mechanize/unsupported_scheme_error.rb +8 -0
- data/lib/mechanize/util.rb +113 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +175 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/canonical_uri.html +9 -0
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_referer_test.html +10 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/nofollow.html +9 -0
- data/test/htdocs/noindex.html +9 -0
- data/test/htdocs/norobots.html +8 -0
- data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
- data/test/htdocs/rel_nofollow.html +8 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/robots.html +8 -0
- data/test/htdocs/robots.txt +2 -0
- data/test/htdocs/tc_bad_charset.html +9 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_images.html +10 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_charset.html +6 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_field_precedence.html +11 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_images.html +8 -0
- data/test/htdocs/tc_links.html +18 -0
- data/test/htdocs/tc_meta_in_body.html +9 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +16 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/test_bad_encoding.html +52 -0
- data/test/htdocs/test_click.html +11 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +402 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_cookies.rb +129 -0
- data/test/test_form_action.rb +52 -0
- data/test/test_form_as_hash.rb +59 -0
- data/test/test_form_button.rb +46 -0
- data/test/test_frames.rb +34 -0
- data/test/test_headers.rb +33 -0
- data/test/test_history.rb +118 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +46 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_images.rb +19 -0
- data/test/test_mechanize.rb +842 -0
- data/test/test_mechanize_cookie.rb +345 -0
- data/test/test_mechanize_cookie_jar.rb +401 -0
- data/test/test_mechanize_file.rb +53 -0
- data/test/test_mechanize_file_request.rb +19 -0
- data/test/test_mechanize_file_response.rb +21 -0
- data/test/test_mechanize_form.rb +576 -0
- data/test/test_mechanize_form_check_box.rb +37 -0
- data/test/test_mechanize_form_encoding.rb +120 -0
- data/test/test_mechanize_form_field.rb +21 -0
- data/test/test_mechanize_form_image_button.rb +12 -0
- data/test/test_mechanize_form_textarea.rb +51 -0
- data/test/test_mechanize_http_agent.rb +697 -0
- data/test/test_mechanize_link.rb +84 -0
- data/test/test_mechanize_page_encoding.rb +147 -0
- data/test/test_mechanize_page_link.rb +382 -0
- data/test/test_mechanize_page_meta_refresh.rb +115 -0
- data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
- data/test/test_mechanize_subclass.rb +22 -0
- data/test/test_mechanize_util.rb +92 -0
- data/test/test_multi_select.rb +118 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_pluggable_parser.rb +136 -0
- data/test/test_post_form.rb +37 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +39 -0
- data/test/test_referer.rb +81 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +53 -0
- data/test/test_robots.rb +72 -0
- data/test/test_save_file.rb +48 -0
- data/test/test_scheme.rb +48 -0
- data/test/test_select.rb +119 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +18 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- metadata +354 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
class Mechanize
  # = Synopsis
  # Error raised when a pluggable parser is asked to parse a content type
  # that it does not know how to handle.  For example, if Mechanize::Page
  # were to try to parse a PDF, a ContentTypeError would be raised.
  class ContentTypeError < Mechanize::Error
    # The content type handed to the constructor.
    attr_reader :content_type

    # Remembers +content_type+ so that rescuers can inspect it later.
    def initialize(content_type)
      @content_type = content_type
    end
  end
end
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
require 'time'
|
|
2
|
+
require 'webrick/cookie'
|
|
3
|
+
|
|
4
|
+
# This class is used to represent an HTTP Cookie.
class Mechanize::Cookie < WEBrick::Cookie

  # Set to true by ::parse when the cookie carries an +expires+ attribute
  # with no (or an empty) value.
  attr_accessor :session

  # Parses +str+, the value of a Set-Cookie header received from +uri+,
  # into an Array of Mechanize::Cookie objects.
  #
  # +str+ may contain several comma-separated cookies.  Each successfully
  # parsed cookie is yielded to the block (if one is given) and included in
  # the returned Array.  Cookies that cannot be parsed are reported through
  # +log+ (when it is not nil) and left out of the result.
  #
  # Attributes missing from the header are defaulted from +uri+: the path
  # becomes the directory part of +uri.path+, the domain becomes +uri.host+
  # and +secure+ becomes false.
  def self.parse(uri, str, log = Mechanize.log)
    cookies = str.split(/,(?=[^;,]*=)|,$/).map do |c|
      cookie_elem = c.split(/;+/)
      first_elem = cookie_elem.shift

      # A segment such as ";" leaves nothing to parse.
      next unless first_elem

      first_elem.strip!
      key, value = first_elem.split(/\=/, 2)

      # A name without "=value" is not a cookie.  This used to be caught by
      # nil.dup raising, which is no longer the case on current Rubies.
      if value.nil?
        log.warn("Couldn't parse key/value: #{first_elem}") if log
        next
      end

      cookie = nil
      begin
        cookie = new(key, value.dup)
      rescue
        log.warn("Couldn't parse key/value: #{first_elem}") if log
      end

      next unless cookie

      cookie_elem.each do |pair|
        pair.strip!
        key, value = pair.split(/\=/, 2)
        next unless key
        value = WEBrick::HTTPUtils.dequote(value.strip) if value

        case key.downcase
        when "domain"
          # Domains are stored with a leading dot.
          value = ".#{value}" unless value =~ /^\./
          cookie.domain = value
        when "path"
          cookie.path = value
        when 'expires'
          # "expires" alone or "expires=" marks a session cookie.
          if value.nil? || value.empty?
            cookie.session = true
            next
          end

          begin
            cookie.expires = Time.parse(value)
          rescue
            log.warn("Couldn't parse expires: #{value}") if log
          end
        when "max-age"
          begin
            cookie.max_age = Integer(value)
          rescue
            log.warn("Couldn't parse max age '#{value}'") if log
            cookie.max_age = nil
          end
        when "comment"
          cookie.comment = value
        when "version"
          begin
            cookie.version = Integer(value)
          rescue
            log.warn("Couldn't parse version '#{value}'") if log
            cookie.version = nil
          end
        when "secure"
          cookie.secure = true
        end
      end

      cookie.path   ||= uri.path.to_s.sub(%r%[^/]*$%, '')
      cookie.secure ||= false
      cookie.domain ||= uri.host

      # Move this in to the cookie jar
      yield cookie if block_given?

      cookie
    end

    # Skipped cookies leave nil holes in the mapped Array; drop them.
    cookies.compact
  end

  # Returns true when the cookie has an expiry time that lies in the past.
  # Cookies without an expiry time never expire.
  def expired?
    return false unless expires
    Time.now > expires
  end

  # Returns the cookie as "name=value".
  def to_s
    "#{@name}=#{@value}"
  end
end
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
##
# This class is used to manage the Cookies that have been returned from
# any particular website.
#
# Cookies are stored in a nested Hash keyed by (downcased) domain, then
# path, then cookie name.

class Mechanize::CookieJar

  # #add wants something resembling a URI; only #host is read from it.

  FakeURI = Struct.new(:host) # :nodoc:

  # The nested domain => path => name => cookie Hash.
  attr_reader :jar

  def initialize
    @jar = {}
  end

  def initialize_copy other # :nodoc:
    # Deep copy, so that cleaning up the copy leaves the original untouched.
    @jar = Marshal.load Marshal.dump other.jar
  end

  # Add a cookie to the Jar.
  #
  # Returns the cookie, or nil when the cookie's domain is not acceptable
  # for +uri+ (see #valid_cookie_for_uri?).
  def add(uri, cookie)
    return unless valid_cookie_for_uri?(uri, cookie)

    normal_domain = cookie.domain.downcase

    paths = (@jar[normal_domain] ||= {})
    names = (paths[cookie.path] ||= {})
    names[cookie.name] = cookie

    cookie
  end

  # Fetch the cookies that should be used for the URI object passed in.
  #
  # Expired cookies are removed from the jar first.  Note that an empty
  # path on +url+ is replaced with '/' in place.
  def cookies(url)
    cleanup
    url.path = '/' if url.path.empty?

    domains = @jar.find_all { |domain, _|
      cookie_domain = self.class.strip_port(domain)
      if cookie_domain.start_with?('.')
        # Dotted domains match the host's suffix
        url.host =~ /#{Regexp.escape cookie_domain}$/i
      else
        # Plain domains must match the whole host
        url.host =~ /^#{Regexp.escape cookie_domain}$/i
      end
    }

    return [] unless domains.length > 0

    candidates = domains.map { |_, paths|
      paths.find_all { |path, _|
        url.path =~ /^#{Regexp.escape(path)}/
      }.map { |_, names| names.values }
    }.flatten

    candidates.reject { |cookie| cookie.expired? }
  end

  # Returns true when there are no cookies to send for +url+.
  def empty?(url)
    cookies(url).empty?
  end

  # Returns every unexpired cookie in the jar as a flat Array.
  def to_a
    cleanup

    @jar.map do |domain, paths|
      paths.map do |path, names|
        names.values
      end
    end.flatten
  end

  # Save the cookie jar to a file in the format specified.
  #
  # Expired and session cookies are not written.
  #
  # Available formats:
  # :yaml  <- YAML structure
  # :cookiestxt  <- Mozilla's cookies.txt format
  #
  # Raises ArgumentError for any other format; the check happens before the
  # file is opened so an existing file is not truncated by a bad call.
  def save_as(file, format = :yaml)
    unless [:yaml, :cookiestxt].include?(format)
      raise ArgumentError, "Unknown cookie jar file format"
    end

    jar = dup
    jar.cleanup true

    # ::File.open instead of Kernel#open: the latter would run a command
    # when given a name starting with "|".
    ::File.open(file, 'w') { |f|
      case format
      when :yaml then
        load_yaml

        YAML.dump(jar.jar, f)
      when :cookiestxt then
        jar.dump_cookiestxt(f)
      end
    }

    self
  end

  # Load cookie jar from a file in the format specified.
  #
  # Available formats:
  # :yaml  <- YAML structure.
  # :cookiestxt  <- Mozilla's cookies.txt format
  #
  # Raises ArgumentError for any other format.
  def load(file, format = :yaml)
    # ::File.open instead of Kernel#open: the latter would run a command
    # when given a name starting with "|".
    @jar = ::File.open(file) { |f|
      case format
      when :yaml then
        load_yaml

        # NOTE(review): YAML.load restores arbitrary objects from the file;
        # only load cookie jars from sources you trust.
        YAML.load(f)
      when :cookiestxt then
        load_cookiestxt(f)
      else
        raise ArgumentError, "Unknown cookie jar file format"
      end
    }

    cleanup

    self
  end

  def load_yaml # :nodoc:
    # Prefer psych when it is available, but do not insist on it.
    begin
      require 'psych'
    rescue LoadError
    end

    require 'yaml'
  end

  # Clear the cookie jar
  def clear!
    @jar = {}
  end

  # Read cookies from Mozilla cookies.txt-style IO stream
  #
  # Lines that do not have exactly seven tab-separated fields, and cookies
  # that have already expired, are skipped.  Returns the jar Hash.
  def load_cookiestxt(io)
    now = Time.now

    io.each_line do |line|
      line.chomp!
      line.gsub!(/#.+/, '')
      fields = line.split("\t")

      next if fields.length != 7

      # An expiry of 0 means "no expiry time"
      expires_seconds = fields[4].to_i
      expires = (expires_seconds == 0) ? nil : Time.at(expires_seconds)
      next if expires and (expires < now)

      c = Mechanize::Cookie.new(fields[5], fields[6])
      c.domain = fields[0]
      # Field 1 indicates whether the cookie can be read by other machines at
      # the same domain.  This is computed by the cookie implementation, based
      # on the domain value.
      c.path = fields[2]               # Path for which the cookie is relevant
      c.secure = (fields[3] == "TRUE") # Requires a secure connection
      c.expires = expires              # Time the cookie expires.
      c.version = 0                    # Conforms to Netscape cookie spec.

      add(FakeURI.new(c.domain), c)
    end

    @jar
  end

  # Write cookies to Mozilla cookies.txt-style IO stream
  #
  # One line of seven tab-separated fields is written per cookie.
  def dump_cookiestxt(io)
    to_a.each do |cookie|
      fields = [
        cookie.domain,
        (cookie.domain =~ /^\./) ? "TRUE" : "FALSE",
        cookie.path,
        (cookie.secure == true) ? "TRUE" : "FALSE",
        cookie.expires.to_i.to_s,
        cookie.name,
        cookie.value
      ]

      io.puts(fields.join("\t"))
    end
  end

  private
  # Determine if the cookie's domain and path are valid for
  # the uri.host based on the rules in RFC 2965
  def valid_cookie_for_uri?(uri, cookie)
    cookie_domain = self.class.strip_port(cookie.domain)
    host          = self.class.strip_port(uri.host)

    # reject cookies whose domains do not contain an embedded dot
    # cookies for localhost and .local. are exempt from this rule
    return false if
      cookie_domain !~ /.\../ && cookie_domain !~ /(localhost|\.?local)\.?$/

    bare_domain = cookie_domain.start_with?('.') ? cookie_domain[1..-1] : cookie_domain

    # The domain must be the whole host or a dot-separated suffix of it.
    # Matching on a label boundary and anchoring at the end keeps hosts such
    # as evilfoo.com or foo.com.evil.net from setting cookies for foo.com.
    #
    # Permitted:     A Set-Cookie for x.foo.com for Domain=.foo.com
    # Not Permitted: A Set-Cookie for y.x.foo.com for Domain=.foo.com because
    #                y.x contains a dot
    # Not Permitted: A Set-Cookie for foo.com for Domain=.bar.com
    match = host.match(/(?:\A|\.)#{Regexp.escape bare_domain}\z/i)
    return false if match.nil? || match.pre_match =~ /.\../

    true
  end

  protected

  # Remove expired cookies
  #
  # When +session+ is true, session cookies are removed as well.
  def cleanup session = false
    @jar.each do |domain, paths|
      paths.each do |path, names|
        names.delete_if do |cookie_name, cookie|
          cookie.expired? or (session and cookie.session)
        end
      end
    end
  end

  # Returns +host+ without a trailing ":port".
  def self.strip_port(host)
    host.gsub(/:[0-9]+$/,'')
  end
end
|
|
241
|
+
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
module Mechanize::ElementMatcher

  # Defines the finder methods <plural>_with and <singular>_with (plus the
  # alias <singular>) on the class that calls this macro.
  #
  # The generated finders filter the elements returned by the <plural>
  # method.  The criteria are either a String, which is matched against
  # the element's +name+, or a Hash mapping method names to expected
  # values.  A criterion matches when <tt>expected === element.send(key)</tt>,
  # and the key +id+ is looked up through +dom_id+.  Both finders yield
  # their result when a block is given.
  def elements_with singular, plural = "#{singular}s"
    class_eval <<-CODE
      def #{plural}_with criteria = {}
        conditions =
          case criteria
          when String then {:name => criteria}
          else
            criteria.map { |key, expected|
              [(key.to_sym == :id ? :dom_id : key), expected]
            }
          end

        found = #{plural}.find_all { |element|
          conditions.all? { |key, expected| expected === element.send(key) }
        }
        yield found if block_given?
        found
      end

      def #{singular}_with criteria = {}
        found = #{plural}_with(criteria).first
        yield found if block_given?
        found
      end

      alias :#{singular} :#{singular}_with
    CODE
  end

end
|
|
35
|
+
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
class Mechanize
  # = Synopsis
  # This is the default (and base) class for the Pluggable Parsers.  If
  # Mechanize cannot find an appropriate class to use for the content type,
  # this class will be used.  For example, if you download a JPG, Mechanize
  # will not know how to parse it, so this class will be instantiated.
  #
  # This is a good class to use as the base class for building your own
  # pluggable parsers.
  #
  # == Example
  #  require 'rubygems'
  #  require 'mechanize'
  #
  #  agent = Mechanize.new
  #  agent.get('http://example.com/foo.jpg').class  #=> Mechanize::File
  #
  class File
    extend Forwardable

    attr_accessor :uri, :response, :body, :code, :filename
    alias :header :response

    # Header access is forwarded to the response headers.
    def_delegator :header, :[], :[]
    def_delegator :header, :[]=, :[]=
    def_delegator :header, :key?, :key?
    def_delegator :header, :each, :each
    def_delegator :header, :canonical_each, :canonical_each

    alias :content :body

    # Creates a file for the +body+ fetched from +uri+ with status +code+.
    #
    # The headers of +response+ (anything that responds to +each+ with
    # key/value pairs) are copied into a fresh Headers object.  The
    # suggested #filename is taken from the content-disposition header when
    # present, otherwise from the last segment of the URI path (with
    # ".html" appended when it contains no dot), and defaults to
    # 'index.html'.  Yields the new object when a block is given.
    def initialize(uri=nil, response=nil, body=nil, code=nil)
      @uri  = uri
      @body = body
      @code = code
      @response = Headers.new

      # Copy the headers in to a hash to prevent memory leaks
      if response
        response.each { |k, v| @response[k] = v }
      end

      @filename = 'index.html'

      # Set the filename
      if disposition = @response['content-disposition']
        disposition.split(/;\s*/).each do |pair|
          k, v = pair.split(/=/, 2)
          @filename = v if k && k.downcase == 'filename'
        end
      elsif @uri
        @filename = @uri.path.split(/\//).last || 'index.html'
        @filename << ".html" unless @filename =~ /\./
      end

      yield self if block_given?
    end

    # Use this method to save the content of this object to filename
    #
    # Without an argument the content is written to #filename; if that file
    # already exists, ".1", ".2", ... is appended until an unused name is
    # found.  An explicitly given +filename+ is overwritten.
    def save_as(filename = nil)
      if filename.nil?
        filename = @filename
        number = 1
        # File.exist? rather than File.exists?, which newer Rubies removed.
        while ::File.exist?(filename)
          filename = "#{@filename}.#{number}"
          number += 1
        end
      end

      ::File.open(filename, "wb") { |f|
        f.write body
      }
    end

    alias :save :save_as
  end
end
|