mechanize 2.0.1 → 2.1.pre.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (148) hide show
  1. data.tar.gz.sig +0 -0
  2. data/CHANGELOG.rdoc +82 -0
  3. data/EXAMPLES.rdoc +1 -1
  4. data/FAQ.rdoc +9 -9
  5. data/Manifest.txt +35 -48
  6. data/README.rdoc +2 -1
  7. data/Rakefile +16 -3
  8. data/lib/mechanize.rb +809 -392
  9. data/lib/mechanize/content_type_error.rb +10 -11
  10. data/lib/mechanize/cookie.rb +193 -60
  11. data/lib/mechanize/cookie_jar.rb +39 -86
  12. data/lib/mechanize/download.rb +59 -0
  13. data/lib/mechanize/element_matcher.rb +1 -0
  14. data/lib/mechanize/file.rb +61 -76
  15. data/lib/mechanize/file_saver.rb +37 -35
  16. data/lib/mechanize/form.rb +475 -410
  17. data/lib/mechanize/form/button.rb +4 -7
  18. data/lib/mechanize/form/check_box.rb +10 -9
  19. data/lib/mechanize/form/field.rb +52 -42
  20. data/lib/mechanize/form/file_upload.rb +17 -19
  21. data/lib/mechanize/form/hidden.rb +3 -0
  22. data/lib/mechanize/form/image_button.rb +15 -16
  23. data/lib/mechanize/form/keygen.rb +34 -0
  24. data/lib/mechanize/form/multi_select_list.rb +20 -9
  25. data/lib/mechanize/form/option.rb +48 -47
  26. data/lib/mechanize/form/radio_button.rb +52 -45
  27. data/lib/mechanize/form/reset.rb +3 -0
  28. data/lib/mechanize/form/select_list.rb +10 -6
  29. data/lib/mechanize/form/submit.rb +3 -0
  30. data/lib/mechanize/form/text.rb +3 -0
  31. data/lib/mechanize/form/textarea.rb +3 -0
  32. data/lib/mechanize/headers.rb +17 -19
  33. data/lib/mechanize/history.rb +60 -61
  34. data/lib/mechanize/http.rb +5 -0
  35. data/lib/mechanize/http/agent.rb +485 -218
  36. data/lib/mechanize/http/auth_challenge.rb +59 -0
  37. data/lib/mechanize/http/auth_realm.rb +31 -0
  38. data/lib/mechanize/http/content_disposition_parser.rb +188 -0
  39. data/lib/mechanize/http/www_authenticate_parser.rb +155 -0
  40. data/lib/mechanize/monkey_patch.rb +14 -35
  41. data/lib/mechanize/page.rb +34 -2
  42. data/lib/mechanize/page/base.rb +6 -7
  43. data/lib/mechanize/page/frame.rb +5 -5
  44. data/lib/mechanize/page/image.rb +23 -23
  45. data/lib/mechanize/page/label.rb +16 -16
  46. data/lib/mechanize/page/link.rb +16 -0
  47. data/lib/mechanize/page/meta_refresh.rb +19 -7
  48. data/lib/mechanize/parser.rb +173 -0
  49. data/lib/mechanize/pluggable_parsers.rb +126 -83
  50. data/lib/mechanize/redirect_limit_reached_error.rb +16 -13
  51. data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -16
  52. data/lib/mechanize/response_code_error.rb +16 -17
  53. data/lib/mechanize/robots_disallowed_error.rb +22 -23
  54. data/lib/mechanize/test_case.rb +659 -0
  55. data/lib/mechanize/unauthorized_error.rb +3 -0
  56. data/lib/mechanize/unsupported_scheme_error.rb +4 -6
  57. data/lib/mechanize/util.rb +0 -12
  58. data/test/htdocs/form_order_test.html +11 -0
  59. data/test/htdocs/form_test.html +2 -2
  60. data/test/htdocs/tc_links.html +1 -0
  61. data/test/test_mechanize.rb +367 -59
  62. data/test/test_mechanize_cookie.rb +69 -4
  63. data/test/test_mechanize_cookie_jar.rb +200 -124
  64. data/test/test_mechanize_download.rb +43 -0
  65. data/test/test_mechanize_file.rb +53 -45
  66. data/test/{test_mechanize_file_response.rb → test_mechanize_file_connection.rb} +2 -2
  67. data/test/test_mechanize_file_request.rb +2 -2
  68. data/test/test_mechanize_file_saver.rb +21 -0
  69. data/test/test_mechanize_form.rb +345 -46
  70. data/test/test_mechanize_form_check_box.rb +5 -4
  71. data/test/test_mechanize_form_encoding.rb +10 -16
  72. data/test/test_mechanize_form_field.rb +45 -3
  73. data/test/test_mechanize_form_file_upload.rb +20 -0
  74. data/test/test_mechanize_form_image_button.rb +2 -2
  75. data/test/test_mechanize_form_keygen.rb +32 -0
  76. data/test/test_mechanize_form_multi_select_list.rb +84 -0
  77. data/test/test_mechanize_form_option.rb +55 -0
  78. data/test/test_mechanize_form_radio_button.rb +78 -0
  79. data/test/test_mechanize_form_select_list.rb +76 -0
  80. data/test/test_mechanize_form_textarea.rb +8 -7
  81. data/test/{test_headers.rb → test_mechanize_headers.rb} +4 -2
  82. data/test/test_mechanize_history.rb +103 -0
  83. data/test/test_mechanize_http_agent.rb +525 -17
  84. data/test/test_mechanize_http_auth_challenge.rb +39 -0
  85. data/test/test_mechanize_http_auth_realm.rb +49 -0
  86. data/test/test_mechanize_http_content_disposition_parser.rb +118 -0
  87. data/test/test_mechanize_http_www_authenticate_parser.rb +146 -0
  88. data/test/test_mechanize_link.rb +10 -14
  89. data/test/test_mechanize_page.rb +118 -0
  90. data/test/test_mechanize_page_encoding.rb +48 -13
  91. data/test/test_mechanize_page_frame.rb +16 -0
  92. data/test/test_mechanize_page_link.rb +27 -19
  93. data/test/test_mechanize_page_meta_refresh.rb +26 -14
  94. data/test/test_mechanize_parser.rb +289 -0
  95. data/test/test_mechanize_pluggable_parser.rb +52 -0
  96. data/test/test_mechanize_redirect_limit_reached_error.rb +24 -0
  97. data/test/test_mechanize_redirect_not_get_or_head_error.rb +3 -7
  98. data/test/test_mechanize_subclass.rb +2 -2
  99. data/test/test_mechanize_util.rb +24 -13
  100. data/test/test_multi_select.rb +23 -22
  101. metadata +145 -114
  102. metadata.gz.sig +0 -0
  103. data/lib/mechanize/inspect.rb +0 -88
  104. data/test/helper.rb +0 -175
  105. data/test/htdocs/form_select_all.html +0 -16
  106. data/test/htdocs/form_select_none.html +0 -17
  107. data/test/htdocs/form_select_noopts.html +0 -10
  108. data/test/htdocs/iframe_test.html +0 -16
  109. data/test/htdocs/nofollow.html +0 -9
  110. data/test/htdocs/norobots.html +0 -8
  111. data/test/htdocs/rel_nofollow.html +0 -8
  112. data/test/htdocs/tc_base_images.html +0 -10
  113. data/test/htdocs/tc_images.html +0 -8
  114. data/test/htdocs/tc_no_attributes.html +0 -16
  115. data/test/htdocs/tc_radiobuttons.html +0 -17
  116. data/test/htdocs/test_bad_encoding.html +0 -52
  117. data/test/servlets.rb +0 -402
  118. data/test/ssl_server.rb +0 -48
  119. data/test/test_cookies.rb +0 -129
  120. data/test/test_form_action.rb +0 -52
  121. data/test/test_form_as_hash.rb +0 -59
  122. data/test/test_form_button.rb +0 -46
  123. data/test/test_frames.rb +0 -34
  124. data/test/test_history.rb +0 -118
  125. data/test/test_history_added.rb +0 -16
  126. data/test/test_html_unscape_forms.rb +0 -46
  127. data/test/test_if_modified_since.rb +0 -20
  128. data/test/test_images.rb +0 -19
  129. data/test/test_no_attributes.rb +0 -13
  130. data/test/test_option.rb +0 -18
  131. data/test/test_pluggable_parser.rb +0 -136
  132. data/test/test_post_form.rb +0 -37
  133. data/test/test_pretty_print.rb +0 -22
  134. data/test/test_radiobutton.rb +0 -75
  135. data/test/test_redirect_limit_reached.rb +0 -39
  136. data/test/test_referer.rb +0 -81
  137. data/test/test_relative_links.rb +0 -40
  138. data/test/test_request.rb +0 -13
  139. data/test/test_response_code.rb +0 -53
  140. data/test/test_robots.rb +0 -72
  141. data/test/test_save_file.rb +0 -48
  142. data/test/test_scheme.rb +0 -48
  143. data/test/test_select.rb +0 -119
  144. data/test/test_select_all.rb +0 -15
  145. data/test/test_select_none.rb +0 -15
  146. data/test/test_select_noopts.rb +0 -18
  147. data/test/test_set_fields.rb +0 -44
  148. data/test/test_ssl_server.rb +0 -20
@@ -0,0 +1,59 @@
1
+ class Mechanize::HTTP
2
+
3
+ AuthChallenge = Struct.new :scheme, :params
4
+
5
+ ##
6
+ # A parsed WWW-Authenticate header
7
+
8
+ class AuthChallenge
9
+
10
+ ##
11
+ # :attr_accessor: scheme
12
+ #
13
+ # The authentication scheme
14
+
15
+ ##
16
+ # :attr_accessor: params
17
+ #
18
+ # The authentication parameters
19
+
20
+ ##
21
+ # :method: initialize(scheme = nil, params = nil)
22
+ #
23
+ # Creates a new AuthChallenge header with the given scheme and parameters
24
+
25
+ ##
26
+ # Retrieves +param+ from the params list
27
+
28
+ def [] param
29
+ params[param]
30
+ end
31
+
32
+ ##
33
+ # Constructs an AuthRealm for this challenge
34
+
35
+ def realm uri
36
+ case scheme
37
+ when 'Basic' then
38
+ raise ArgumentError, "provide uri for Basic authentication" unless uri
39
+ Mechanize::HTTP::AuthRealm.new scheme, uri + '/', self['realm']
40
+ when 'Digest' then
41
+ Mechanize::HTTP::AuthRealm.new scheme, uri + '/', self['realm']
42
+ else
43
+ raise Mechanize::Error, "unknown HTTP authentication scheme #{scheme}"
44
+ end
45
+ end
46
+
47
+ ##
48
+ # The reconstructed, normalized challenge
49
+
50
+ def to_s
51
+ auth_params = params.map { |name, value| "#{name}=\"#{value}\"" }
52
+
53
+ "#{scheme} #{auth_params.join ', '}"
54
+ end
55
+
56
+ end
57
+
58
+ end
59
+
@@ -0,0 +1,31 @@
1
+ class Mechanize::HTTP::AuthRealm
2
+
3
+ attr_reader :scheme
4
+ attr_reader :uri
5
+ attr_reader :realm
6
+
7
+ def initialize scheme, uri, realm
8
+ @scheme = scheme
9
+ @uri = uri
10
+ @realm = realm.downcase if realm
11
+ end
12
+
13
+ def == other
14
+ self.class === other and
15
+ @scheme == other.scheme and
16
+ @uri == other.uri and
17
+ @realm == other.realm
18
+ end
19
+
20
+ alias eql? ==
21
+
22
+ def hash # :nodoc:
23
+ [@scheme, @uri, @realm].hash
24
+ end
25
+
26
+ def inspect # :nodoc:
27
+ "#<AuthRealm %s %p \"%s\">" % [@scheme, @uri, @realm]
28
+ end
29
+
30
+ end
31
+
@@ -0,0 +1,188 @@
1
+ # coding: BINARY
2
+
3
+ require 'strscan'
4
+ require 'time'
5
+
6
+ class Mechanize::HTTP
7
+ ContentDisposition = Struct.new :type, :filename, :creation_date,
8
+ :modification_date, :read_date, :size, :parameters
9
+ end
10
+
11
+ ##
12
+ # Parser for Content-Disposition headers that loosely follows RFC 2183.
13
+ #
14
+ # Beyond RFC 2183, this parser allows:
15
+ #
16
+ # * Missing disposition-type
17
+ # * Multiple semicolons
18
+ # * Whitespace around semicolons
19
+
20
+ class Mechanize::HTTP::ContentDispositionParser
21
+
22
+ attr_accessor :scanner # :nodoc:
23
+
24
+ @parser = nil
25
+
26
+ ##
27
+ # Parses the disposition type and params in the +content_disposition+
28
+ # string. The "Content-Disposition:" must be removed.
29
+
30
+ def self.parse content_disposition
31
+ @parser ||= self.new
32
+ @parser.parse content_disposition
33
+ end
34
+
35
+ ##
36
+ # Creates a new parser for Content-Disposition headers
37
+
38
+ def initialize
39
+ @scanner = nil
40
+ end
41
+
42
+ ##
43
+ # Parses the +content_disposition+ header. If +header+ is set to true the
44
+ # "Content-Disposition:" portion will be parsed
45
+
46
+ def parse content_disposition, header = false
47
+ return nil if content_disposition.empty?
48
+
49
+ @scanner = StringScanner.new content_disposition
50
+
51
+ if header then
52
+ return nil unless @scanner.scan(/Content-Disposition/i)
53
+ return nil unless @scanner.scan(/:/)
54
+ spaces
55
+ end
56
+
57
+ type = rfc_2045_token
58
+ @scanner.scan(/;+/)
59
+
60
+ if @scanner.peek(1) == '=' then
61
+ @scanner.pos = 0
62
+ type = nil
63
+ end
64
+
65
+ disposition = Mechanize::HTTP::ContentDisposition.new type
66
+
67
+ spaces
68
+
69
+ return nil unless parameters = parse_parameters
70
+
71
+ disposition.filename = parameters.delete 'filename'
72
+ disposition.creation_date = parameters.delete 'creation-date'
73
+ disposition.modification_date = parameters.delete 'modification-date'
74
+ disposition.read_date = parameters.delete 'read-date'
75
+ disposition.size = parameters.delete 'size'
76
+ disposition.parameters = parameters
77
+
78
+ disposition
79
+ end
80
+
81
+ ##
82
+ # Extracts disposition-parm and returns a Hash.
83
+
84
+ def parse_parameters
85
+ parameters = {}
86
+
87
+ while true do
88
+ return nil unless param = rfc_2045_token
89
+ param.downcase
90
+ return nil unless @scanner.scan(/=/)
91
+
92
+ value = case param
93
+ when /^filename$/ then
94
+ rfc_2045_value
95
+ when /^(creation|modification|read)-date$/ then
96
+ Time.rfc822 rfc_2045_quoted_string
97
+ when /^size$/ then
98
+ @scanner.scan(/\d+/).to_i(10)
99
+ else
100
+ rfc_2045_value
101
+ end
102
+
103
+ return nil unless value
104
+
105
+ parameters[param] = value
106
+
107
+ spaces
108
+
109
+ break if @scanner.eos? or not @scanner.scan(/;+/)
110
+
111
+ spaces
112
+ end
113
+
114
+ parameters
115
+ end
116
+
117
+ ##
118
+ # quoted-string = <"> *(qtext/quoted-pair) <">
119
+ # qtext = <any CHAR excepting <">, "\" & CR,
120
+ # and including linear-white-space
121
+ # quoted-pair = "\" CHAR
122
+ #
123
+ # Parses an RFC 2045 quoted-string
124
+
125
+ def rfc_2045_quoted_string
126
+ return nil unless @scanner.scan(/"/)
127
+
128
+ text = ''
129
+
130
+ while true do
131
+ chunk = @scanner.scan(/[\000-\014\016-\041\043-\133\135-\177]+/) # not \r "
132
+
133
+ if chunk then
134
+ text << chunk
135
+
136
+ if @scanner.peek(1) == '\\' then
137
+ @scanner.get_byte
138
+ return nil if @scanner.eos?
139
+ text << @scanner.get_byte
140
+ elsif @scanner.scan(/\r\n[\t ]+/) then
141
+ text << " "
142
+ end
143
+ else
144
+ if '"' == @scanner.peek(1) then
145
+ @scanner.get_byte
146
+ break
147
+ else
148
+ return nil
149
+ end
150
+ end
151
+ end
152
+
153
+ text
154
+ end
155
+
156
+ ##
157
+ # token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, or tspecials>
158
+ #
159
+ # Parses an RFC 2045 token
160
+
161
+ def rfc_2045_token
162
+ @scanner.scan(/[^\000-\037\177()<>@,;:\\"\/\[\]?= ]+/)
163
+ end
164
+
165
+ ##
166
+ # value := token / quoted-string
167
+ #
168
+ # Parses an RFC 2045 value
169
+
170
+ def rfc_2045_value
171
+ if @scanner.peek(1) == '"' then
172
+ rfc_2045_quoted_string
173
+ else
174
+ rfc_2045_token
175
+ end
176
+ end
177
+
178
+ ##
179
+ # 1*SP
180
+ #
181
+ # Parses spaces
182
+
183
+ def spaces
184
+ @scanner.scan(/ +/)
185
+ end
186
+
187
+ end
188
+
@@ -0,0 +1,155 @@
1
+ # coding: BINARY
2
+
3
+ require 'strscan'
4
+
5
+ ##
6
+ # Parses the WWW-Authenticate HTTP header into separate challenges.
7
+
8
+ class Mechanize::HTTP::WWWAuthenticateParser
9
+
10
+ attr_accessor :scanner # :nodoc:
11
+
12
+ ##
13
+ # Creates a new header parser for WWW-Authenticate headers
14
+
15
+ def initialize
16
+ @scanner = nil
17
+ end
18
+
19
+ ##
20
+ # Parses the header. Returns an Array of AuthChallenge objects
21
+
22
+ def parse www_authenticate
23
+ challenges = []
24
+ @scanner = StringScanner.new www_authenticate
25
+
26
+ while true do
27
+ break if @scanner.eos?
28
+ challenge = Mechanize::HTTP::AuthChallenge.new
29
+
30
+ scheme = auth_scheme
31
+ next unless scheme
32
+ challenge.scheme = scheme
33
+
34
+ space = spaces
35
+
36
+ if scheme == 'NTLM' then
37
+ if space then
38
+ challenge.params = @scanner.scan(/.*/)
39
+ end
40
+
41
+ challenges << challenge
42
+ next
43
+ end
44
+
45
+ next unless space
46
+
47
+ params = {}
48
+
49
+ while true do
50
+ pos = @scanner.pos
51
+ name, value = auth_param
52
+
53
+ unless name then
54
+ challenge.params = params
55
+ challenges << challenge
56
+ break if @scanner.eos?
57
+
58
+ @scanner.pos = pos # rewind
59
+ challenge = '' # a token should be next, new challenge
60
+ break
61
+ else
62
+ params[name] = value
63
+ end
64
+
65
+ spaces
66
+
67
+ return nil unless ',' == @scanner.peek(1) or @scanner.eos?
68
+
69
+ @scanner.scan(/(, *)+/)
70
+ end
71
+ end
72
+
73
+ challenges
74
+ end
75
+
76
+ ##
77
+ # 1*SP
78
+ #
79
+ # Parses spaces
80
+
81
+ def spaces
82
+ @scanner.scan(/ +/)
83
+ end
84
+
85
+ ##
86
+ # token = 1*<any CHAR except CTLs or separators>
87
+ #
88
+ # Parses a token
89
+
90
+ def token
91
+ @scanner.scan(/[^\000-\037\177()<>@,;:\\"\/\[\]?={} ]+/)
92
+ end
93
+
94
+ ##
95
+ # auth-scheme = token
96
+ #
97
+ # Parses an auth scheme (a token)
98
+
99
+ alias auth_scheme token
100
+
101
+ ##
102
+ # auth-param = token "=" ( token | quoted-string )
103
+ #
104
+ # Parses an auth parameter
105
+
106
+ def auth_param
107
+ return nil unless name = token
108
+ return nil unless @scanner.scan(/=/)
109
+
110
+ value = if @scanner.peek(1) == '"' then
111
+ quoted_string
112
+ else
113
+ token
114
+ end
115
+
116
+ return nil unless value
117
+
118
+ return name, value
119
+ end
120
+
121
+ ##
122
+ # quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
123
+ # qdtext = <any TEXT except <">>
124
+ # quoted-pair = "\" CHAR
125
+ #
126
+ # For TEXT, the rules of RFC 2047 are ignored.
127
+
128
+ def quoted_string
129
+ return nil unless @scanner.scan(/"/)
130
+
131
+ text = ''
132
+
133
+ while true do
134
+ chunk = @scanner.scan(/[\r\n \t\041\043-\176\200-\377]+/) # not "
135
+
136
+ if chunk then
137
+ text << chunk
138
+
139
+ text << @scanner.get_byte if
140
+ chunk.end_with? '\\' and '"' == @scanner.peek(1)
141
+ else
142
+ if '"' == @scanner.peek(1) then
143
+ @scanner.get_byte
144
+ break
145
+ else
146
+ return nil
147
+ end
148
+ end
149
+ end
150
+
151
+ text
152
+ end
153
+
154
+ end
155
+
@@ -1,37 +1,16 @@
1
- if RUBY_VERSION < '1.9' then
2
- module Net
3
- class HTTP
4
- alias :old_keep_alive? :keep_alive?
5
- def keep_alive?(req, res)
6
- return false if /close/i =~ req['connection'].to_s
7
- return false if @seems_1_0_server
8
- return false if /close/i =~ res['connection'].to_s
9
- return true if /keep-alive/i =~ res['connection'].to_s
10
- return false if /close/i =~ res['proxy-connection'].to_s
11
- return true if /keep-alive/i =~ res['proxy-connection'].to_s
12
- (@curr_http_version == '1.1')
13
- end
14
- end
15
- end
16
- end
1
+ # :stopdoc:
17
2
 
18
- # Monkey patch for ruby 1.8.4
19
- unless RUBY_VERSION > "1.8.4"
20
- module Net # :nodoc:
21
- class HTTPResponse # :nodoc:
22
- CODE_TO_OBJ['500'] = HTTPInternalServerError
23
- end
24
- end
25
- else
26
- class Mechanize
27
- class Form
28
- alias :inspect :pretty_inspect
29
- end
30
- class Page
31
- alias :inspect :pretty_inspect
32
- class Link
33
- alias :inspect :pretty_inspect
34
- end
35
- end
3
+ class Net::HTTP
4
+ alias old_keep_alive? keep_alive?
5
+
6
+ def keep_alive?(req, res)
7
+ return false if /close/i =~ req['connection'].to_s
8
+ return false if @seems_1_0_server
9
+ return false if /close/i =~ res['connection'].to_s
10
+ return true if /keep-alive/i =~ res['connection'].to_s
11
+ return false if /close/i =~ res['proxy-connection'].to_s
12
+ return true if /keep-alive/i =~ res['proxy-connection'].to_s
13
+ (@curr_http_version == '1.1')
36
14
  end
37
- end
15
+ end if RUBY_VERSION < '1.9'
16
+