diamond-mechanize 2.2 → 2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (147) hide show
  1. data/Rakefile +49 -0
  2. data/lib/mechanize.rb +1079 -0
  3. data/lib/mechanize/content_type_error.rb +13 -0
  4. data/lib/mechanize/cookie.rb +232 -0
  5. data/lib/mechanize/cookie_jar.rb +194 -0
  6. data/lib/mechanize/download.rb +59 -0
  7. data/lib/mechanize/element_matcher.rb +36 -0
  8. data/lib/mechanize/file.rb +65 -0
  9. data/lib/mechanize/file_connection.rb +17 -0
  10. data/lib/mechanize/file_request.rb +26 -0
  11. data/lib/mechanize/file_response.rb +74 -0
  12. data/lib/mechanize/file_saver.rb +39 -0
  13. data/lib/mechanize/form.rb +543 -0
  14. data/lib/mechanize/form/button.rb +6 -0
  15. data/lib/mechanize/form/check_box.rb +12 -0
  16. data/lib/mechanize/form/field.rb +54 -0
  17. data/lib/mechanize/form/file_upload.rb +21 -0
  18. data/lib/mechanize/form/hidden.rb +3 -0
  19. data/lib/mechanize/form/image_button.rb +19 -0
  20. data/lib/mechanize/form/keygen.rb +34 -0
  21. data/lib/mechanize/form/multi_select_list.rb +94 -0
  22. data/lib/mechanize/form/option.rb +50 -0
  23. data/lib/mechanize/form/radio_button.rb +55 -0
  24. data/lib/mechanize/form/reset.rb +3 -0
  25. data/lib/mechanize/form/select_list.rb +44 -0
  26. data/lib/mechanize/form/submit.rb +3 -0
  27. data/lib/mechanize/form/text.rb +3 -0
  28. data/lib/mechanize/form/textarea.rb +3 -0
  29. data/lib/mechanize/headers.rb +23 -0
  30. data/lib/mechanize/history.rb +82 -0
  31. data/lib/mechanize/http.rb +8 -0
  32. data/lib/mechanize/http/agent.rb +1004 -0
  33. data/lib/mechanize/http/auth_challenge.rb +59 -0
  34. data/lib/mechanize/http/auth_realm.rb +31 -0
  35. data/lib/mechanize/http/content_disposition_parser.rb +188 -0
  36. data/lib/mechanize/http/www_authenticate_parser.rb +155 -0
  37. data/lib/mechanize/monkey_patch.rb +16 -0
  38. data/lib/mechanize/page.rb +440 -0
  39. data/lib/mechanize/page/base.rb +7 -0
  40. data/lib/mechanize/page/frame.rb +27 -0
  41. data/lib/mechanize/page/image.rb +30 -0
  42. data/lib/mechanize/page/label.rb +20 -0
  43. data/lib/mechanize/page/link.rb +98 -0
  44. data/lib/mechanize/page/meta_refresh.rb +68 -0
  45. data/lib/mechanize/parser.rb +173 -0
  46. data/lib/mechanize/pluggable_parsers.rb +144 -0
  47. data/lib/mechanize/redirect_limit_reached_error.rb +19 -0
  48. data/lib/mechanize/redirect_not_get_or_head_error.rb +21 -0
  49. data/lib/mechanize/response_code_error.rb +21 -0
  50. data/lib/mechanize/response_read_error.rb +27 -0
  51. data/lib/mechanize/robots_disallowed_error.rb +28 -0
  52. data/lib/mechanize/test_case.rb +663 -0
  53. data/lib/mechanize/unauthorized_error.rb +3 -0
  54. data/lib/mechanize/unsupported_scheme_error.rb +6 -0
  55. data/lib/mechanize/util.rb +101 -0
  56. data/test/data/htpasswd +1 -0
  57. data/test/data/server.crt +16 -0
  58. data/test/data/server.csr +12 -0
  59. data/test/data/server.key +15 -0
  60. data/test/data/server.pem +15 -0
  61. data/test/htdocs/alt_text.html +10 -0
  62. data/test/htdocs/bad_form_test.html +9 -0
  63. data/test/htdocs/button.jpg +0 -0
  64. data/test/htdocs/canonical_uri.html +9 -0
  65. data/test/htdocs/dir with spaces/foo.html +1 -0
  66. data/test/htdocs/empty_form.html +6 -0
  67. data/test/htdocs/file_upload.html +26 -0
  68. data/test/htdocs/find_link.html +41 -0
  69. data/test/htdocs/form_multi_select.html +16 -0
  70. data/test/htdocs/form_multival.html +37 -0
  71. data/test/htdocs/form_no_action.html +18 -0
  72. data/test/htdocs/form_no_input_name.html +16 -0
  73. data/test/htdocs/form_order_test.html +11 -0
  74. data/test/htdocs/form_select.html +16 -0
  75. data/test/htdocs/form_set_fields.html +14 -0
  76. data/test/htdocs/form_test.html +188 -0
  77. data/test/htdocs/frame_referer_test.html +10 -0
  78. data/test/htdocs/frame_test.html +30 -0
  79. data/test/htdocs/google.html +13 -0
  80. data/test/htdocs/index.html +6 -0
  81. data/test/htdocs/link with space.html +5 -0
  82. data/test/htdocs/meta_cookie.html +11 -0
  83. data/test/htdocs/no_title_test.html +6 -0
  84. data/test/htdocs/noindex.html +9 -0
  85. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  86. data/test/htdocs/relative/tc_relative_links.html +21 -0
  87. data/test/htdocs/robots.html +8 -0
  88. data/test/htdocs/robots.txt +2 -0
  89. data/test/htdocs/tc_bad_charset.html +9 -0
  90. data/test/htdocs/tc_bad_links.html +5 -0
  91. data/test/htdocs/tc_base_link.html +8 -0
  92. data/test/htdocs/tc_blank_form.html +11 -0
  93. data/test/htdocs/tc_charset.html +6 -0
  94. data/test/htdocs/tc_checkboxes.html +19 -0
  95. data/test/htdocs/tc_encoded_links.html +5 -0
  96. data/test/htdocs/tc_field_precedence.html +11 -0
  97. data/test/htdocs/tc_follow_meta.html +8 -0
  98. data/test/htdocs/tc_form_action.html +48 -0
  99. data/test/htdocs/tc_links.html +19 -0
  100. data/test/htdocs/tc_meta_in_body.html +9 -0
  101. data/test/htdocs/tc_pretty_print.html +17 -0
  102. data/test/htdocs/tc_referer.html +16 -0
  103. data/test/htdocs/tc_relative_links.html +19 -0
  104. data/test/htdocs/tc_textarea.html +23 -0
  105. data/test/htdocs/test_click.html +11 -0
  106. data/test/htdocs/unusual______.html +5 -0
  107. data/test/test_mechanize.rb +1164 -0
  108. data/test/test_mechanize_cookie.rb +451 -0
  109. data/test/test_mechanize_cookie_jar.rb +483 -0
  110. data/test/test_mechanize_download.rb +43 -0
  111. data/test/test_mechanize_file.rb +61 -0
  112. data/test/test_mechanize_file_connection.rb +21 -0
  113. data/test/test_mechanize_file_request.rb +19 -0
  114. data/test/test_mechanize_file_saver.rb +21 -0
  115. data/test/test_mechanize_form.rb +875 -0
  116. data/test/test_mechanize_form_check_box.rb +38 -0
  117. data/test/test_mechanize_form_encoding.rb +114 -0
  118. data/test/test_mechanize_form_field.rb +63 -0
  119. data/test/test_mechanize_form_file_upload.rb +20 -0
  120. data/test/test_mechanize_form_image_button.rb +12 -0
  121. data/test/test_mechanize_form_keygen.rb +32 -0
  122. data/test/test_mechanize_form_multi_select_list.rb +84 -0
  123. data/test/test_mechanize_form_option.rb +55 -0
  124. data/test/test_mechanize_form_radio_button.rb +78 -0
  125. data/test/test_mechanize_form_select_list.rb +76 -0
  126. data/test/test_mechanize_form_textarea.rb +52 -0
  127. data/test/test_mechanize_headers.rb +35 -0
  128. data/test/test_mechanize_history.rb +103 -0
  129. data/test/test_mechanize_http_agent.rb +1225 -0
  130. data/test/test_mechanize_http_auth_challenge.rb +39 -0
  131. data/test/test_mechanize_http_auth_realm.rb +49 -0
  132. data/test/test_mechanize_http_content_disposition_parser.rb +118 -0
  133. data/test/test_mechanize_http_www_authenticate_parser.rb +146 -0
  134. data/test/test_mechanize_link.rb +80 -0
  135. data/test/test_mechanize_page.rb +118 -0
  136. data/test/test_mechanize_page_encoding.rb +182 -0
  137. data/test/test_mechanize_page_frame.rb +16 -0
  138. data/test/test_mechanize_page_link.rb +390 -0
  139. data/test/test_mechanize_page_meta_refresh.rb +127 -0
  140. data/test/test_mechanize_parser.rb +289 -0
  141. data/test/test_mechanize_pluggable_parser.rb +52 -0
  142. data/test/test_mechanize_redirect_limit_reached_error.rb +24 -0
  143. data/test/test_mechanize_redirect_not_get_or_head_error.rb +14 -0
  144. data/test/test_mechanize_subclass.rb +22 -0
  145. data/test/test_mechanize_util.rb +103 -0
  146. data/test/test_multi_select.rb +119 -0
  147. metadata +148 -71
@@ -0,0 +1,59 @@
1
# The namespace is opened by nesting so this file does not rely on
# Mechanize::HTTP having been defined by an earlier require.

class Mechanize
  class HTTP

    AuthChallenge = Struct.new :scheme, :params

    ##
    # A parsed WWW-Authenticate header

    class AuthChallenge

      ##
      # :attr_accessor: scheme
      #
      # The authentication scheme

      ##
      # :attr_accessor: params
      #
      # The authentication parameters.  Mechanize::HTTP::WWWAuthenticateParser
      # stores a Hash of parameter names to values here, except for NTLM
      # challenges where it stores the remainder of the header as a String, or
      # leaves it nil when nothing follows the scheme.

      ##
      # :method: initialize(scheme = nil, params = nil)
      #
      # Creates a new AuthChallenge header with the given scheme and parameters

      ##
      # Retrieves +param+ from the params list.
      #
      # Returns nil when params is nil or a String (as for NTLM challenges),
      # since there are no named parameters to look up in that case.

      def [] param
        case params
        when nil, String then nil
        else params[param]
        end
      end

      ##
      # Constructs an AuthRealm for this challenge from the scheme, +uri+ + '/'
      # and the 'realm' parameter.
      #
      # Raises ArgumentError when +uri+ is missing and Mechanize::Error when
      # the scheme is neither Basic nor Digest.

      def realm uri
        case scheme
        when 'Basic', 'Digest' then
          # Both schemes build the realm from the uri, so both need it.
          raise ArgumentError,
                "provide uri for #{scheme} authentication" unless uri

          Mechanize::HTTP::AuthRealm.new scheme, uri + '/', self['realm']
        else
          raise Mechanize::Error,
                "unknown HTTP authentication scheme #{scheme}"
        end
      end

      ##
      # The reconstructed, normalized challenge.
      #
      # Named parameters are rendered as name="value" pairs joined by ", ".
      # A String params (NTLM) is appended verbatim and a nil or empty params
      # yields just the scheme.

      def to_s
        case params
        when nil then
          scheme.to_s
        when String then
          params.empty? ? scheme.to_s : "#{scheme} #{params}"
        else
          auth_params = params.map { |name, value| "#{name}=\"#{value}\"" }

          "#{scheme} #{auth_params.join ', '}"
        end
      end

    end

  end
end
59
+
@@ -0,0 +1,31 @@
1
##
# A scheme, uri and realm name grouped into one value.  Instances compare by
# value and define #eql? and #hash consistently with #==.

class Mechanize::HTTP::AuthRealm

  attr_reader :scheme, :uri, :realm

  ##
  # Stores +scheme+ and +uri+ as given.  A truthy +realm+ is stored
  # lower-cased; otherwise no realm is recorded and #realm returns nil.

  def initialize scheme, uri, realm
    @scheme = scheme
    @uri    = uri
    return unless realm

    @realm = realm.downcase
  end

  ##
  # True when +other+ is an instance of this class with an equal scheme, uri
  # and realm.

  def == other
    return false unless self.class === other
    return false unless @scheme == other.scheme
    return false unless @uri == other.uri

    @realm == other.realm
  end

  alias_method :eql?, :==

  def hash # :nodoc:
    [@scheme, @uri, @realm].hash
  end

  def inspect # :nodoc:
    format "#<AuthRealm %s %p \"%s\">", @scheme, @uri, @realm
  end

end
31
+
@@ -0,0 +1,188 @@
1
+ # coding: BINARY
2
+
3
+ require 'strscan'
4
+ require 'time'
5
+
6
# The namespace is opened by nesting so this file does not rely on
# Mechanize::HTTP having been defined by an earlier require.

class Mechanize
  class HTTP

    ##
    # The result of parsing a Content-Disposition header.  +type+ is the
    # disposition type (nil when the header omitted it).  +filename+,
    # +creation_date+, +modification_date+, +read_date+ and +size+ hold the
    # parameters of the same name and +parameters+ holds the remaining ones.

    ContentDisposition = Struct.new :type, :filename, :creation_date,
                                    :modification_date, :read_date, :size,
                                    :parameters

    ##
    # Parser for Content-Disposition headers that loosely follows RFC 2183.
    #
    # Beyond RFC 2183, this parser allows:
    #
    # * Missing disposition-type
    # * Multiple semicolons
    # * Whitespace around semicolons

    class ContentDispositionParser

      attr_accessor :scanner # :nodoc:

      @parser = nil

      ##
      # Parses the disposition type and params in the +content_disposition+
      # string.  The "Content-Disposition:" must be removed.
      #
      # A single parser instance is created on first use and reused for every
      # later call.

      def self.parse content_disposition
        @parser ||= self.new
        @parser.parse content_disposition
      end

      ##
      # Creates a new parser for Content-Disposition headers

      def initialize
        @scanner = nil
      end

      ##
      # Parses the +content_disposition+ header.  If +header+ is set to true
      # the "Content-Disposition:" portion will be parsed.
      #
      # Returns a Mechanize::HTTP::ContentDisposition, or nil when the input
      # is nil, empty or cannot be parsed.

      def parse content_disposition, header = false
        return nil if content_disposition.nil? or content_disposition.empty?

        @scanner = StringScanner.new content_disposition

        if header then
          return nil unless @scanner.scan(/Content-Disposition/i)
          return nil unless @scanner.scan(/:/)
          spaces
        end

        # Where the disposition value starts.  This is not 0 when the header
        # name was consumed above.
        start = @scanner.pos

        type = rfc_2045_token
        @scanner.scan(/;+/)

        # A token directly followed by "=" was a parameter name, not a
        # disposition type.  Rewind so it is parsed as a parameter.
        if @scanner.peek(1) == '=' then
          @scanner.pos = start
          type = nil
        end

        disposition = Mechanize::HTTP::ContentDisposition.new type

        spaces

        return nil unless parameters = parse_parameters

        disposition.filename          = parameters.delete 'filename'
        disposition.creation_date     = parameters.delete 'creation-date'
        disposition.modification_date = parameters.delete 'modification-date'
        disposition.read_date         = parameters.delete 'read-date'
        disposition.size              = parameters.delete 'size'
        disposition.parameters        = parameters

        disposition
      end

      ##
      # Extracts disposition-parm and returns a Hash keyed by the lower-cased
      # parameter name.
      #
      # Date parameters become Time objects and size becomes an Integer.
      # Returns nil when a parameter is malformed, including a date that is
      # not a valid RFC 822 date or a size without digits.

      def parse_parameters
        parameters = {}

        loop do
          return nil unless param = rfc_2045_token
          param = param.downcase # parameter names are case-insensitive
          return nil unless @scanner.scan(/=/)

          value = case param
                  when 'creation-date', 'modification-date', 'read-date' then
                    date = rfc_2045_quoted_string

                    begin
                      date && Time.rfc822(date)
                    rescue ArgumentError
                      nil
                    end
                  when 'size' then
                    digits = @scanner.scan(/\d+/)
                    digits && digits.to_i(10)
                  else # filename and any other parameter
                    rfc_2045_value
                  end

          return nil unless value

          parameters[param] = value

          spaces

          break if @scanner.eos? or not @scanner.scan(/;+/)

          spaces
        end

        parameters
      end

      ##
      #   quoted-string = <"> *(qtext/quoted-pair) <">
      #   qtext         = <any CHAR excepting <">, "\" & CR,
      #                   and including linear-white-space
      #   quoted-pair   = "\" CHAR
      #
      # Parses an RFC 2045 quoted-string and returns its content with
      # quoted-pairs unescaped and folded lines replaced by a single space.
      # Returns nil when the scanner is not at a quoted-string or the string
      # is not terminated.

      def rfc_2045_quoted_string
        return nil unless @scanner.scan(/"/)

        text = ''

        loop do
          # qtext: anything but \r, " and \
          if chunk = @scanner.scan(/[\000-\014\016-\041\043-\133\135-\177]+/) then
            text << chunk
          elsif @scanner.scan(/\\/) then
            # quoted-pair: the next byte is taken literally
            return nil if @scanner.eos?
            text << @scanner.get_byte
          elsif @scanner.scan(/\r\n[\t ]+/) then
            text << " "
          elsif @scanner.scan(/"/) then
            break
          else
            return nil
          end
        end

        text
      end

      ##
      #   token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, or tspecials>
      #
      # Parses an RFC 2045 token.  Returns nil when no token is present.

      def rfc_2045_token
        @scanner.scan(/[^\000-\037\177()<>@,;:\\"\/\[\]?= ]+/)
      end

      ##
      #   value := token / quoted-string
      #
      # Parses an RFC 2045 value

      def rfc_2045_value
        if @scanner.peek(1) == '"' then
          rfc_2045_quoted_string
        else
          rfc_2045_token
        end
      end

      ##
      #   1*SP
      #
      # Parses spaces

      def spaces
        @scanner.scan(/ +/)
      end

    end

  end
end
188
+
@@ -0,0 +1,155 @@
1
+ # coding: BINARY
2
+
3
+ require 'strscan'
4
+
5
+ ##
6
+ # Parses the WWW-Authenticate HTTP header into separate challenges.
7
+
8
# The namespace is opened by nesting so this file does not rely on
# Mechanize::HTTP having been defined by an earlier require.

class Mechanize
  class HTTP

    ##
    # Parses the WWW-Authenticate HTTP header into separate challenges.

    class WWWAuthenticateParser

      attr_accessor :scanner # :nodoc:

      ##
      # Creates a new header parser for WWW-Authenticate headers

      def initialize
        @scanner = nil
      end

      ##
      # Parses the +www_authenticate+ header value.  Returns an Array of
      # Mechanize::HTTP::AuthChallenge objects.
      #
      # For NTLM the rest of the header after the scheme is stored as the
      # challenge's params String.  For other schemes params is a Hash of the
      # auth-params.
      #
      # Parsing stops at the first position where no scheme can be read, and
      # the challenges collected so far are returned.  Returns nil when an
      # auth-param is followed by something other than a comma or the end of
      # the header.

      def parse www_authenticate
        challenges = []
        @scanner = StringScanner.new www_authenticate

        until @scanner.eos? do
          challenge = Mechanize::HTTP::AuthChallenge.new

          scheme = auth_scheme
          # Nothing was consumed, so retrying here would never terminate.
          break unless scheme
          challenge.scheme = scheme

          space = spaces

          if scheme == 'NTLM' then
            challenge.params = @scanner.scan(/.*/) if space

            challenges << challenge
            next
          end

          next unless space

          params = {}

          while true do
            pos = @scanner.pos
            name, value = auth_param

            unless name then
              challenge.params = params
              challenges << challenge

              # Not an auth-param.  A token should be next, starting a new
              # challenge, so rewind for the outer loop to read it as a scheme.
              @scanner.pos = pos unless @scanner.eos?
              break
            end

            params[name] = value

            spaces

            return nil unless ',' == @scanner.peek(1) || @scanner.eos?

            @scanner.scan(/(, *)+/)
          end
        end

        challenges
      end

      ##
      #   1*SP
      #
      # Parses spaces

      def spaces
        @scanner.scan(/ +/)
      end

      ##
      #   token = 1*<any CHAR except CTLs or separators>
      #
      # Parses a token.  Returns nil when no token is present.

      def token
        @scanner.scan(/[^\000-\037\177()<>@,;:\\"\/\[\]?={} ]+/)
      end

      ##
      #   auth-scheme = token
      #
      # Parses an auth scheme (a token)

      alias auth_scheme token

      ##
      #   auth-param = token "=" ( token | quoted-string )
      #
      # Parses an auth parameter.  Returns the name and value, or nil when the
      # scanner is not at a complete auth-param.  The scanner is not rewound
      # on failure.

      def auth_param
        return nil unless name = token
        return nil unless @scanner.scan(/=/)

        value = if @scanner.peek(1) == '"' then
                  quoted_string
                else
                  token
                end

        return nil unless value

        return name, value
      end

      ##
      #   quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
      #   qdtext        = <any TEXT except <">>
      #   quoted-pair   = "\" CHAR
      #
      # For TEXT, the rules of RFC 2047 are ignored.
      #
      # Returns the text between the quotes, or nil when the string is not
      # terminated.  A backslash-escaped quote is kept in the result together
      # with its backslash.

      def quoted_string
        return nil unless @scanner.scan(/"/)

        text = ''

        while true do
          # The octal high-byte range relies on this file's
          # "# coding: BINARY" magic comment.
          chunk = @scanner.scan(/[\r\n \t\041\043-\176\200-\377]+/) # not "

          if chunk then
            text << chunk

            # The chunk stopped at a quote preceded by a backslash, which is
            # an escaped quote and not the end of the string.
            text << @scanner.get_byte if
              chunk.end_with? '\\' and '"' == @scanner.peek(1)
          else
            if '"' == @scanner.peek(1) then
              @scanner.get_byte
              break
            else
              return nil
            end
          end
        end

        text
      end

    end

  end
end
155
+
@@ -0,0 +1,16 @@
1
+ # :stopdoc:
2
+
3
# Only Ruby releases before 1.9 get this replacement for
# Net::HTTP#keep_alive?; the previous implementation stays reachable as
# old_keep_alive?.
if RUBY_VERSION < '1.9'
  class Net::HTTP
    alias_method :old_keep_alive?, :keep_alive?

    # Decides whether the connection may be reused after +req+ / +res+.
    #
    # A "close" on the request's Connection header or the
    # @seems_1_0_server flag rules reuse out.  Otherwise the response's
    # Connection and then Proxy-Connection headers decide ("close" => false,
    # "keep-alive" => true).  When neither decides, reuse is allowed only
    # when @curr_http_version is '1.1'.
    def keep_alive?(req, res)
      return false if req['connection'].to_s =~ /close/i
      return false if @seems_1_0_server

      %w[connection proxy-connection].each do |header|
        directive = res[header].to_s

        return false if directive =~ /close/i
        return true  if directive =~ /keep-alive/i
      end

      @curr_http_version == '1.1'
    end
  end
end
16
+