mechanize 2.0.1 → 2.1.pre.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (148) hide show
  1. data.tar.gz.sig +0 -0
  2. data/CHANGELOG.rdoc +82 -0
  3. data/EXAMPLES.rdoc +1 -1
  4. data/FAQ.rdoc +9 -9
  5. data/Manifest.txt +35 -48
  6. data/README.rdoc +2 -1
  7. data/Rakefile +16 -3
  8. data/lib/mechanize.rb +809 -392
  9. data/lib/mechanize/content_type_error.rb +10 -11
  10. data/lib/mechanize/cookie.rb +193 -60
  11. data/lib/mechanize/cookie_jar.rb +39 -86
  12. data/lib/mechanize/download.rb +59 -0
  13. data/lib/mechanize/element_matcher.rb +1 -0
  14. data/lib/mechanize/file.rb +61 -76
  15. data/lib/mechanize/file_saver.rb +37 -35
  16. data/lib/mechanize/form.rb +475 -410
  17. data/lib/mechanize/form/button.rb +4 -7
  18. data/lib/mechanize/form/check_box.rb +10 -9
  19. data/lib/mechanize/form/field.rb +52 -42
  20. data/lib/mechanize/form/file_upload.rb +17 -19
  21. data/lib/mechanize/form/hidden.rb +3 -0
  22. data/lib/mechanize/form/image_button.rb +15 -16
  23. data/lib/mechanize/form/keygen.rb +34 -0
  24. data/lib/mechanize/form/multi_select_list.rb +20 -9
  25. data/lib/mechanize/form/option.rb +48 -47
  26. data/lib/mechanize/form/radio_button.rb +52 -45
  27. data/lib/mechanize/form/reset.rb +3 -0
  28. data/lib/mechanize/form/select_list.rb +10 -6
  29. data/lib/mechanize/form/submit.rb +3 -0
  30. data/lib/mechanize/form/text.rb +3 -0
  31. data/lib/mechanize/form/textarea.rb +3 -0
  32. data/lib/mechanize/headers.rb +17 -19
  33. data/lib/mechanize/history.rb +60 -61
  34. data/lib/mechanize/http.rb +5 -0
  35. data/lib/mechanize/http/agent.rb +485 -218
  36. data/lib/mechanize/http/auth_challenge.rb +59 -0
  37. data/lib/mechanize/http/auth_realm.rb +31 -0
  38. data/lib/mechanize/http/content_disposition_parser.rb +188 -0
  39. data/lib/mechanize/http/www_authenticate_parser.rb +155 -0
  40. data/lib/mechanize/monkey_patch.rb +14 -35
  41. data/lib/mechanize/page.rb +34 -2
  42. data/lib/mechanize/page/base.rb +6 -7
  43. data/lib/mechanize/page/frame.rb +5 -5
  44. data/lib/mechanize/page/image.rb +23 -23
  45. data/lib/mechanize/page/label.rb +16 -16
  46. data/lib/mechanize/page/link.rb +16 -0
  47. data/lib/mechanize/page/meta_refresh.rb +19 -7
  48. data/lib/mechanize/parser.rb +173 -0
  49. data/lib/mechanize/pluggable_parsers.rb +126 -83
  50. data/lib/mechanize/redirect_limit_reached_error.rb +16 -13
  51. data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -16
  52. data/lib/mechanize/response_code_error.rb +16 -17
  53. data/lib/mechanize/robots_disallowed_error.rb +22 -23
  54. data/lib/mechanize/test_case.rb +659 -0
  55. data/lib/mechanize/unauthorized_error.rb +3 -0
  56. data/lib/mechanize/unsupported_scheme_error.rb +4 -6
  57. data/lib/mechanize/util.rb +0 -12
  58. data/test/htdocs/form_order_test.html +11 -0
  59. data/test/htdocs/form_test.html +2 -2
  60. data/test/htdocs/tc_links.html +1 -0
  61. data/test/test_mechanize.rb +367 -59
  62. data/test/test_mechanize_cookie.rb +69 -4
  63. data/test/test_mechanize_cookie_jar.rb +200 -124
  64. data/test/test_mechanize_download.rb +43 -0
  65. data/test/test_mechanize_file.rb +53 -45
  66. data/test/{test_mechanize_file_response.rb → test_mechanize_file_connection.rb} +2 -2
  67. data/test/test_mechanize_file_request.rb +2 -2
  68. data/test/test_mechanize_file_saver.rb +21 -0
  69. data/test/test_mechanize_form.rb +345 -46
  70. data/test/test_mechanize_form_check_box.rb +5 -4
  71. data/test/test_mechanize_form_encoding.rb +10 -16
  72. data/test/test_mechanize_form_field.rb +45 -3
  73. data/test/test_mechanize_form_file_upload.rb +20 -0
  74. data/test/test_mechanize_form_image_button.rb +2 -2
  75. data/test/test_mechanize_form_keygen.rb +32 -0
  76. data/test/test_mechanize_form_multi_select_list.rb +84 -0
  77. data/test/test_mechanize_form_option.rb +55 -0
  78. data/test/test_mechanize_form_radio_button.rb +78 -0
  79. data/test/test_mechanize_form_select_list.rb +76 -0
  80. data/test/test_mechanize_form_textarea.rb +8 -7
  81. data/test/{test_headers.rb → test_mechanize_headers.rb} +4 -2
  82. data/test/test_mechanize_history.rb +103 -0
  83. data/test/test_mechanize_http_agent.rb +525 -17
  84. data/test/test_mechanize_http_auth_challenge.rb +39 -0
  85. data/test/test_mechanize_http_auth_realm.rb +49 -0
  86. data/test/test_mechanize_http_content_disposition_parser.rb +118 -0
  87. data/test/test_mechanize_http_www_authenticate_parser.rb +146 -0
  88. data/test/test_mechanize_link.rb +10 -14
  89. data/test/test_mechanize_page.rb +118 -0
  90. data/test/test_mechanize_page_encoding.rb +48 -13
  91. data/test/test_mechanize_page_frame.rb +16 -0
  92. data/test/test_mechanize_page_link.rb +27 -19
  93. data/test/test_mechanize_page_meta_refresh.rb +26 -14
  94. data/test/test_mechanize_parser.rb +289 -0
  95. data/test/test_mechanize_pluggable_parser.rb +52 -0
  96. data/test/test_mechanize_redirect_limit_reached_error.rb +24 -0
  97. data/test/test_mechanize_redirect_not_get_or_head_error.rb +3 -7
  98. data/test/test_mechanize_subclass.rb +2 -2
  99. data/test/test_mechanize_util.rb +24 -13
  100. data/test/test_multi_select.rb +23 -22
  101. metadata +145 -114
  102. metadata.gz.sig +0 -0
  103. data/lib/mechanize/inspect.rb +0 -88
  104. data/test/helper.rb +0 -175
  105. data/test/htdocs/form_select_all.html +0 -16
  106. data/test/htdocs/form_select_none.html +0 -17
  107. data/test/htdocs/form_select_noopts.html +0 -10
  108. data/test/htdocs/iframe_test.html +0 -16
  109. data/test/htdocs/nofollow.html +0 -9
  110. data/test/htdocs/norobots.html +0 -8
  111. data/test/htdocs/rel_nofollow.html +0 -8
  112. data/test/htdocs/tc_base_images.html +0 -10
  113. data/test/htdocs/tc_images.html +0 -8
  114. data/test/htdocs/tc_no_attributes.html +0 -16
  115. data/test/htdocs/tc_radiobuttons.html +0 -17
  116. data/test/htdocs/test_bad_encoding.html +0 -52
  117. data/test/servlets.rb +0 -402
  118. data/test/ssl_server.rb +0 -48
  119. data/test/test_cookies.rb +0 -129
  120. data/test/test_form_action.rb +0 -52
  121. data/test/test_form_as_hash.rb +0 -59
  122. data/test/test_form_button.rb +0 -46
  123. data/test/test_frames.rb +0 -34
  124. data/test/test_history.rb +0 -118
  125. data/test/test_history_added.rb +0 -16
  126. data/test/test_html_unscape_forms.rb +0 -46
  127. data/test/test_if_modified_since.rb +0 -20
  128. data/test/test_images.rb +0 -19
  129. data/test/test_no_attributes.rb +0 -13
  130. data/test/test_option.rb +0 -18
  131. data/test/test_pluggable_parser.rb +0 -136
  132. data/test/test_post_form.rb +0 -37
  133. data/test/test_pretty_print.rb +0 -22
  134. data/test/test_radiobutton.rb +0 -75
  135. data/test/test_redirect_limit_reached.rb +0 -39
  136. data/test/test_referer.rb +0 -81
  137. data/test/test_relative_links.rb +0 -40
  138. data/test/test_request.rb +0 -13
  139. data/test/test_response_code.rb +0 -53
  140. data/test/test_robots.rb +0 -72
  141. data/test/test_save_file.rb +0 -48
  142. data/test/test_scheme.rb +0 -48
  143. data/test/test_select.rb +0 -119
  144. data/test/test_select_all.rb +0 -15
  145. data/test/test_select_none.rb +0 -15
  146. data/test/test_select_noopts.rb +0 -18
  147. data/test/test_set_fields.rb +0 -44
  148. data/test/test_ssl_server.rb +0 -20
@@ -2,100 +2,143 @@ require 'mechanize/file'
2
2
  require 'mechanize/file_saver'
3
3
  require 'mechanize/page'
4
4
 
5
- class Mechanize
6
- # = Synopsis
7
- # This class is used to register and maintain pluggable parsers for
8
- # Mechanize to use.
9
- #
10
- # A Pluggable Parser is a parser that Mechanize uses for any particular
11
- # content type. Mechanize will ask PluggableParser for the class it
12
- # should initialize given any content type. This class allows users to
13
- # register their own pluggable parsers, or modify existing pluggable
14
- # parsers.
15
- #
16
- # PluggableParser returns a Mechanize::File object for content types
17
- # that it does not know how to handle. Mechanize::File provides
18
- # basic functionality for any content type, so it is a good class to
19
- # extend when building your own parsers.
20
- # == Example
21
- # To create your own parser, just create a class that takes four
22
- # parameters in the constructor. Here is an example of registering
23
- # a pluggable parser that handles CSV files:
24
- # class CSVParser < Mechanize::File
25
- # attr_reader :csv
26
- # def initialize(uri=nil, response=nil, body=nil, code=nil)
27
- # super(uri, response, body, code)
28
- # @csv = CSV.parse(body)
29
- # end
30
- # end
31
- # agent = Mechanize.new
32
- # agent.pluggable_parser.csv = CSVParser
33
- # agent.get('http://example.com/test.csv') # => CSVParser
34
- # Now any page that returns the content type of 'text/csv' will initialize
35
- # a CSVParser and return that object to the caller.
36
- #
37
- # To register a pluggable parser for a content type that pluggable parser
38
- # does not know about, just use the hash syntax:
39
- # agent.pluggable_parser['text/something'] = SomeClass
40
- #
41
- # To set the default parser, just use the 'defaut' method:
42
- # agent.pluggable_parser.default = SomeClass
43
- # Now all unknown content types will be instances of SomeClass.
44
- class PluggableParser
45
- CONTENT_TYPES = {
46
- :html => 'text/html',
47
- :wap => 'application/vnd.wap.xhtml+xml',
48
- :xhtml => 'application/xhtml+xml',
49
- :pdf => 'application/pdf',
50
- :csv => 'text/csv',
51
- :xml => 'text/xml',
5
+ ##
6
+ # This class is used to register and maintain pluggable parsers for Mechanize
7
+ # to use.
8
+ #
9
+ # Mechanize allows different parsers for different content types. Mechanize
10
+ # uses PluggableParser to determine which parser to use for any content type.
11
+ # To use your own pluggable parser or to change the default pluggable parsers,
12
+ # register them with this class.
13
+ #
14
+ # The default parser for unregistered content types is Mechanize::File.
15
+ #
16
+ # The module Mechanize::Parser provides basic functionality for any content
17
+ # type, so you may use it in custom parsers you write. For small files you
18
+ # wish to perform in-memory operations on, you should subclass
19
+ # Mechanize::File. For large files you should subclass Mechanize::Download as
20
+ # the content is only loaded into memory in small chunks.
21
+ #
22
+ # == Example
23
+ #
24
+ # To create your own parser, just create a class that takes four parameters in
25
+ # the constructor. Here is an example of registering a pluggable parser that
26
+ # handles CSV files:
27
+ #
28
+ # require 'csv'
29
+ #
30
+ # class CSVParser < Mechanize::File
31
+ # attr_reader :csv
32
+ #
33
+ # def initialize uri = nil, response = nil, body = nil, code = nil
34
+ # super uri, response, body, code
35
+ # @csv = CSV.parse body
36
+ # end
37
+ # end
38
+ #
39
+ # agent = Mechanize.new
40
+ # agent.pluggable_parser.csv = CSVParser
41
+ # agent.get('http://example.com/test.csv') # => CSVParser
42
+ #
43
+ # Now any response with a content type of 'text/csv' will initialize a
44
+ # CSVParser and return that object to the caller.
45
+ #
46
+ # To register a pluggable parser for a content type that pluggable parser does
47
+ # not know about, use the hash syntax:
48
+ #
49
+ # agent.pluggable_parser['text/something'] = SomeClass
50
+ #
51
+ # To set the default parser, use #default:
52
+ #
53
+ # agent.pluggable_parser.default = Mechanize::Download
54
+ #
55
+ # Now all unknown content types will be saved to disk and not loaded into
56
+ # memory.
57
+
58
+ class Mechanize::PluggableParser
59
+
60
+ CONTENT_TYPES = {
61
+ :html => 'text/html',
62
+ :wap => 'application/vnd.wap.xhtml+xml',
63
+ :xhtml => 'application/xhtml+xml',
64
+ :pdf => 'application/pdf',
65
+ :csv => 'text/csv',
66
+ :xml => 'text/xml',
67
+ }
68
+
69
+ attr_accessor :default
70
+
71
+ def initialize
72
+ @parsers = {
73
+ CONTENT_TYPES[:html] => Mechanize::Page,
74
+ CONTENT_TYPES[:xhtml] => Mechanize::Page,
75
+ CONTENT_TYPES[:wap] => Mechanize::Page,
52
76
  }
53
77
 
54
- attr_accessor :default
78
+ @default = Mechanize::File
79
+ end
80
+
81
+ ##
82
+ # Returns the parser registered for the given +content_type+
83
+
84
+ def parser(content_type)
85
+ content_type.nil? ? default : @parsers[content_type] || default
86
+ end
87
+
88
+ def register_parser(content_type, klass) # :nodoc:
89
+ @parsers[content_type] = klass
90
+ end
91
+
92
+ ##
93
+ # Registers +klass+ as the parser for text/html and application/xhtml+xml
94
+ # content
55
95
 
56
- def initialize
57
- @parsers = { CONTENT_TYPES[:html] => Page,
58
- CONTENT_TYPES[:xhtml] => Page,
59
- CONTENT_TYPES[:wap] => Page,
60
- }
61
- @default = File
62
- end
96
+ def html=(klass)
97
+ register_parser(CONTENT_TYPES[:html], klass)
98
+ register_parser(CONTENT_TYPES[:xhtml], klass)
99
+ end
100
+
101
+ ##
102
+ # Registers +klass+ as the parser for application/xhtml+xml content
63
103
 
64
- def parser(content_type)
65
- content_type.nil? ? default : @parsers[content_type] || default
66
- end
104
+ def xhtml=(klass)
105
+ register_parser(CONTENT_TYPES[:xhtml], klass)
106
+ end
67
107
 
68
- def register_parser(content_type, klass)
69
- @parsers[content_type] = klass
70
- end
108
+ ##
109
+ # Registers +klass+ as the parser for application/pdf content
71
110
 
72
- def html=(klass)
73
- register_parser(CONTENT_TYPES[:html], klass)
74
- register_parser(CONTENT_TYPES[:xhtml], klass)
75
- end
111
+ def pdf=(klass)
112
+ register_parser(CONTENT_TYPES[:pdf], klass)
113
+ end
76
114
 
77
- def xhtml=(klass)
78
- register_parser(CONTENT_TYPES[:xhtml], klass)
79
- end
115
+ ##
116
+ # Registers +klass+ as the parser for text/csv content
80
117
 
81
- def pdf=(klass)
82
- register_parser(CONTENT_TYPES[:pdf], klass)
83
- end
118
+ def csv=(klass)
119
+ register_parser(CONTENT_TYPES[:csv], klass)
120
+ end
84
121
 
85
- def csv=(klass)
86
- register_parser(CONTENT_TYPES[:csv], klass)
87
- end
122
+ ##
123
+ # Registers +klass+ as the parser for text/xml content
88
124
 
89
- def xml=(klass)
90
- register_parser(CONTENT_TYPES[:xml], klass)
91
- end
125
+ def xml=(klass)
126
+ register_parser(CONTENT_TYPES[:xml], klass)
127
+ end
92
128
 
93
- def [](content_type)
94
- @parsers[content_type]
95
- end
129
+ ##
130
+ # Retrieves the parser for +content_type+ content
96
131
 
97
- def []=(content_type, klass)
98
- @parsers[content_type] = klass
99
- end
132
+ def [](content_type)
133
+ @parsers[content_type]
100
134
  end
135
+
136
+ ##
137
+ # Sets the parser for +content_type+ content to +klass+
138
+
139
+ def []=(content_type, klass)
140
+ @parsers[content_type] = klass
141
+ end
142
+
101
143
  end
144
+
@@ -1,16 +1,19 @@
1
- class Mechanize
2
- # Thrown when too many redirects are sent
3
- class RedirectLimitReachedError < Mechanize::Error
4
- attr_reader :page, :response_code, :redirects
5
- def initialize(page, redirects)
6
- @page = page
7
- @redirects = redirects
8
- @response_code = page.code
9
- end
1
+ ##
2
+ # Raised when too many redirects are sent
10
3
 
11
- def to_s
12
- "Maximum redirect limit (#{redirects}) reached"
13
- end
14
- alias :inspect :to_s
4
+ class Mechanize::RedirectLimitReachedError < Mechanize::Error
5
+
6
+ attr_reader :page
7
+ attr_reader :redirects
8
+ attr_reader :response_code
9
+
10
+ def initialize page, redirects
11
+ @page = page
12
+ @redirects = redirects
13
+ @response_code = page.code
14
+
15
+ super "Redirect limit of #{redirects} reached"
15
16
  end
17
+
16
18
  end
19
+
@@ -1,19 +1,21 @@
1
- class Mechanize
2
- # Thrown when a POST, PUT, or DELETE request results in a redirect
3
- # see RFC 2616 10.3.2, 10.3.3 http://www.ietf.org/rfc/rfc2616.txt
4
- class RedirectNotGetOrHeadError < Mechanize::Error
5
- attr_reader :page, :response_code, :verb, :uri
6
- def initialize(page, verb)
7
- @page = page
8
- @verb = verb
9
- @uri = page.uri
10
- @response_code = page.code
11
- end
1
+ ##
2
+ # Raised when a POST, PUT, or DELETE request results in a redirect
3
+ # see RFC 2616 10.3.2, 10.3.3 http://www.ietf.org/rfc/rfc2616.txt
12
4
 
13
- def to_s
14
- method = @verb.to_s.upcase
15
- "#{@response_code} redirect received after a #{method} request"
16
- end
17
- alias :inspect :to_s
5
+ class Mechanize::RedirectNotGetOrHeadError < Mechanize::Error
6
+ attr_reader :page, :response_code, :verb, :uri
7
+ def initialize(page, verb)
8
+ @page = page
9
+ @verb = verb
10
+ @uri = page.uri
11
+ @response_code = page.code
18
12
  end
13
+
14
+ def to_s
15
+ method = @verb.to_s.upcase
16
+ "#{@response_code} redirect received after a #{method} request"
17
+ end
18
+
19
+ alias :inspect :to_s
19
20
  end
21
+
@@ -1,22 +1,21 @@
1
- class Mechanize
2
- # =Synopsis
3
- # This error is thrown when Mechanize encounters a response code it does
4
- # not know how to handle. Currently, this exception will be thrown
5
- # if Mechanize encounters response codes other than 200, 301, or 302.
6
- # Any other response code is up to the user to handle.
7
- class ResponseCodeError < Mechanize::Error
8
- attr_reader :response_code
9
- attr_reader :page
1
+ # This error is raised when Mechanize encounters a response code it does not
2
+ # know how to handle. Currently, this exception will be thrown if Mechanize
3
+ # encounters response codes other than 200, 301, or 302. Any other response
4
+ # code is up to the user to handle.
10
5
 
11
- def initialize(page)
12
- @page = page
13
- @response_code = page.code.to_s
14
- end
6
+ class Mechanize::ResponseCodeError < Mechanize::Error
7
+ attr_reader :response_code
8
+ attr_reader :page
15
9
 
16
- def to_s
17
- "#{@response_code} => #{Net::HTTPResponse::CODE_TO_OBJ[@response_code]}"
18
- end
10
+ def initialize(page)
11
+ @page = page
12
+ @response_code = page.code.to_s
13
+ end
19
14
 
20
- def inspect; to_s; end
15
+ def to_s
16
+ "#{@response_code} => #{Net::HTTPResponse::CODE_TO_OBJ[@response_code]}"
21
17
  end
18
+
19
+ alias inspect to_s
22
20
  end
21
+
@@ -1,29 +1,28 @@
1
- class Mechanize
2
- # Exception that is raised when an access to a resource is
3
- # disallowed by robots.txt or by HTML document itself.
4
- class RobotsDisallowedError < Mechanize::Error
5
- def initialize(url)
6
- if url.is_a?(URI)
7
- @url = url.to_s
8
- @uri = url
9
- else
10
- @url = url.to_s
11
- end
1
+ # Exception that is raised when an access to a resource is disallowed by
2
+ # robots.txt or by HTML document itself.
3
+
4
+ class Mechanize::RobotsDisallowedError < Mechanize::Error
5
+ def initialize(url)
6
+ if url.is_a?(URI)
7
+ @url = url.to_s
8
+ @uri = url
9
+ else
10
+ @url = url.to_s
12
11
  end
12
+ end
13
13
 
14
- # Returns the URL (string) of the resource that caused this error.
15
- attr_reader :url
14
+ # Returns the URL (string) of the resource that caused this error.
15
+ attr_reader :url
16
16
 
17
- # Returns the URL (URI object) of the resource that caused this
18
- # error. URI::InvalidURIError may be raised if the URL happens to
19
- # be invalid or not understood by the URI library.
20
- def uri
21
- @uri ||= URI.parse(url)
22
- end
17
+ # Returns the URL (URI object) of the resource that caused this
18
+ # error. URI::InvalidURIError may be raised if the URL happens to
19
+ # be invalid or not understood by the URI library.
20
+ def uri
21
+ @uri ||= URI.parse(url)
22
+ end
23
23
 
24
- def to_s
25
- "Robots access is disallowed for URL: #{url}"
26
- end
27
- alias :inspect :to_s
24
+ def to_s
25
+ "Robots access is disallowed for URL: #{url}"
28
26
  end
27
+ alias :inspect :to_s
29
28
  end
@@ -0,0 +1,659 @@
1
+ require 'mechanize'
2
+ require 'logger'
3
+ require 'tempfile'
4
+ require 'tmpdir'
5
+ require 'webrick'
6
+ require 'zlib'
7
+
8
+ require 'rubygems'
9
+
10
+ begin
11
+ gem 'minitest'
12
+ rescue Gem::LoadError
13
+ end
14
+
15
+ require 'minitest/autorun'
16
+
17
+ class Mechanize::TestCase < MiniTest::Unit::TestCase
18
+
19
+ TEST_DIR = File.expand_path '../../../test', __FILE__
20
+ REQUESTS = []
21
+
22
+ def setup
23
+ super
24
+
25
+ REQUESTS.clear
26
+ @mech = Mechanize.new
27
+ @ssl_private_key = nil
28
+ @ssl_certificate = nil
29
+ end
30
+
31
+ def fake_page agent = @mech
32
+ uri = URI 'http://fake.example/'
33
+ html = <<-END
34
+ <html>
35
+ <body>
36
+ <form><input type="submit" value="submit" /></form>
37
+ </body>
38
+ </html>
39
+ END
40
+
41
+ response = { 'content-type' => 'text/html' }
42
+
43
+ Mechanize::Page.new uri, response, html, 200, agent
44
+ end
45
+
46
+ def html_page body
47
+ uri = URI 'http://example/'
48
+ Mechanize::Page.new uri, { 'content-type' => 'text/html' }, body, 200, @mech
49
+ end
50
+
51
+ def in_tmpdir
52
+ Dir.mktmpdir do |dir|
53
+ Dir.chdir dir do
54
+ yield
55
+ end
56
+ end
57
+ end
58
+
59
+ def node element, attributes = {}
60
+ doc = Nokogiri::HTML::Document.new
61
+
62
+ node = Nokogiri::XML::Node.new element, doc
63
+
64
+ attributes.each do |name, value|
65
+ node[name] = value
66
+ end
67
+
68
+ node
69
+ end
70
+
71
+ def page uri, content_type = 'text/html', body = '', code = 200
72
+ uri = URI uri unless URI::Generic === uri
73
+
74
+ Mechanize::Page.new(uri, { 'content-type' => content_type }, body, code,
75
+ @mech)
76
+ end
77
+
78
+ def requests
79
+ REQUESTS
80
+ end
81
+
82
+ def ssl_private_key
83
+ @ssl_private_key ||= OpenSSL::PKey::RSA.new <<-KEY
84
+ -----BEGIN RSA PRIVATE KEY-----
85
+ MIG7AgEAAkEA8pmEfmP0Ibir91x6pbts4JmmsVZd3xvD5p347EFvBCbhBW1nv1Gs
86
+ bCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTXUQIBAQIBAQIhAPumXslvf6YasXa1hni3
87
+ p80joKOug2UUgqOLD2GUSO//AiEA9ssY6AFxjHWuwo/+/rkLmkfO2s1Lz3OeUEWq
88
+ 6DiHOK8CAQECAQECIQDt8bc4vS6wh9VXApNSKIpVygtxSFe/IwLeX26n77j6Qg==
89
+ -----END RSA PRIVATE KEY-----
90
+ KEY
91
+ end
92
+
93
+ def ssl_certificate
94
+ @ssl_certificate ||= OpenSSL::X509::Certificate.new <<-CERT
95
+ -----BEGIN CERTIFICATE-----
96
+ MIIBQjCB7aADAgECAgEAMA0GCSqGSIb3DQEBBQUAMCoxDzANBgNVBAMMBm5vYm9k
97
+ eTEXMBUGCgmSJomT8ixkARkWB2V4YW1wbGUwIBcNMTExMTAzMjEwODU5WhgPOTk5
98
+ OTEyMzExMjU5NTlaMCoxDzANBgNVBAMMBm5vYm9keTEXMBUGCgmSJomT8ixkARkW
99
+ B2V4YW1wbGUwWjANBgkqhkiG9w0BAQEFAANJADBGAkEA8pmEfmP0Ibir91x6pbts
100
+ 4JmmsVZd3xvD5p347EFvBCbhBW1nv1GsbCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTX
101
+ UQIBATANBgkqhkiG9w0BAQUFAANBAAAB////////////////////////////////
102
+ //8AMCEwCQYFKw4DAhoFAAQUePiv+QrJxyjtEJNnH5pB9OTWIqA=
103
+ -----END CERTIFICATE-----
104
+ CERT
105
+ end
106
+
107
+ end
108
+
109
+ class BasicAuthServlet < WEBrick::HTTPServlet::AbstractServlet
110
+ def do_GET(req,res)
111
+ htpd = WEBrick::HTTPAuth::Htpasswd.new('dot.htpasswd')
112
+ htpd.set_passwd('Blah', 'user', 'pass')
113
+ authenticator = WEBrick::HTTPAuth::BasicAuth.new({
114
+ :UserDB => htpd,
115
+ :Realm => 'Blah',
116
+ :Logger => Logger.new(nil)
117
+ }
118
+ )
119
+ begin
120
+ authenticator.authenticate(req,res)
121
+ res.body = 'You are authenticated'
122
+ rescue WEBrick::HTTPStatus::Unauthorized
123
+ res.status = 401
124
+ end
125
+ FileUtils.rm('dot.htpasswd')
126
+ end
127
+ alias :do_POST :do_GET
128
+ end
129
+
130
+ class ContentTypeServlet < WEBrick::HTTPServlet::AbstractServlet
131
+ def do_GET(req, res)
132
+ ct = req.query['ct'] || "text/html; charset=utf-8"
133
+ res['Content-Type'] = ct
134
+ res.body = "Hello World"
135
+ end
136
+ end
137
+
138
+ class DigestAuthServlet < WEBrick::HTTPServlet::AbstractServlet
139
+ htpd = WEBrick::HTTPAuth::Htdigest.new('digest.htpasswd')
140
+ htpd.set_passwd('Blah', 'user', 'pass')
141
+ @@authenticator = WEBrick::HTTPAuth::DigestAuth.new({
142
+ :UserDB => htpd,
143
+ :Realm => 'Blah',
144
+ :Algorithm => 'MD5',
145
+ :Logger => Logger.new(nil)
146
+ }
147
+ )
148
+ def do_GET(req,res)
149
+ def req.request_time; Time.now; end
150
+ def req.request_uri; '/digest_auth'; end
151
+ def req.request_method; "GET"; end
152
+
153
+ begin
154
+ @@authenticator.authenticate(req,res)
155
+ res.body = 'You are authenticated'
156
+ rescue WEBrick::HTTPStatus::Unauthorized
157
+ res.status = 401
158
+ end
159
+ FileUtils.rm('digest.htpasswd') if File.exists?('digest.htpasswd')
160
+ end
161
+ alias :do_POST :do_GET
162
+ end
163
+
164
+ class FileUploadServlet < WEBrick::HTTPServlet::AbstractServlet
165
+ def do_POST(req, res)
166
+ res.body = req.body
167
+ end
168
+ end
169
+
170
+ class FormServlet < WEBrick::HTTPServlet::AbstractServlet
171
+ def do_GET(req, res)
172
+ res.body = "<HTML><body>"
173
+ req.query.each_key { |k|
174
+ req.query[k].each_data { |data|
175
+ res.body << "<a href=\"#\">#{WEBrick::HTTPUtils.unescape(k)}:#{WEBrick::HTTPUtils.unescape(data)}</a><br />"
176
+ }
177
+ }
178
+ res.body << "<div id=\"query\">#{res.query}</div></body></HTML>"
179
+ res['Content-Type'] = "text/html"
180
+ end
181
+
182
+ def do_POST(req, res)
183
+ res.body = "<HTML><body>"
184
+
185
+ req.query.each_key { |k|
186
+ req.query[k].each_data { |data|
187
+ res.body << "<a href=\"#\">#{k}:#{data}</a><br />"
188
+ }
189
+ }
190
+
191
+ res.body << "<div id=\"query\">#{req.body}</div></body></HTML>"
192
+ res['Content-Type'] = "text/html"
193
+ end
194
+ end
195
+
196
+ class GzipServlet < WEBrick::HTTPServlet::AbstractServlet
197
+ def do_GET(req, res)
198
+ if req['Accept-Encoding'] =~ /gzip/
199
+ if name = req.query['file'] then
200
+ open("#{Mechanize::TestCase::TEST_DIR}/htdocs/#{name}", 'r') do |io|
201
+ string = ""
202
+ zipped = StringIO.new string, 'w'
203
+ Zlib::GzipWriter.wrap zipped do |gz|
204
+ gz.write io.read
205
+ end
206
+ res.body = string
207
+ end
208
+ else
209
+ res.body = ''
210
+ end
211
+ res['Content-Encoding'] = req['X-ResponseContentEncoding'] || 'gzip'
212
+ res['Content-Type'] = "text/html"
213
+ else
214
+ res.code = 400
215
+ res.body = 'no gzip'
216
+ end
217
+ end
218
+ end
219
+
220
+ class HeaderServlet < WEBrick::HTTPServlet::AbstractServlet
221
+ def do_GET(req, res)
222
+ res['Content-Type'] = "text/html"
223
+
224
+ req.query.each do |x,y|
225
+ res[x] = y
226
+ end
227
+
228
+ body = ''
229
+ req.each_header do |k,v|
230
+ body << "#{k}|#{v}\n"
231
+ end
232
+ res.body = body
233
+ end
234
+ end
235
+
236
+ class HttpRefreshServlet < WEBrick::HTTPServlet::AbstractServlet
237
+ def do_GET(req, res)
238
+ res['Content-Type'] = req.query['ct'] || "text/html"
239
+ refresh_time = req.query['refresh_time'] || 0
240
+ refresh_url = req.query['refresh_url'] || '/index.html'
241
+ res['Refresh'] = " #{refresh_time};url=#{refresh_url}\r\n";
242
+ end
243
+ end
244
+
245
+ class InfiniteRedirectServlet < WEBrick::HTTPServlet::AbstractServlet
246
+ def do_GET(req, res)
247
+ res['Content-Type'] = req.query['ct'] || "text/html"
248
+ res.status = req.query['code'] ? req.query['code'].to_i : '302'
249
+ number = req.query['q'] ? req.query['q'].to_i : 0
250
+ res['Location'] = "/infinite_redirect?q=#{number + 1}"
251
+ end
252
+ alias :do_POST :do_GET
253
+ end
254
+
255
+ class InfiniteRefreshServlet < WEBrick::HTTPServlet::AbstractServlet
256
+ def do_GET(req, res)
257
+ res['Content-Type'] = req.query['ct'] || "text/html"
258
+ res.status = req.query['code'] ? req.query['code'].to_i : '302'
259
+ number = req.query['q'] ? req.query['q'].to_i : 0
260
+ res['Refresh'] = " 0;url=http://localhost/infinite_refresh?q=#{number + 1}\r\n";
261
+ end
262
+ end
263
+
264
+ class ManyCookiesAsStringServlet < WEBrick::HTTPServlet::AbstractServlet
265
+ def do_GET(req, res)
266
+ cookies = []
267
+ name_cookie = WEBrick::Cookie.new("name", "Aaron")
268
+ name_cookie.path = "/"
269
+ name_cookie.expires = Time.now + 86400
270
+ name_cookie.domain = 'localhost'
271
+ cookies << name_cookie
272
+ cookies << name_cookie
273
+ cookies << name_cookie
274
+ cookies << "#{name_cookie}; HttpOnly"
275
+
276
+ expired_cookie = WEBrick::Cookie.new("expired", "doh")
277
+ expired_cookie.path = "/"
278
+ expired_cookie.expires = Time.now - 86400
279
+ cookies << expired_cookie
280
+
281
+ different_path_cookie = WEBrick::Cookie.new("a_path", "some_path")
282
+ different_path_cookie.path = "/some_path"
283
+ different_path_cookie.expires = Time.now + 86400
284
+ cookies << different_path_cookie
285
+
286
+ no_path_cookie = WEBrick::Cookie.new("no_path", "no_path")
287
+ no_path_cookie.expires = Time.now + 86400
288
+ cookies << no_path_cookie
289
+
290
+ no_exp_path_cookie = WEBrick::Cookie.new("no_expires", "nope")
291
+ no_exp_path_cookie.path = "/"
292
+ cookies << no_exp_path_cookie
293
+
294
+ res['Set-Cookie'] = cookies.join(', ')
295
+
296
+ res['Content-Type'] = "text/html"
297
+ res.body = "<html><body>hello</body></html>"
298
+ end
299
+ end
300
+
301
+ class ManyCookiesServlet < WEBrick::HTTPServlet::AbstractServlet
302
+ def do_GET(req, res)
303
+ name_cookie = WEBrick::Cookie.new("name", "Aaron")
304
+ name_cookie.path = "/"
305
+ name_cookie.expires = Time.now + 86400
306
+ res.cookies << name_cookie
307
+ res.cookies << name_cookie
308
+ res.cookies << name_cookie
309
+ res.cookies << name_cookie
310
+
311
+ expired_cookie = WEBrick::Cookie.new("expired", "doh")
312
+ expired_cookie.path = "/"
313
+ expired_cookie.expires = Time.now - 86400
314
+ res.cookies << expired_cookie
315
+
316
+ different_path_cookie = WEBrick::Cookie.new("a_path", "some_path")
317
+ different_path_cookie.path = "/some_path"
318
+ different_path_cookie.expires = Time.now + 86400
319
+ res.cookies << different_path_cookie
320
+
321
+ no_path_cookie = WEBrick::Cookie.new("no_path", "no_path")
322
+ no_path_cookie.expires = Time.now + 86400
323
+ res.cookies << no_path_cookie
324
+
325
+ no_exp_path_cookie = WEBrick::Cookie.new("no_expires", "nope")
326
+ no_exp_path_cookie.path = "/"
327
+ res.cookies << no_exp_path_cookie
328
+
329
+ res['Content-Type'] = "text/html"
330
+ res.body = "<html><body>hello</body></html>"
331
+ end
332
+ end
333
+
334
+ class ModifiedSinceServlet < WEBrick::HTTPServlet::AbstractServlet
335
+ def do_GET(req, res)
336
+ s_time = 'Fri, 04 May 2001 00:00:38 GMT'
337
+
338
+ my_time = Time.parse(s_time)
339
+
340
+ if req['If-Modified-Since']
341
+ your_time = Time.parse(req['If-Modified-Since'])
342
+ if my_time > your_time
343
+ res.body = 'This page was updated since you requested'
344
+ else
345
+ res.status = 304
346
+ end
347
+ else
348
+ res.body = 'You did not send an If-Modified-Since header'
349
+ end
350
+
351
+ res['Last-Modified'] = s_time
352
+ end
353
+ end
354
+
355
+ class NTLMServlet < WEBrick::HTTPServlet::AbstractServlet
356
+
357
+ def do_GET(req, res)
358
+ if req['Authorization'] =~ /^NTLM (.*)/ then
359
+ authorization = $1.unpack('m*').first
360
+
361
+ if authorization =~ /^NTLMSSP\000\001/ then
362
+ type_2 = 'TlRMTVNTUAACAAAADAAMADAAAAABAoEAASNFZ4mr' \
363
+ 'ze8AAAAAAAAAAGIAYgA8AAAARABPAE0AQQBJAE4A' \
364
+ 'AgAMAEQATwBNAEEASQBOAAEADABTAEUAUgBWAEUA' \
365
+ 'UgAEABQAZABvAG0AYQBpAG4ALgBjAG8AbQADACIA' \
366
+ 'cwBlAHIAdgBlAHIALgBkAG8AbQBhAGkAbgAuAGMA' \
367
+ 'bwBtAAAAAAA='
368
+
369
+ res['WWW-Authenticate'] = "NTLM #{type_2}"
370
+ res.status = 401
371
+ elsif authorization =~ /^NTLMSSP\000\003/ then
372
+ res.body = 'ok'
373
+ else
374
+ res['WWW-Authenticate'] = 'NTLM'
375
+ res.status = 401
376
+ end
377
+ else
378
+ res['WWW-Authenticate'] = 'NTLM'
379
+ res.status = 401
380
+ end
381
+ end
382
+
383
+ end
384
+
385
+ class OneCookieNoSpacesServlet < WEBrick::HTTPServlet::AbstractServlet
386
+ def do_GET(req, res)
387
+ cookie = WEBrick::Cookie.new("foo", "bar")
388
+ cookie.path = "/"
389
+ cookie.expires = Time.now + 86400
390
+ res.cookies << cookie.to_s.gsub(/; /, ';')
391
+ res['Content-Type'] = "text/html"
392
+ res.body = "<html><body>hello</body></html>"
393
+ end
394
+ end
395
+
396
+ class OneCookieServlet < WEBrick::HTTPServlet::AbstractServlet
397
+ def do_GET(req, res)
398
+ cookie = WEBrick::Cookie.new("foo", "bar")
399
+ cookie.path = "/"
400
+ cookie.expires = Time.now + 86400
401
+ res.cookies << cookie
402
+ res['Content-Type'] = "text/html"
403
+ res.body = "<html><body>hello</body></html>"
404
+ end
405
+ end
406
+
407
##
# Test servlet that sets a cookie named "quoted" whose value is wrapped in
# literal double quotes, and returns a small HTML page.
class QuotedValueCookieServlet < WEBrick::HTTPServlet::AbstractServlet
  def do_GET(req, res)
    expiry = Time.now + 86400

    cookie = WEBrick::Cookie.new("quoted", "\"value\"")
    cookie.path    = "/"
    cookie.expires = expiry
    res.cookies << cookie

    res['Content-Type'] = "text/html"
    res.body = "<html><body>hello</body></html>"
  end
end
417
+
418
##
# Test servlet that redirects every supported verb to "/verb".
#
# Query parameters:
# ct::   Content-Type to report (defaults to "text/html")
# code:: status code to respond with (defaults to '302')
class RedirectServlet < WEBrick::HTTPServlet::AbstractServlet
  def do_GET(req, res)
    requested_code = req.query['code']

    res['Content-Type'] = req.query['ct'] || "text/html"
    res.status = requested_code ? requested_code.to_i : '302'
    res['Location'] = "/verb"
  end

  # All other verbs behave exactly like GET.
  alias :do_POST   :do_GET
  alias :do_HEAD   :do_GET
  alias :do_PUT    :do_GET
  alias :do_DELETE :do_GET
end
430
+
431
##
# Test servlet that echoes the request's Referer header back as the response
# body (an empty body when the header is absent).
class RefererServlet < WEBrick::HTTPServlet::AbstractServlet
  def do_GET(req, res)
    res['Content-Type'] = "text/html"
    res.body = req['Referer'] || ''
  end

  # POST is answered identically to GET.
  alias :do_POST :do_GET
end
442
+
443
##
# Test servlet that sends a Refresh header with no url on the first request
# and a Refresh to http://localhost/index.html on every later request.  The
# request count is kept in a class variable, so it is shared by all
# instances of this servlet.
class RefreshWithoutUrl < WEBrick::HTTPServlet::AbstractServlet
  @@count = 0

  def do_GET(req, res)
    res['Content-Type'] = "text/html"
    @@count += 1
    res['Refresh'] = @@count > 1 ? "0; url=http://localhost/index.html" : "0"
  end
end
455
+
456
##
# Test servlet that sends a Refresh header with an empty "url=" on the first
# request and a Refresh to http://localhost/index.html on every later
# request.  The request count is kept in a class variable, so it is shared
# by all instances of this servlet.
class RefreshWithEmptyUrl < WEBrick::HTTPServlet::AbstractServlet
  @@count = 0

  def do_GET(req, res)
    res['Content-Type'] = "text/html"
    @@count += 1
    res['Refresh'] = @@count > 1 ? "0; url=http://localhost/index.html" : "0; url="
  end
end
468
+
469
##
# Test servlet that responds with whatever status code the client asks for.
#
# Query parameters:
# ct::   Content-Type to report (defaults to "text/html")
# code:: status code to respond with; when absent the status is left
#        untouched
#
# For codes 300-305 and 307 a Location header pointing at "/index.html" is
# added as well.
class ResponseCodeServlet < WEBrick::HTTPServlet::AbstractServlet
  def do_GET(req, res)
    res['Content-Type'] = req.query['ct'] || "text/html"

    requested = req.query['code']
    return unless requested

    code = requested.to_i
    res['Location'] = "/index.html" if [300, 301, 302, 303, 304, 305, 307].include?(code)
    res.status = code
  end
end
483
+
484
##
# Test servlet that renders every cookie received with the request as an
# "<a>" element whose text is "name:value", wrapped in a minimal HTML page.
class SendCookiesServlet < WEBrick::HTTPServlet::AbstractServlet
  def do_GET(req, res)
    res['Content-Type'] = "text/html"

    links = req.cookies.map { |c| "<a href=\"#\">#{c.name}:#{c.value}</a>" }
    res.body = "<html><body>#{links.join}</body></html>"
  end
end
494
+
495
##
# Test servlet that reports the HTTP verb it was called with in the
# X-Request-Method response header.  One +do_VERB+ handler is generated for
# each of HEAD, GET, POST, PUT and DELETE.
class VerbServlet < WEBrick::HTTPServlet::AbstractServlet
  %w(HEAD GET POST PUT DELETE).each do |verb|
    # define_method replaces the former string eval: same handlers, but no
    # source text is assembled and evaluated at load time.
    define_method("do_#{verb}") do |req, res|
      # dup so every response gets its own String, as the evaluated literal
      # used to provide.
      res.header['X-Request-Method'] = verb.dup
    end
  end
end
504
+
505
##
# Test-only patch of Net::HTTP that answers requests in-process instead of
# going over the network.
#
# Requests whose path appears in SERVLETS are dispatched to a fresh instance
# of the mapped servlet class; every other path is served from a file below
# "htdocs" in Mechanize::TestCase::TEST_DIR.  The original implementations
# stay reachable as +old_do_start+ and +old_request+.
class Net::HTTP
  alias :old_do_start :do_start

  # Marks the connection as started without doing anything else.
  def do_start
    @started = true
  end

  # Request path => servlet class that handles it.
  SERVLETS = {
    '/gzip'                   => GzipServlet,
    '/form_post'              => FormServlet,
    '/basic_auth'             => BasicAuthServlet,
    '/form post'              => FormServlet,
    '/response_code'          => ResponseCodeServlet,
    '/http_refresh'           => HttpRefreshServlet,
    '/content_type_test'      => ContentTypeServlet,
    '/referer'                => RefererServlet,
    '/file_upload'            => FileUploadServlet,
    '/one_cookie'             => OneCookieServlet,
    '/one_cookie_no_space'    => OneCookieNoSpacesServlet,
    '/many_cookies'           => ManyCookiesServlet,
    '/many_cookies_as_string' => ManyCookiesAsStringServlet,
    '/ntlm'                   => NTLMServlet,
    '/send_cookies'           => SendCookiesServlet,
    '/quoted_value_cookie'    => QuotedValueCookieServlet,
    '/if_modified_since'      => ModifiedSinceServlet,
    '/http_headers'           => HeaderServlet,
    '/infinite_redirect'      => InfiniteRedirectServlet,
    '/infinite_refresh'       => InfiniteRefreshServlet,
    '/redirect'               => RedirectServlet,
    '/refresh_without_url'    => RefreshWithoutUrl,
    '/refresh_with_empty_url' => RefreshWithEmptyUrl,
    '/digest_auth'            => DigestAuthServlet,
    '/verb'                   => VerbServlet,
  }

  # Contents of static files already read from disk, keyed by their path
  # relative to the test directory.
  PAGE_CACHE = {}

  alias :old_request :request

  ##
  # Builds a Net::HTTPResponse for +req+ without any network I/O.
  #
  # +req+ is recorded in Mechanize::TestCase::REQUESTS, and has its +query+
  # and +cookies+ filled in before being handed to a servlet.  The finished
  # response is yielded to the block, if one was given, and then returned.
  # +data+ is accepted for signature compatibility and is not used.
  def request(req, *data, &block)
    url = URI.parse(req.path)
    path = WEBrick::HTTPUtils.unescape(url.path)

    path = '/index.html' if path == '/'

    res = ::Response.new
    res.query_params = url.query

    # Parameters come from the query string for non-POST requests, otherwise
    # from a urlencoded or multipart body.
    req.query = if 'POST' != req.method && url.query then
                  WEBrick::HTTPUtils.parse_query url.query
                elsif req['content-type'] =~ /www-form-urlencoded/ then
                  WEBrick::HTTPUtils.parse_query req.body
                elsif req['content-type'] =~ /boundary=(.+)/ then
                  boundary = WEBrick::HTTPUtils.dequote $1
                  WEBrick::HTTPUtils.parse_form_data req.body, boundary
                else
                  {}
                end

    req.cookies = WEBrick::Cookie.parse(req['Cookie'])

    Mechanize::TestCase::REQUESTS << req

    if servlet_klass = SERVLETS[path]
      servlet = servlet_klass.new({})
      servlet.send "do_#{req.method}", req, res
    else
      # Anything other than '/', '\', '.', word characters and whitespace is
      # replaced with '_' before the path is used as a file name.
      filename = "htdocs#{path.gsub(/[^\/\\.\w\s]/, '_')}"
      unless PAGE_CACHE[filename]
        # File.open rather than Kernel#open: the path is derived from the
        # request and must only ever be treated as a file name.
        File.open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb') { |io|
          PAGE_CACHE[filename] = io.read
        }
      end

      res.body = PAGE_CACHE[filename]
      case filename
      when /\.txt$/
        res['Content-Type'] = 'text/plain'
      when /\.jpg$/
        res['Content-Type'] = 'image/jpeg'
      end
    end

    res['Content-Type'] ||= 'text/html'
    res.code ||= "200"

    response_klass = Net::HTTPResponse::CODE_TO_OBJ[res.code.to_s]
    response = response_klass.new res.http_version, res.code, res.message

    res.header.each do |k,v|
      v = v.first if v.length == 1
      response[k] = v
    end

    res.cookies.each do |cookie|
      response.add_field 'Set-Cookie', cookie.to_s
    end

    response['Content-Type'] ||= 'text/html'
    # Content-Length counts bytes, not characters, so use bytesize for the
    # default value.
    response['Content-Length'] = res['Content-Length'] || res.body.bytesize.to_s

    # The body is handed to the response through its @socket variable; the
    # singleton +read+ below appends the first +clen+ characters of the
    # body to +dest+ and ignores its third argument.
    io = StringIO.new(res.body)
    response.instance_variable_set :@socket, io
    def io.read clen, dest, _
      dest << string[0, clen]
    end

    body_exist = req.response_body_permitted? &&
                 response_klass.body_permitted?

    response.instance_variable_set :@body_exist, body_exist

    yield response if block_given?

    response
  end
end
622
+
623
##
# Adds plain read/write attributes to every request object so the test
# harness can attach parsed data to it.
class Net::HTTPRequest
  attr_accessor :query
  attr_accessor :body
  attr_accessor :cookies
  attr_accessor :user
end
626
+
627
##
# Minimal response object handed to the test servlets.  Header access
# (+[]+, +[]=+, ...) comes from Net::HTTPHeader and operates on +header+.
class Response
  include Net::HTTPHeader

  attr_accessor :header
  attr_accessor :body, :query, :cookies
  attr_accessor :query_params, :http_version
  attr_reader :code

  # Starts with no headers, an empty body, no status code, no cookies and
  # HTTP version '1.1'.
  def initialize
    @header       = {}
    @body         = ''
    @code         = nil
    @query        = nil
    @cookies      = []
    @http_version = '1.1'
  end

  # Stores the status code as a String, whatever type was given.
  def code=(c)
    @code = c.to_s
  end

  # Servlets use +status+; it is the same value as +code+.
  alias_method :status, :code
  alias_method :status=, :code=

  # Yields the whole body in a single call.
  def read_body
    yield body
  end

  # The reason phrase is always empty.
  def message
    ''
  end
end
659
+