mechanize 2.0.1 → 2.1.pre.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (148) hide show
  1. data.tar.gz.sig +0 -0
  2. data/CHANGELOG.rdoc +82 -0
  3. data/EXAMPLES.rdoc +1 -1
  4. data/FAQ.rdoc +9 -9
  5. data/Manifest.txt +35 -48
  6. data/README.rdoc +2 -1
  7. data/Rakefile +16 -3
  8. data/lib/mechanize.rb +809 -392
  9. data/lib/mechanize/content_type_error.rb +10 -11
  10. data/lib/mechanize/cookie.rb +193 -60
  11. data/lib/mechanize/cookie_jar.rb +39 -86
  12. data/lib/mechanize/download.rb +59 -0
  13. data/lib/mechanize/element_matcher.rb +1 -0
  14. data/lib/mechanize/file.rb +61 -76
  15. data/lib/mechanize/file_saver.rb +37 -35
  16. data/lib/mechanize/form.rb +475 -410
  17. data/lib/mechanize/form/button.rb +4 -7
  18. data/lib/mechanize/form/check_box.rb +10 -9
  19. data/lib/mechanize/form/field.rb +52 -42
  20. data/lib/mechanize/form/file_upload.rb +17 -19
  21. data/lib/mechanize/form/hidden.rb +3 -0
  22. data/lib/mechanize/form/image_button.rb +15 -16
  23. data/lib/mechanize/form/keygen.rb +34 -0
  24. data/lib/mechanize/form/multi_select_list.rb +20 -9
  25. data/lib/mechanize/form/option.rb +48 -47
  26. data/lib/mechanize/form/radio_button.rb +52 -45
  27. data/lib/mechanize/form/reset.rb +3 -0
  28. data/lib/mechanize/form/select_list.rb +10 -6
  29. data/lib/mechanize/form/submit.rb +3 -0
  30. data/lib/mechanize/form/text.rb +3 -0
  31. data/lib/mechanize/form/textarea.rb +3 -0
  32. data/lib/mechanize/headers.rb +17 -19
  33. data/lib/mechanize/history.rb +60 -61
  34. data/lib/mechanize/http.rb +5 -0
  35. data/lib/mechanize/http/agent.rb +485 -218
  36. data/lib/mechanize/http/auth_challenge.rb +59 -0
  37. data/lib/mechanize/http/auth_realm.rb +31 -0
  38. data/lib/mechanize/http/content_disposition_parser.rb +188 -0
  39. data/lib/mechanize/http/www_authenticate_parser.rb +155 -0
  40. data/lib/mechanize/monkey_patch.rb +14 -35
  41. data/lib/mechanize/page.rb +34 -2
  42. data/lib/mechanize/page/base.rb +6 -7
  43. data/lib/mechanize/page/frame.rb +5 -5
  44. data/lib/mechanize/page/image.rb +23 -23
  45. data/lib/mechanize/page/label.rb +16 -16
  46. data/lib/mechanize/page/link.rb +16 -0
  47. data/lib/mechanize/page/meta_refresh.rb +19 -7
  48. data/lib/mechanize/parser.rb +173 -0
  49. data/lib/mechanize/pluggable_parsers.rb +126 -83
  50. data/lib/mechanize/redirect_limit_reached_error.rb +16 -13
  51. data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -16
  52. data/lib/mechanize/response_code_error.rb +16 -17
  53. data/lib/mechanize/robots_disallowed_error.rb +22 -23
  54. data/lib/mechanize/test_case.rb +659 -0
  55. data/lib/mechanize/unauthorized_error.rb +3 -0
  56. data/lib/mechanize/unsupported_scheme_error.rb +4 -6
  57. data/lib/mechanize/util.rb +0 -12
  58. data/test/htdocs/form_order_test.html +11 -0
  59. data/test/htdocs/form_test.html +2 -2
  60. data/test/htdocs/tc_links.html +1 -0
  61. data/test/test_mechanize.rb +367 -59
  62. data/test/test_mechanize_cookie.rb +69 -4
  63. data/test/test_mechanize_cookie_jar.rb +200 -124
  64. data/test/test_mechanize_download.rb +43 -0
  65. data/test/test_mechanize_file.rb +53 -45
  66. data/test/{test_mechanize_file_response.rb → test_mechanize_file_connection.rb} +2 -2
  67. data/test/test_mechanize_file_request.rb +2 -2
  68. data/test/test_mechanize_file_saver.rb +21 -0
  69. data/test/test_mechanize_form.rb +345 -46
  70. data/test/test_mechanize_form_check_box.rb +5 -4
  71. data/test/test_mechanize_form_encoding.rb +10 -16
  72. data/test/test_mechanize_form_field.rb +45 -3
  73. data/test/test_mechanize_form_file_upload.rb +20 -0
  74. data/test/test_mechanize_form_image_button.rb +2 -2
  75. data/test/test_mechanize_form_keygen.rb +32 -0
  76. data/test/test_mechanize_form_multi_select_list.rb +84 -0
  77. data/test/test_mechanize_form_option.rb +55 -0
  78. data/test/test_mechanize_form_radio_button.rb +78 -0
  79. data/test/test_mechanize_form_select_list.rb +76 -0
  80. data/test/test_mechanize_form_textarea.rb +8 -7
  81. data/test/{test_headers.rb → test_mechanize_headers.rb} +4 -2
  82. data/test/test_mechanize_history.rb +103 -0
  83. data/test/test_mechanize_http_agent.rb +525 -17
  84. data/test/test_mechanize_http_auth_challenge.rb +39 -0
  85. data/test/test_mechanize_http_auth_realm.rb +49 -0
  86. data/test/test_mechanize_http_content_disposition_parser.rb +118 -0
  87. data/test/test_mechanize_http_www_authenticate_parser.rb +146 -0
  88. data/test/test_mechanize_link.rb +10 -14
  89. data/test/test_mechanize_page.rb +118 -0
  90. data/test/test_mechanize_page_encoding.rb +48 -13
  91. data/test/test_mechanize_page_frame.rb +16 -0
  92. data/test/test_mechanize_page_link.rb +27 -19
  93. data/test/test_mechanize_page_meta_refresh.rb +26 -14
  94. data/test/test_mechanize_parser.rb +289 -0
  95. data/test/test_mechanize_pluggable_parser.rb +52 -0
  96. data/test/test_mechanize_redirect_limit_reached_error.rb +24 -0
  97. data/test/test_mechanize_redirect_not_get_or_head_error.rb +3 -7
  98. data/test/test_mechanize_subclass.rb +2 -2
  99. data/test/test_mechanize_util.rb +24 -13
  100. data/test/test_multi_select.rb +23 -22
  101. metadata +145 -114
  102. metadata.gz.sig +0 -0
  103. data/lib/mechanize/inspect.rb +0 -88
  104. data/test/helper.rb +0 -175
  105. data/test/htdocs/form_select_all.html +0 -16
  106. data/test/htdocs/form_select_none.html +0 -17
  107. data/test/htdocs/form_select_noopts.html +0 -10
  108. data/test/htdocs/iframe_test.html +0 -16
  109. data/test/htdocs/nofollow.html +0 -9
  110. data/test/htdocs/norobots.html +0 -8
  111. data/test/htdocs/rel_nofollow.html +0 -8
  112. data/test/htdocs/tc_base_images.html +0 -10
  113. data/test/htdocs/tc_images.html +0 -8
  114. data/test/htdocs/tc_no_attributes.html +0 -16
  115. data/test/htdocs/tc_radiobuttons.html +0 -17
  116. data/test/htdocs/test_bad_encoding.html +0 -52
  117. data/test/servlets.rb +0 -402
  118. data/test/ssl_server.rb +0 -48
  119. data/test/test_cookies.rb +0 -129
  120. data/test/test_form_action.rb +0 -52
  121. data/test/test_form_as_hash.rb +0 -59
  122. data/test/test_form_button.rb +0 -46
  123. data/test/test_frames.rb +0 -34
  124. data/test/test_history.rb +0 -118
  125. data/test/test_history_added.rb +0 -16
  126. data/test/test_html_unscape_forms.rb +0 -46
  127. data/test/test_if_modified_since.rb +0 -20
  128. data/test/test_images.rb +0 -19
  129. data/test/test_no_attributes.rb +0 -13
  130. data/test/test_option.rb +0 -18
  131. data/test/test_pluggable_parser.rb +0 -136
  132. data/test/test_post_form.rb +0 -37
  133. data/test/test_pretty_print.rb +0 -22
  134. data/test/test_radiobutton.rb +0 -75
  135. data/test/test_redirect_limit_reached.rb +0 -39
  136. data/test/test_referer.rb +0 -81
  137. data/test/test_relative_links.rb +0 -40
  138. data/test/test_request.rb +0 -13
  139. data/test/test_response_code.rb +0 -53
  140. data/test/test_robots.rb +0 -72
  141. data/test/test_save_file.rb +0 -48
  142. data/test/test_scheme.rb +0 -48
  143. data/test/test_select.rb +0 -119
  144. data/test/test_select_all.rb +0 -15
  145. data/test/test_select_none.rb +0 -15
  146. data/test/test_select_noopts.rb +0 -18
  147. data/test/test_set_fields.rb +0 -44
  148. data/test/test_ssl_server.rb +0 -20
@@ -0,0 +1,3 @@
1
+ class Mechanize::Form::Reset < Mechanize::Form::Button
2
+ end
3
+
@@ -1,10 +1,13 @@
1
- # This class represents a select list or drop down box in a Form. Set the
2
- # value for the list by calling SelectList#value=. SelectList contains a
3
- # list of Option that were found. After finding the correct option, set
4
- # the select lists value to the option value:
5
- # selectlist.value = selectlist.options.first.value
6
- # Options can also be selected by "clicking" or selecting them. See Option
1
+ # This class represents a select list or drop down box in a Form. Set the
2
+ # value for the list by calling SelectList#value=. SelectList contains a list
3
+ # of Option that were found. After finding the correct option, set the select
4
+ # list's value to the option value:
5
+ #
6
+ # selectlist.value = selectlist.options.first.value
7
+ #
8
+ # Options can also be selected by "clicking" or selecting them. See Option
7
9
  class Mechanize::Form::SelectList < Mechanize::Form::MultiSelectList
10
+
8
11
  def initialize node
9
12
  super
10
13
  if selected_options.length > 1
@@ -36,5 +39,6 @@ class Mechanize::Form::SelectList < Mechanize::Form::MultiSelectList
36
39
  def query_value
37
40
  value ? [[name, value]] : nil
38
41
  end
42
+
39
43
  end
40
44
 
@@ -0,0 +1,3 @@
1
+ class Mechanize::Form::Submit < Mechanize::Form::Button
2
+ end
3
+
@@ -0,0 +1,3 @@
1
+ class Mechanize::Form::Text < Mechanize::Form::Field
2
+ end
3
+
@@ -0,0 +1,3 @@
1
+ class Mechanize::Form::Textarea < Mechanize::Form::Field
2
+ end
3
+
@@ -1,25 +1,23 @@
1
- class Mechanize
2
- class Headers < Hash
3
- def [](key)
4
- super(key.downcase)
5
- end
1
+ class Mechanize::Headers < Hash
2
+ def [](key)
3
+ super(key.downcase)
4
+ end
6
5
 
7
- def []=(key, value)
8
- super(key.downcase, value)
9
- end
6
+ def []=(key, value)
7
+ super(key.downcase, value)
8
+ end
10
9
 
11
- def key?(key)
12
- super(key.downcase)
13
- end
10
+ def key?(key)
11
+ super(key.downcase)
12
+ end
14
13
 
15
- def canonical_each
16
- block_given? or return enum_for(__method__)
17
- each { |key, value|
18
- key = key.capitalize
19
- key.gsub!(/-([a-z])/) { "-#{$1.upcase}" }
20
- yield [key, value]
21
- }
22
- end
14
+ def canonical_each
15
+ block_given? or return enum_for(__method__)
16
+ each { |key, value|
17
+ key = key.capitalize
18
+ key.gsub!(/-([a-z])/) { "-#{$1.upcase}" }
19
+ yield [key, value]
20
+ }
23
21
  end
24
22
  end
25
23
 
@@ -1,83 +1,82 @@
1
- class Mechanize
2
- ##
3
- # This class manages history for your mechanize object.
4
- class History < Array
5
- attr_accessor :max_size
6
-
7
- def initialize(max_size = nil)
8
- @max_size = max_size
9
- @history_index = {}
10
- end
1
+ ##
2
+ # This class manages history for your mechanize object.
11
3
 
12
- def initialize_copy(orig)
13
- super
14
- @history_index = orig.instance_variable_get(:@history_index).dup
15
- end
4
+ class Mechanize::History < Array
16
5
 
17
- def inspect # :nodoc:
18
- uris = map { |page| page.uri }.join ', '
6
+ attr_accessor :max_size
19
7
 
20
- "[#{uris}]"
21
- end
8
+ def initialize(max_size = nil)
9
+ @max_size = max_size
10
+ @history_index = {}
11
+ end
22
12
 
23
- def push(page, uri = nil)
24
- super(page)
13
+ def initialize_copy(orig)
14
+ super
15
+ @history_index = orig.instance_variable_get(:@history_index).dup
16
+ end
25
17
 
26
- @history_index[(uri ? uri : page.uri).to_s] = page
18
+ def inspect # :nodoc:
19
+ uris = map { |page| page.uri }.join ', '
27
20
 
28
- if @max_size && self.length > @max_size
29
- while self.length > @max_size
30
- self.shift
31
- end
32
- end
21
+ "[#{uris}]"
22
+ end
33
23
 
34
- self
35
- end
36
- alias :<< :push
24
+ def push(page, uri = nil)
25
+ super page
37
26
 
38
- def visited_page(uri)
39
- page = @history_index[uri.to_s]
27
+ index = uri ? uri : page.uri
28
+ @history_index[index.to_s] = page
40
29
 
41
- return page if page # HACK
30
+ shift while length > @max_size if @max_size
42
31
 
43
- uri = uri.dup
44
- uri.path = '/' if uri.path.empty?
32
+ self
33
+ end
45
34
 
46
- @history_index[uri.to_s]
47
- end
35
+ alias :<< :push
48
36
 
49
- alias visited? visited_page
37
+ def visited? uri
38
+ page = @history_index[uri.to_s]
50
39
 
51
- def clear
52
- @history_index.clear
53
- super
54
- end
40
+ return page if page # HACK
55
41
 
56
- def shift
57
- return nil if length == 0
58
- page = self[0]
59
- self[0] = nil
42
+ uri = uri.dup
43
+ uri.path = '/' if uri.path.empty?
60
44
 
61
- super
45
+ @history_index[uri.to_s]
46
+ end
62
47
 
63
- remove_from_index(page)
64
- page
65
- end
48
+ alias visited_page visited?
66
49
 
67
- def pop
68
- return nil if length == 0
69
- page = super
70
- remove_from_index(page)
71
- page
72
- end
50
+ def clear
51
+ @history_index.clear
52
+ super
53
+ end
73
54
 
74
- private
55
+ def shift
56
+ return nil if length == 0
57
+ page = self[0]
58
+ self[0] = nil
75
59
 
76
- def remove_from_index(page)
77
- @history_index.each do |k,v|
78
- @history_index.delete(k) if v == page
79
- end
80
- end
60
+ super
81
61
 
62
+ remove_from_index(page)
63
+ page
82
64
  end
65
+
66
+ def pop
67
+ return nil if length == 0
68
+ page = super
69
+ remove_from_index(page)
70
+ page
71
+ end
72
+
73
+ private
74
+
75
+ def remove_from_index(page)
76
+ @history_index.each do |k,v|
77
+ @history_index.delete(k) if v == page
78
+ end
79
+ end
80
+
83
81
  end
82
+
@@ -1,3 +1,8 @@
1
+ ##
2
+ # Mechanize::HTTP contains classes for communicating with HTTP servers. All
3
+ # API under this namespace is considered private and is subject to change at
4
+ # any time.
5
+
1
6
  class Mechanize::HTTP
2
7
  end
3
8
 
@@ -1,38 +1,63 @@
1
+ require 'tempfile'
2
+ require 'net/ntlm'
3
+ require 'kconv'
4
+ require 'webrobots'
5
+
1
6
  ##
2
- # An HTTP (and local disk access) user agent
7
+ # An HTTP (and local disk access) user agent. This class is an implementation
8
+ # detail and is subject to change at any time.
3
9
 
4
10
  class Mechanize::HTTP::Agent
5
11
 
6
- attr_reader :cookie_jar
12
+ # :section: Headers
7
13
 
8
14
  # Disables If-Modified-Since conditional requests (enabled by default)
9
15
  attr_accessor :conditional_requests
10
- attr_accessor :context
11
16
 
12
- # Follow HTML meta refresh. If set to +:anywhere+ meta refresh tags outside
13
- # of the head element will be followed.
14
- attr_accessor :follow_meta_refresh
17
+ # Is gzip compression of responses enabled?
15
18
  attr_accessor :gzip_enabled
16
- attr_accessor :history
17
19
 
18
- # Length of time to wait until a connection is opened in seconds
19
- attr_accessor :open_timeout
20
+ # A hash of request headers to be used for every request
21
+ attr_accessor :request_headers
20
22
 
21
- attr_accessor :password
22
- attr_reader :proxy_uri
23
+ # The User-Agent header to send
24
+ attr_reader :user_agent
25
+
26
+ # :section: History
27
+
28
+ # history of requests made
29
+ attr_accessor :history
30
+
31
+ # :section: Hooks
23
32
 
24
33
  # A list of hooks to call after retrieving a response. Hooks are called with
25
34
  # the agent and the response returned.
26
-
27
35
  attr_reader :post_connect_hooks
28
36
 
29
37
  # A list of hooks to call before making a request. Hooks are called with
30
38
  # the agent and the request to be performed.
31
-
32
39
  attr_reader :pre_connect_hooks
33
40
 
34
- # Length of time to attempt to read data from the server
35
- attr_accessor :read_timeout
41
+ # A list of hooks to call to handle the content-encoding of a request.
42
+ attr_reader :content_encoding_hooks
43
+
44
+ # :section: HTTP Authentication
45
+
46
+ attr_reader :authenticate_methods # :nodoc:
47
+ attr_reader :digest_challenges # :nodoc:
48
+ attr_accessor :user
49
+ attr_accessor :password
50
+
51
+ # :section: Redirection
52
+
53
+ # Follow HTML meta refresh and HTTP Refresh. If set to +:anywhere+ meta
54
+ # refresh tags outside of the head element will be followed.
55
+ attr_accessor :follow_meta_refresh
56
+
57
+ # Follow an HTML meta refresh that has no "url=" in the content attribute.
58
+ #
59
+ # Defaults to false to prevent infinite refresh loops.
60
+ attr_accessor :follow_meta_refresh_self
36
61
 
37
62
  # Controls how this agent deals with redirects. The following values are
38
63
  # allowed:
@@ -40,22 +65,17 @@ class Mechanize::HTTP::Agent
40
65
  # :all, true:: All 3xx redirects are followed (default)
41
66
  # :permanent:: Only 301 Moved Permanently redirects are followed
42
67
  # false:: No redirects are followed
43
-
44
68
  attr_accessor :redirect_ok
45
- attr_accessor :redirection_limit
46
69
 
47
- # A hash of request headers to be used
70
+ # Maximum number of redirects to follow
71
+ attr_accessor :redirection_limit
48
72
 
49
- attr_accessor :request_headers
73
+ # :section: Robots
50
74
 
51
75
  # When true, this agent will consult the site's robots.txt for each access.
52
-
53
76
  attr_reader :robots
54
77
 
55
- attr_accessor :scheme_handlers
56
-
57
- attr_accessor :user
58
- attr_reader :user_agent
78
+ # :section: SSL
59
79
 
60
80
  # Path to an OpenSSL server certificate file
61
81
  attr_accessor :ca_file
@@ -66,6 +86,9 @@ class Mechanize::HTTP::Agent
66
86
  # An OpenSSL client certificate or the path to a certificate file.
67
87
  attr_accessor :cert
68
88
 
89
+ # An SSL certificate store
90
+ attr_accessor :cert_store
91
+
69
92
  # OpenSSL key password
70
93
  attr_accessor :pass
71
94
 
@@ -77,38 +100,99 @@ class Mechanize::HTTP::Agent
77
100
  # when the SSLContext was created
78
101
  attr_accessor :verify_callback
79
102
 
103
+ # How to verify SSL connections. Defaults to VERIFY_PEER
104
+ attr_accessor :verify_mode
105
+
106
+ # :section: Timeouts
107
+
108
+ # Reset connections that have not been used in this many seconds
109
+ attr_reader :idle_timeout
110
+
111
+ # Set to false to disable HTTP/1.1 keep-alive requests
112
+ attr_accessor :keep_alive
113
+
114
+ # Length of time to wait until a connection is opened in seconds
115
+ attr_accessor :open_timeout
116
+
117
+ # Length of time to attempt to read data from the server
118
+ attr_accessor :read_timeout
119
+
120
+ # :section:
121
+
122
+ # The cookies for this agent
123
+ attr_accessor :cookie_jar
124
+
125
+ # URI for a proxy connection
126
+ attr_reader :proxy_uri
127
+
128
+ # Retry non-idempotent requests?
129
+ attr_reader :retry_change_requests
130
+
131
+ # Responses larger than this will be written to a Tempfile instead of stored
132
+ # in memory.
133
+ attr_accessor :max_file_buffer
134
+
135
+ # :section: Utility
136
+
137
+ # The context parses responses into pages
138
+ attr_accessor :context
139
+
80
140
  attr_reader :http # :nodoc:
81
141
 
142
+ # Handlers for various URI schemes
143
+ attr_accessor :scheme_handlers
144
+
145
+ # :section:
146
+
147
+ # Creates a new Mechanize HTTP user agent. The user agent is an
148
+ # implementation detail of mechanize and its API may change at any time.
149
+
82
150
  def initialize
83
- @auth_hash = {} # Keep track of urls for sending auth
84
- @conditional_requests = true
85
- @context = nil
86
- @cookie_jar = Mechanize::CookieJar.new
87
- @digest = nil # DigestAuth Digest
151
+ @conditional_requests = true
152
+ @context = nil
153
+ @content_encoding_hooks = []
154
+ @cookie_jar = Mechanize::CookieJar.new
155
+ @follow_meta_refresh = false
156
+ @follow_meta_refresh_self = false
157
+ @gzip_enabled = true
158
+ @history = Mechanize::History.new
159
+ @idle_timeout = nil
160
+ @keep_alive = true
161
+ @keep_alive_time = 300
162
+ @max_file_buffer = 10240
163
+ @open_timeout = nil
164
+ @post_connect_hooks = []
165
+ @pre_connect_hooks = []
166
+ @proxy_uri = nil
167
+ @read_timeout = nil
168
+ @redirect_ok = true
169
+ @redirection_limit = 20
170
+ @request_headers = {}
171
+ @retry_change_requests = false
172
+ @robots = false
173
+ @user_agent = nil
174
+ @webrobots = nil
175
+
176
+ # HTTP Authentication
177
+ @authenticate_parser = Mechanize::HTTP::WWWAuthenticateParser.new
178
+ @authenticate_methods = Hash.new do |methods, uri|
179
+ methods[uri] = Hash.new do |realms, auth_scheme|
180
+ realms[auth_scheme] = []
181
+ end
182
+ end
88
183
  @digest_auth = Net::HTTP::DigestAuth.new
89
- @follow_meta_refresh = false
90
- @gzip_enabled = true
91
- @history = Mechanize::History.new
92
- @keep_alive_time = 300
93
- @open_timeout = nil
184
+ @digest_challenges = {}
94
185
  @password = nil # HTTP auth password
95
- @post_connect_hooks = []
96
- @pre_connect_hooks = []
97
- @proxy_uri = nil
98
- @read_timeout = nil
99
- @redirect_ok = true
100
- @redirection_limit = 20
101
- @request_headers = {}
102
- @robots = false
103
186
  @user = nil # HTTP auth user
104
- @user_agent = nil
105
- @webrobots = nil
106
187
 
107
- @ca_file = nil # OpenSSL server certificate file
108
- @cert = nil # OpenSSL Certificate
109
- @key = nil # OpenSSL Private Key
110
- @pass = nil # OpenSSL Password
188
+ # SSL
189
+ @ca_file = nil
190
+ @cert = nil
191
+ @cert_store = nil
192
+ @key = nil
193
+ @pass = nil
111
194
  @verify_callback = nil
195
+ @verify_mode = nil
112
196
 
113
197
  @scheme_handlers = Hash.new { |h, scheme|
114
198
  h[scheme] = lambda { |link, page|
@@ -122,41 +206,14 @@ class Mechanize::HTTP::Agent
122
206
  @scheme_handlers['file'] = @scheme_handlers['http']
123
207
  end
124
208
 
125
- # Equivalent to the browser back button. Returns the most recent page
126
- # visited.
127
- def back
128
- @history.pop
129
- end
130
-
131
- def certificate
132
- @http.certificate
133
- end
134
-
135
- def connection_for uri
136
- case uri.scheme.downcase
137
- when 'http', 'https' then
138
- return @http
139
- when 'file' then
140
- return Mechanize::FileConnection.new
141
- end
142
- end
143
-
144
- ##
145
- # Returns the latest page loaded by the agent
146
-
147
- def current_page
148
- @history.last
149
- end
150
-
151
- def enable_gzip request
152
- request['accept-encoding'] = if @gzip_enabled
153
- 'gzip,deflate,identity'
154
- else
155
- 'identity'
156
- end
157
- end
209
+ # Retrieves +uri+ and parses it into a page or other object according to
210
+ # PluggableParser. If the URI is an HTTP or HTTPS scheme URI the given HTTP
211
+ # +method+ is used to retrieve it, along with the HTTP +headers+, request
212
+ # +params+ and HTTP +referer+.
213
+ #
214
+ # +redirects+ tracks the number of redirects experienced when retrieving the
215
+ # page. If it is over the redirection_limit an error will be raised.
158
216
 
159
- # uri is an absolute URI
160
217
  def fetch uri, method = :get, headers = {}, params = [],
161
218
  referer = current_page, redirects = 0
162
219
  referer_uri = referer ? referer.uri : nil
@@ -169,18 +226,19 @@ class Mechanize::HTTP::Agent
169
226
 
170
227
  connection = connection_for uri
171
228
 
172
- request_auth request, uri
229
+ request_auth request, uri
173
230
 
174
- enable_gzip request
231
+ disable_keep_alive request
232
+ enable_gzip request
175
233
 
176
234
  request_language_charset request
177
- request_cookies request, uri
178
- request_host request, uri
179
- request_referer request, uri, referer_uri
180
- request_user_agent request
181
- request_add_headers request, headers
235
+ request_cookies request, uri
236
+ request_host request, uri
237
+ request_referer request, uri, referer_uri
238
+ request_user_agent request
239
+ request_add_headers request, headers
182
240
 
183
- pre_connect request
241
+ pre_connect request
184
242
 
185
243
  # Consult robots.txt
186
244
  if robots && uri.is_a?(URI::HTTP)
@@ -188,6 +246,8 @@ class Mechanize::HTTP::Agent
188
246
  end
189
247
 
190
248
  # Add If-Modified-Since if page is in history
249
+ page = visited_page(uri)
250
+
191
251
  if (page = visited_page(uri)) and page.response['Last-Modified']
192
252
  request['If-Modified-Since'] = page.response['Last-Modified']
193
253
  end if(@conditional_requests)
@@ -209,11 +269,13 @@ class Mechanize::HTTP::Agent
209
269
  res
210
270
  }
211
271
 
212
- response_body = response_content_encoding response, response_body_io
272
+ hook_content_encoding response, uri, response_body_io
213
273
 
214
- post_connect uri, response, response_body
274
+ response_body_io = response_content_encoding response, response_body_io
215
275
 
216
- page = response_parse response, response_body, uri
276
+ post_connect uri, response, response_body_io
277
+
278
+ page = response_parse response, response_body_io, uri
217
279
 
218
280
  response_cookies response, uri, page
219
281
 
@@ -233,7 +295,7 @@ class Mechanize::HTTP::Agent
233
295
  log.debug("Got cached page") if log
234
296
  visited_page(uri) || page
235
297
  when Net::HTTPRedirection
236
- response_redirect response, method, page, redirects
298
+ response_redirect response, method, page, redirects, referer
237
299
  when Net::HTTPUnauthorized
238
300
  response_authenticate(response, page, uri, request, headers, params,
239
301
  referer)
@@ -242,6 +304,35 @@ class Mechanize::HTTP::Agent
242
304
  end
243
305
  end
244
306
 
307
+ # Retry non-idempotent requests
308
+
309
+ def retry_change_requests= retri
310
+ @retry_change_requests = retri
311
+ @http.retry_change_requests = retri if @http
312
+ end
313
+
314
+ # :section: Headers
315
+
316
+ def user_agent= user_agent
317
+ @webrobots = nil if user_agent != @user_agent
318
+ @user_agent = user_agent
319
+ end
320
+
321
+ # :section: History
322
+
323
+ # Equivalent to the browser back button. Returns the most recent page
324
+ # visited.
325
+ def back
326
+ @history.pop
327
+ end
328
+
329
+ ##
330
+ # Returns the latest page loaded by the agent
331
+
332
+ def current_page
333
+ @history.last
334
+ end
335
+
245
336
  def max_history
246
337
  @history.max_size
247
338
  end
@@ -250,24 +341,19 @@ class Mechanize::HTTP::Agent
250
341
  @history.max_size = length
251
342
  end
252
343
 
253
- def http_request uri, method, params = nil
254
- case uri.scheme.downcase
255
- when 'http', 'https' then
256
- klass = Net::HTTP.const_get(method.to_s.capitalize)
344
+ # Returns a visited page for the url passed in, otherwise nil
345
+ def visited_page url
346
+ @history.visited_page resolve url
347
+ end
257
348
 
258
- request ||= klass.new(uri.request_uri)
259
- request.body = params.first if params
349
+ # :section: Hooks
260
350
 
261
- request
262
- when 'file' then
263
- Mechanize::FileRequest.new uri
351
+ def hook_content_encoding response, uri, response_body_io
352
+ @content_encoding_hooks.each do |hook|
353
+ hook.call self, uri, response, response_body_io
264
354
  end
265
355
  end
266
356
 
267
- def log
268
- Mechanize.log
269
- end
270
-
271
357
  ##
272
358
  # Invokes hooks added to post_connect_hooks after a +response+ is returned
273
359
  # and the response +body+ is handled.
@@ -275,9 +361,13 @@ class Mechanize::HTTP::Agent
275
361
  # Yields the +context+, the +uri+ for the request, the +response+ and the
276
362
  # response +body+.
277
363
 
278
- def post_connect uri, response, body # :yields: agent, uri, response, body
364
+ def post_connect uri, response, body_io # :yields: agent, uri, response, body
279
365
  @post_connect_hooks.each do |hook|
280
- hook.call self, uri, response, body
366
+ begin
367
+ hook.call self, uri, response, body_io.read
368
+ ensure
369
+ body_io.rewind
370
+ end
281
371
  end
282
372
  end
283
373
 
@@ -291,26 +381,83 @@ class Mechanize::HTTP::Agent
291
381
  end
292
382
  end
293
383
 
294
- def request_auth request, uri
295
- auth_type = @auth_hash[uri.host]
384
+ # :section: Request
385
+
386
+ def connection_for uri
387
+ case uri.scheme.downcase
388
+ when 'http', 'https' then
389
+ return @http
390
+ when 'file' then
391
+ return Mechanize::FileConnection.new
392
+ end
393
+ end
296
394
 
297
- return unless auth_type
395
+ def disable_keep_alive request
396
+ request['connection'] = 'close' unless @keep_alive
397
+ end
298
398
 
299
- case auth_type
300
- when :basic
301
- request.basic_auth @user, @password
302
- when :digest, :iis_digest
303
- uri.user = @user
304
- uri.password = @password
399
+ def enable_gzip request
400
+ request['accept-encoding'] = if @gzip_enabled
401
+ 'gzip,deflate,identity'
402
+ else
403
+ 'identity'
404
+ end
405
+ end
406
+
407
+ def http_request uri, method, params = nil
408
+ case uri.scheme.downcase
409
+ when 'http', 'https' then
410
+ klass = Net::HTTP.const_get(method.to_s.capitalize)
411
+
412
+ request ||= klass.new(uri.request_uri)
413
+ request.body = params.first if params
414
+
415
+ request
416
+ when 'file' then
417
+ Mechanize::FileRequest.new uri
418
+ end
419
+ end
305
420
 
306
- iis = auth_type == :iis_digest
421
+ def request_add_headers request, headers = {}
422
+ @request_headers.each do |k,v|
423
+ request[k] = v
424
+ end
307
425
 
308
- auth = @digest_auth.auth_header uri, @digest, request.method, iis
426
+ headers.each do |field, value|
427
+ case field
428
+ when :etag then request["ETag"] = value
429
+ when :if_modified_since then request["If-Modified-Since"] = value
430
+ when Symbol then
431
+ raise ArgumentError, "unknown header symbol #{field}"
432
+ else
433
+ request[field] = value
434
+ end
435
+ end
436
+ end
309
437
 
310
- request['Authorization'] = auth
438
+ def request_auth request, uri
439
+ base_uri = uri + '/'
440
+ schemes = @authenticate_methods[base_uri]
441
+
442
+ if realm = schemes[:digest].find { |r| r.uri == base_uri } then
443
+ request_auth_digest request, uri, realm, base_uri, false
444
+ elsif realm = schemes[:iis_digest].find { |r| r.uri == base_uri } then
445
+ request_auth_digest request, uri, realm, base_uri, true
446
+ elsif schemes[:basic].find { |r| r.uri == base_uri } then
447
+ request.basic_auth @user, @password
311
448
  end
312
449
  end
313
450
 
451
+ def request_auth_digest request, uri, realm, base_uri, iis
452
+ challenge = @digest_challenges[realm]
453
+
454
+ uri.user = @user
455
+ uri.password = @password
456
+
457
+ auth = @digest_auth.auth_header uri, challenge.to_s, request.method, iis
458
+ request['Authorization'] = auth
459
+ end
460
+
314
461
  def request_cookies request, uri
315
462
  return if @cookie_jar.empty? uri
316
463
 
@@ -344,23 +491,6 @@ class Mechanize::HTTP::Agent
344
491
  end
345
492
  end
346
493
 
347
- def request_add_headers request, headers = {}
348
- @request_headers.each do |k,v|
349
- request[k] = v
350
- end
351
-
352
- headers.each do |field, value|
353
- case field
354
- when :etag then request["ETag"] = value
355
- when :if_modified_since then request["If-Modified-Since"] = value
356
- when Symbol then
357
- raise ArgumentError, "unknown header symbol #{field}"
358
- else
359
- request[field] = value
360
- end
361
- end
362
- end
363
-
364
494
  def request_referer request, uri, referer
365
495
  return unless referer
366
496
  return if 'https' == referer.scheme.downcase and
@@ -451,26 +581,110 @@ class Mechanize::HTTP::Agent
451
581
  return uri, parameters
452
582
  end
453
583
 
584
+ # :section: Response
585
+
586
+ def get_meta_refresh response, uri, page
587
+ return nil unless @follow_meta_refresh
588
+
589
+ if page.respond_to?(:meta_refresh) and
590
+ (redirect = page.meta_refresh.first) then
591
+ [redirect.delay, redirect.href] unless
592
+ not @follow_meta_refresh_self and redirect.link_self
593
+ elsif refresh = response['refresh']
594
+ delay, href, link_self = Mechanize::Page::MetaRefresh.parse refresh, uri
595
+ raise Mechanize::Error, 'Invalid refresh http header' unless delay
596
+ [delay.to_f, href] unless
597
+ not @follow_meta_refresh_self and link_self
598
+ end
599
+ end
600
+
601
+ def response_authenticate(response, page, uri, request, headers, params,
602
+ referer)
603
+ raise Mechanize::UnauthorizedError, page unless @user || @password
604
+
605
+ challenges = @authenticate_parser.parse response['www-authenticate']
606
+
607
+ if challenge = challenges.find { |c| c.scheme =~ /^Digest$/i } then
608
+ realm = challenge.realm uri
609
+
610
+ auth_scheme = if response['server'] =~ /Microsoft-IIS/ then
611
+ :iis_digest
612
+ else
613
+ :digest
614
+ end
615
+
616
+ existing_realms = @authenticate_methods[realm.uri][auth_scheme]
617
+
618
+ raise Mechanize::UnauthorizedError, page if
619
+ existing_realms.include? realm
620
+
621
+ existing_realms << realm
622
+ @digest_challenges[realm] = challenge
623
+ elsif challenge = challenges.find { |c| c.scheme == 'NTLM' } then
624
+ existing_realms = @authenticate_methods[uri + '/'][:ntlm]
625
+
626
+ raise Mechanize::UnauthorizedError, page if
627
+ existing_realms.include?(realm) and not challenge.params
628
+
629
+ existing_realms << realm
630
+
631
+ if challenge.params then
632
+ type_2 = Net::NTLM::Message.decode64 challenge.params
633
+
634
+ type_3 = type_2.response({ :user => @user, :password => @password, },
635
+ { :ntlmv2 => true }).encode64
636
+
637
+ headers['Authorization'] = "NTLM #{type_3}"
638
+ else
639
+ type_1 = Net::NTLM::Message::Type1.new.encode64
640
+ headers['Authorization'] = "NTLM #{type_1}"
641
+ end
642
+ elsif challenge = challenges.find { |c| c.scheme == 'Basic' } then
643
+ realm = challenge.realm uri
644
+
645
+ existing_realms = @authenticate_methods[realm.uri][:basic]
646
+
647
+ raise Mechanize::UnauthorizedError, page if
648
+ existing_realms.include? realm
649
+
650
+ existing_realms << realm
651
+ else
652
+ raise Mechanize::UnauthorizedError, page
653
+ end
654
+
655
+ fetch uri, request.method.downcase.to_sym, headers, params, referer
656
+ end
657
+
454
658
  def response_content_encoding response, body_io
455
- length = response.content_length || body_io.length
659
+ length = response.content_length
660
+
661
+ length = case body_io
662
+ when IO, Tempfile then
663
+ body_io.stat.size
664
+ else
665
+ body_io.length
666
+ end unless length
667
+
668
+ out_io = nil
456
669
 
457
670
  case response['Content-Encoding']
458
671
  when nil, 'none', '7bit' then
459
- body_io.string
672
+ out_io = body_io
460
673
  when 'deflate' then
461
674
  log.debug('deflate body') if log
462
675
 
463
676
  return if length.zero?
464
677
 
465
678
  begin
466
- Zlib::Inflate.inflate body_io.string
679
+ out_io = inflate body_io
467
680
  rescue Zlib::BufError, Zlib::DataError
468
681
  log.error('Unable to inflate page, retrying with raw deflate') if log
682
+ body_io.rewind
469
683
  begin
470
- Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body_io.string)
684
+ out_io = inflate body_io, -Zlib::MAX_WBITS
471
685
  rescue Zlib::BufError, Zlib::DataError
472
686
  log.error("unable to inflate page: #{$!}") if log
473
- ''
687
+ nil
474
688
  end
475
689
  end
476
690
  when 'gzip', 'x-gzip' then
@@ -480,12 +694,17 @@ class Mechanize::HTTP::Agent
480
694
 
481
695
  begin
482
696
  zio = Zlib::GzipReader.new body_io
483
- zio.read
697
+ out_io = Tempfile.new 'mechanize-decode'
698
+
699
+ until zio.eof? do
700
+ out_io.write zio.read 16384
701
+ end
484
702
  rescue Zlib::BufError, Zlib::GzipFile::Error
485
703
  log.error('Unable to gunzip body, trying raw inflate') if log
486
704
  body_io.rewind
487
705
  body_io.read 10
488
- Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body_io.read)
706
+
707
+ out_io = inflate body_io, -Zlib::MAX_WBITS
489
708
  rescue Zlib::DataError
490
709
  log.error("unable to gunzip page: #{$!}") if log
491
710
  ''
@@ -496,14 +715,23 @@ class Mechanize::HTTP::Agent
496
715
  raise Mechanize::Error,
497
716
  "Unsupported Content-Encoding: #{response['Content-Encoding']}"
498
717
  end
718
+
719
+ out_io.flush
720
+ out_io.rewind
721
+
722
+ out_io
499
723
  end
500
724
 
501
725
  def response_cookies response, uri, page
726
+ log = log() # reduce method calls
502
727
  if Mechanize::Page === page and page.body =~ /Set-Cookie/n
503
728
  page.search('//head/meta[@http-equiv="Set-Cookie"]').each do |meta|
504
- Mechanize::Cookie.parse(uri, meta['content']) { |c|
505
- log.debug("saved cookie: #{c}") if log
506
- @cookie_jar.add(uri, c)
729
+ Mechanize::Cookie.parse(uri, meta['content'], log) { |c|
730
+ if @cookie_jar.add(uri, c)
731
+ log.debug("saved cookie: #{c}") if log
732
+ else
733
+ log.debug("rejected cookie: #{c}") if log
734
+ end
507
735
  }
508
736
  end
509
737
  end
@@ -513,35 +741,27 @@ class Mechanize::HTTP::Agent
513
741
  return unless header_cookies
514
742
 
515
743
  header_cookies.each do |cookie|
516
- Mechanize::Cookie.parse(uri, cookie) { |c|
517
- log.debug("saved cookie: #{c}") if log
518
- @cookie_jar.add(uri, c)
744
+ Mechanize::Cookie.parse(uri, cookie, log) { |c|
745
+ if @cookie_jar.add(uri, c)
746
+ log.debug("saved cookie: #{c}") if log
747
+ else
748
+ log.debug("rejected cookie: #{c}") if log
749
+ end
519
750
  }
520
751
  end
521
752
  end
522
753
 
523
754
  def response_follow_meta_refresh response, uri, page, redirects
524
- return unless @follow_meta_refresh
525
-
526
- redirect_uri = nil
527
- referer = page
755
+ delay, new_url = get_meta_refresh(response, uri, page)
756
+ return nil unless new_url
528
757
 
529
- if page.respond_to?(:meta_refresh) and (redirect = page.meta_refresh.first)
530
- redirect_uri = Mechanize::Util.uri_unescape redirect.uri.to_s
531
- sleep redirect.node['delay'].to_f
532
- referer = Mechanize::Page.new(nil, {'content-type'=>'text/html'})
533
- elsif refresh = response['refresh']
534
- delay, redirect_uri = Mechanize::Page::MetaRefresh.parse refresh, uri
535
- raise Mechanize::Error, 'Invalid refresh http header' unless delay
536
- raise Mechanize::RedirectLimitReachedError.new(page, redirects) if
537
- redirects + 1 > @redirection_limit
538
- sleep delay.to_f
539
- end
758
+ raise Mechanize::RedirectLimitReachedError.new(page, redirects) if
759
+ redirects + 1 > @redirection_limit
540
760
 
541
- if redirect_uri
542
- @history.push(page, page.uri)
543
- fetch redirect_uri, :get, {}, [], referer, redirects + 1
544
- end
761
+ sleep delay
762
+ @history.push(page, page.uri)
763
+ fetch new_url, :get, {}, [],
764
+ Mechanize::Page.new(nil, {'content-type'=>'text/html'}), redirects
545
765
  end
546
766
 
547
767
  def response_log response
@@ -555,18 +775,36 @@ class Mechanize::HTTP::Agent
555
775
  end
556
776
  end
557
777
 
558
- def response_parse response, body, uri
559
- @context.parse uri, response, body
778
+ def response_parse response, body_io, uri
779
+ @context.parse uri, response, body_io
560
780
  end
561
781
 
562
782
  def response_read response, request
563
- body_io = StringIO.new
783
+ content_length = response.content_length
784
+
785
+ if content_length and content_length > @max_file_buffer then
786
+ body_io = Tempfile.new 'mechanize-raw'
787
+ body_io.binmode if defined? body_io.binmode
788
+ else
789
+ body_io = StringIO.new
790
+ end
791
+
564
792
  body_io.set_encoding Encoding::BINARY if body_io.respond_to? :set_encoding
565
793
  total = 0
566
794
 
567
795
  begin
568
796
  response.read_body { |part|
569
797
  total += part.length
798
+
799
+ if StringIO === body_io and total > @max_file_buffer then
800
+ new_io = Tempfile.new 'mechanize-raw'
801
+ new_io.binmode if defined? binmode
802
+
803
+ new_io.write body_io.string
804
+
805
+ body_io = new_io
806
+ end
807
+
570
808
  body_io.write(part)
571
809
  log.debug("Read #{part.length} bytes (#{total} total)") if log
572
810
  }
@@ -575,6 +813,7 @@ class Mechanize::HTTP::Agent
575
813
  raise Mechanize::ResponseReadError.new(e, response, body_io)
576
814
  end
577
815
 
816
+ body_io.flush
578
817
  body_io.rewind
579
818
 
580
819
  raise Mechanize::ResponseCodeError, response if
@@ -591,49 +830,37 @@ class Mechanize::HTTP::Agent
591
830
  body_io
592
831
  end
593
832
 
594
- def response_redirect response, method, page, redirects
833
+ def response_redirect response, method, page, redirects, referer = current_page
595
834
  case @redirect_ok
596
835
  when true, :all
597
836
  # shortcut
598
837
  when false, nil
599
838
  return page
600
839
  when :permanent
601
- return page if response_class != Net::HTTPMovedPermanently
840
+ return page unless Net::HTTPMovedPermanently === response
602
841
  end
603
842
 
604
843
  log.info("follow redirect to: #{response['Location']}") if log
605
844
 
606
- from_uri = page.uri
607
-
608
845
  raise Mechanize::RedirectLimitReachedError.new(page, redirects) if
609
846
  redirects + 1 > @redirection_limit
610
847
 
611
848
  redirect_method = method == :head ? :head : :get
612
849
 
613
- page = fetch(response['Location'].to_s, redirect_method, {}, [], page,
614
- redirects + 1)
615
-
850
+ from_uri = page.uri
616
851
  @history.push(page, from_uri)
852
+ new_uri = from_uri + response['Location'].to_s
617
853
 
618
- return page
854
+ fetch new_uri, redirect_method, {}, [], referer, redirects + 1
619
855
  end
620
856
 
621
- def response_authenticate(response, page, uri, request, headers, params,
622
- referer)
623
- raise Mechanize::ResponseCodeError, page unless @user || @password
624
- raise Mechanize::ResponseCodeError, page if @auth_hash.has_key?(uri.host)
857
+ # :section: Robots
625
858
 
626
- if response['www-authenticate'] =~ /Digest/i
627
- @auth_hash[uri.host] = :digest
628
- if response['server'] =~ /Microsoft-IIS/
629
- @auth_hash[uri.host] = :iis_digest
630
- end
631
- @digest = response['www-authenticate']
632
- else
633
- @auth_hash[uri.host] = :basic
634
- end
635
-
636
- fetch uri, request.method.downcase.to_sym, headers, params, referer
859
+ def get_robots(uri) # :nodoc:
860
+ fetch(uri).body
861
+ rescue Mechanize::ResponseCodeError => e
862
+ return '' if e.response_code == '404'
863
+ raise e
637
864
  end
638
865
 
639
866
  def robots= value
@@ -675,13 +902,58 @@ class Mechanize::HTTP::Agent
675
902
  webrobots.reset(url)
676
903
  end
677
904
 
905
+ def webrobots
906
+ @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:get_robots))
907
+ end
908
+
909
+ # :section: SSL
910
+
911
+ def certificate
912
+ @http.certificate
913
+ end
914
+
915
+ # :section: Timeouts
916
+
917
+ # Sets the connection idle timeout for persistent connections
918
+ def idle_timeout= timeout
919
+ @idle_timeout = timeout
920
+ @http.idle_timeout = timeout if @http
921
+ end
922
+
923
+ # :section: Utility
924
+
925
+ def inflate compressed, window_bits = nil
926
+ inflate = Zlib::Inflate.new window_bits
927
+ out_io = Tempfile.new 'mechanize-decode'
928
+
929
+ until compressed.eof? do
930
+ out_io.write inflate.inflate compressed.read 1024
931
+ end
932
+
933
+ out_io.write inflate.finish
934
+
935
+ out_io
936
+ end
937
+
938
+ def log
939
+ @context.log
940
+ end
941
+
678
942
  def set_http
679
943
  @http = Net::HTTP::Persistent.new 'mechanize', @proxy_uri
680
944
 
681
945
  @http.keep_alive = @keep_alive_time
946
+ @http.idle_timeout = @idle_timeout if @idle_timeout
947
+ @http.retry_change_requests = @retry_change_requests
682
948
 
683
949
  @http.ca_file = @ca_file
950
+ @http.cert_store = @cert_store if @cert_store
684
951
  @http.verify_callback = @verify_callback
952
+ @http.verify_mode = @verify_mode if @verify_mode
953
+
954
+ # update our cached value
955
+ @verify_mode = @http.verify_mode
956
+ @cert_store = @http.cert_store
685
957
 
686
958
  if @cert and @key then
687
959
  cert = if OpenSSL::X509::Certificate === @cert then
@@ -701,10 +973,26 @@ class Mechanize::HTTP::Agent
701
973
  end
702
974
  end
703
975
 
976
+ ##
704
977
  # Sets the proxy address, port, user, and password. +addr+ should be a host,
705
- # with no "http://"
978
+ # with no "http://", +port+ may be a port number, service name or port
979
+ # number string.
980
+
706
981
  def set_proxy(addr, port, user = nil, pass = nil)
707
982
  return unless addr and port
983
+
984
+ unless Integer === port then
985
+ begin
986
+ port = Socket.getservbyname port
987
+ rescue SocketError
988
+ begin
989
+ port = Integer port
990
+ rescue ArgumentError
991
+ raise ArgumentError, "invalid value for port: #{port.inspect}"
992
+ end
993
+ end
994
+ end
995
+
708
996
  @proxy_uri = URI "http://#{addr}"
709
997
  @proxy_uri.port = port
710
998
  @proxy_uri.user = user if user
@@ -713,26 +1001,5 @@ class Mechanize::HTTP::Agent
713
1001
  @proxy_uri
714
1002
  end
715
1003
 
716
- def user_agent= user_agent
717
- @webrobots = nil if user_agent != @user_agent
718
- @user_agent = user_agent
719
- end
720
-
721
- # Returns a visited page for the url passed in, otherwise nil
722
- def visited_page url
723
- @history.visited_page resolve url
724
- end
725
-
726
- def get_robots(uri) # :nodoc:
727
- fetch(uri).body
728
- rescue Mechanize::ResponseCodeError => e
729
- return '' if e.response_code == '404'
730
- raise e
731
- end
732
-
733
- def webrobots
734
- @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:get_robots))
735
- end
736
-
737
1004
  end
738
1005