uri 0.12.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/uri/common.rb CHANGED
@@ -13,24 +13,54 @@ require_relative "rfc2396_parser"
13
13
  require_relative "rfc3986_parser"
14
14
 
15
15
  module URI
16
- include RFC2396_REGEXP
16
+ # The default parser instance for RFC 2396.
17
+ RFC2396_PARSER = RFC2396_Parser.new
18
+ Ractor.make_shareable(RFC2396_PARSER) if defined?(Ractor)
17
19
 
18
- REGEXP = RFC2396_REGEXP
19
- Parser = RFC2396_Parser
20
+ # The default parser instance for RFC 3986.
20
21
  RFC3986_PARSER = RFC3986_Parser.new
21
22
  Ractor.make_shareable(RFC3986_PARSER) if defined?(Ractor)
22
23
 
23
- # URI::Parser.new
24
- DEFAULT_PARSER = Parser.new
25
- DEFAULT_PARSER.pattern.each_pair do |sym, str|
26
- unless REGEXP::PATTERN.const_defined?(sym)
27
- REGEXP::PATTERN.const_set(sym, str)
24
+ # The default parser instance.
25
+ DEFAULT_PARSER = RFC3986_PARSER
26
+ Ractor.make_shareable(DEFAULT_PARSER) if defined?(Ractor)
27
+
28
+ # Set the default parser instance.
29
+ def self.parser=(parser = RFC3986_PARSER)
30
+ remove_const(:Parser) if defined?(::URI::Parser)
31
+ const_set("Parser", parser.class)
32
+
33
+ remove_const(:PARSER) if defined?(::URI::PARSER)
34
+ const_set("PARSER", parser)
35
+
36
+ remove_const(:REGEXP) if defined?(::URI::REGEXP)
37
+ remove_const(:PATTERN) if defined?(::URI::PATTERN)
38
+ if Parser == RFC2396_Parser
39
+ const_set("REGEXP", URI::RFC2396_REGEXP)
40
+ const_set("PATTERN", URI::RFC2396_REGEXP::PATTERN)
41
+ end
42
+
43
+ Parser.new.regexp.each_pair do |sym, str|
44
+ remove_const(sym) if const_defined?(sym, false)
45
+ const_set(sym, str)
28
46
  end
29
47
  end
30
- DEFAULT_PARSER.regexp.each_pair do |sym, str|
31
- const_set(sym, str)
48
+ self.parser = RFC3986_PARSER
49
+
50
+ def self.const_missing(const) # :nodoc:
51
+ if const == :REGEXP
52
+ warn "URI::REGEXP is obsolete. Use URI::RFC2396_REGEXP explicitly.", uplevel: 1 if $VERBOSE
53
+ URI::RFC2396_REGEXP
54
+ elsif value = RFC2396_PARSER.regexp[const]
55
+ warn "URI::#{const} is obsolete. Use URI::RFC2396_PARSER.regexp[#{const.inspect}] explicitly.", uplevel: 1 if $VERBOSE
56
+ value
57
+ elsif value = RFC2396_Parser.const_get(const)
58
+ warn "URI::#{const} is obsolete. Use URI::RFC2396_Parser::#{const} explicitly.", uplevel: 1 if $VERBOSE
59
+ value
60
+ else
61
+ super
62
+ end
32
63
  end
33
- Ractor.make_shareable(DEFAULT_PARSER) if defined?(Ractor)
34
64
 
35
65
  module Util # :nodoc:
36
66
  def make_components_hash(klass, array_hash)
@@ -64,41 +94,101 @@ module URI
64
94
  module_function :make_components_hash
65
95
  end
66
96
 
67
- module Schemes
97
+ module Schemes # :nodoc:
98
+ class << self
99
+ ReservedChars = ".+-"
100
+ EscapedChars = "\u01C0\u01C1\u01C2"
101
+ # Use Lo category chars as escaped chars for TruffleRuby, which
102
+ # does not allow Symbol categories as identifiers.
103
+
104
+ def escape(name)
105
+ unless name and name.ascii_only?
106
+ return nil
107
+ end
108
+ name.upcase.tr(ReservedChars, EscapedChars)
109
+ end
110
+
111
+ def unescape(name)
112
+ name.tr(EscapedChars, ReservedChars).encode(Encoding::US_ASCII).upcase
113
+ end
114
+
115
+ def find(name)
116
+ const_get(name, false) if name and const_defined?(name, false)
117
+ end
118
+
119
+ def register(name, klass)
120
+ unless scheme = escape(name)
121
+ raise ArgumentError, "invalid character as scheme - #{name}"
122
+ end
123
+ const_set(scheme, klass)
124
+ end
125
+
126
+ def list
127
+ constants.map { |name|
128
+ [unescape(name.to_s), const_get(name)]
129
+ }.to_h
130
+ end
131
+ end
68
132
  end
69
133
  private_constant :Schemes
70
134
 
135
+ # Registers the given +klass+ as the class to be instantiated
136
+ # when parsing a \URI with the given +scheme+:
71
137
  #
72
- # Register the given +klass+ to be instantiated when parsing URLs with the given +scheme+.
73
- # Note that currently only schemes which after .upcase are valid constant names
74
- # can be registered (no -/+/. allowed).
138
+ # URI.register_scheme('MS_SEARCH', URI::Generic) # => URI::Generic
139
+ # URI.scheme_list['MS_SEARCH'] # => URI::Generic
75
140
  #
141
+ # Note that after calling String#upcase on +scheme+, it must be a valid
142
+ # constant name.
76
143
  def self.register_scheme(scheme, klass)
77
- Schemes.const_set(scheme.to_s.upcase, klass)
144
+ Schemes.register(scheme, klass)
78
145
  end
79
146
 
80
- # Returns a Hash of the defined schemes.
147
+ # Returns a hash of the defined schemes:
148
+ #
149
+ # URI.scheme_list
150
+ # # =>
151
+ # {"MAILTO"=>URI::MailTo,
152
+ # "LDAPS"=>URI::LDAPS,
153
+ # "WS"=>URI::WS,
154
+ # "HTTP"=>URI::HTTP,
155
+ # "HTTPS"=>URI::HTTPS,
156
+ # "LDAP"=>URI::LDAP,
157
+ # "FILE"=>URI::File,
158
+ # "FTP"=>URI::FTP}
159
+ #
160
+ # Related: URI.register_scheme.
81
161
  def self.scheme_list
82
- Schemes.constants.map { |name|
83
- [name.to_s.upcase, Schemes.const_get(name)]
84
- }.to_h
162
+ Schemes.list
85
163
  end
86
164
 
165
+ # :stopdoc:
87
166
  INITIAL_SCHEMES = scheme_list
88
167
  private_constant :INITIAL_SCHEMES
89
168
  Ractor.make_shareable(INITIAL_SCHEMES) if defined?(Ractor)
169
+ # :startdoc:
90
170
 
171
+ # Returns a new object constructed from the given +scheme+, +arguments+,
172
+ # and +default+:
91
173
  #
92
- # Construct a URI instance, using the scheme to detect the appropriate class
93
- # from +URI.scheme_list+.
174
+ # - The new object is an instance of <tt>URI.scheme_list[scheme.upcase]</tt>.
175
+ # - The object is initialized by calling the class initializer
176
+ # using +scheme+ and +arguments+.
177
+ # See URI::Generic.new.
178
+ #
179
+ # Examples:
180
+ #
181
+ # values = ['john.doe', 'www.example.com', '123', nil, '/forum/questions/', nil, 'tag=networking&order=newest', 'top']
182
+ # URI.for('https', *values)
183
+ # # => #<URI::HTTPS https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top>
184
+ # URI.for('foo', *values, default: URI::HTTP)
185
+ # # => #<URI::HTTP foo://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top>
94
186
  #
95
187
  def self.for(scheme, *arguments, default: Generic)
96
- const_name = scheme.to_s.upcase
188
+ const_name = Schemes.escape(scheme)
97
189
 
98
190
  uri_class = INITIAL_SCHEMES[const_name]
99
- uri_class ||= if /\A[A-Z]\w*\z/.match?(const_name) && Schemes.const_defined?(const_name, false)
100
- Schemes.const_get(const_name, false)
101
- end
191
+ uri_class ||= Schemes.find(const_name)
102
192
  uri_class ||= default
103
193
 
104
194
  return uri_class.new(scheme, *arguments)
@@ -121,95 +211,49 @@ module URI
121
211
  #
122
212
  class BadURIError < Error; end
123
213
 
124
- #
125
- # == Synopsis
126
- #
127
- # URI::split(uri)
128
- #
129
- # == Args
130
- #
131
- # +uri+::
132
- # String with URI.
133
- #
134
- # == Description
135
- #
136
- # Splits the string on following parts and returns array with result:
137
- #
138
- # * Scheme
139
- # * Userinfo
140
- # * Host
141
- # * Port
142
- # * Registry
143
- # * Path
144
- # * Opaque
145
- # * Query
146
- # * Fragment
147
- #
148
- # == Usage
149
- #
150
- # require 'uri'
151
- #
152
- # URI.split("http://www.ruby-lang.org/")
153
- # # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]
214
+ # Returns a 9-element array representing the parts of the \URI
215
+ # formed from the string +uri+;
216
+ # each array element is a string or +nil+:
217
+ #
218
+ # names = %w[scheme userinfo host port registry path opaque query fragment]
219
+ # values = URI.split('https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top')
220
+ # names.zip(values)
221
+ # # =>
222
+ # [["scheme", "https"],
223
+ # ["userinfo", "john.doe"],
224
+ # ["host", "www.example.com"],
225
+ # ["port", "123"],
226
+ # ["registry", nil],
227
+ # ["path", "/forum/questions/"],
228
+ # ["opaque", nil],
229
+ # ["query", "tag=networking&order=newest"],
230
+ # ["fragment", "top"]]
154
231
  #
155
232
  def self.split(uri)
156
- RFC3986_PARSER.split(uri)
233
+ PARSER.split(uri)
157
234
  end
158
235
 
236
+ # Returns a new \URI object constructed from the given string +uri+:
159
237
  #
160
- # == Synopsis
161
- #
162
- # URI::parse(uri_str)
163
- #
164
- # == Args
238
+ # URI.parse('https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top')
239
+ # # => #<URI::HTTPS https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top>
240
+ # URI.parse('http://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top')
241
+ # # => #<URI::HTTP http://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top>
165
242
  #
166
- # +uri_str+::
167
- # String with URI.
168
- #
169
- # == Description
170
- #
171
- # Creates one of the URI's subclasses instance from the string.
172
- #
173
- # == Raises
174
- #
175
- # URI::InvalidURIError::
176
- # Raised if URI given is not a correct one.
177
- #
178
- # == Usage
179
- #
180
- # require 'uri'
181
- #
182
- # uri = URI.parse("http://www.ruby-lang.org/")
183
- # # => #<URI::HTTP http://www.ruby-lang.org/>
184
- # uri.scheme
185
- # # => "http"
186
- # uri.host
187
- # # => "www.ruby-lang.org"
188
- #
189
- # It's recommended to first ::escape the provided +uri_str+ if there are any
190
- # invalid URI characters.
243
+ # It's recommended to first URI::RFC2396_PARSER.escape string +uri+
244
+ # if it may contain invalid URI characters.
191
245
  #
192
246
  def self.parse(uri)
193
- RFC3986_PARSER.parse(uri)
247
+ PARSER.parse(uri)
194
248
  end
195
249
 
250
+ # Merges the given URI strings +str+
251
+ # per {RFC 2396}[https://www.rfc-editor.org/rfc/rfc2396.html].
196
252
  #
197
- # == Synopsis
198
- #
199
- # URI::join(str[, str, ...])
200
- #
201
- # == Args
202
- #
203
- # +str+::
204
- # String(s) to work with, will be converted to RFC3986 URIs before merging.
205
- #
206
- # == Description
207
- #
208
- # Joins URIs.
253
+ # Each string in +str+ is converted to an
254
+ # {RFC3986 URI}[https://www.rfc-editor.org/rfc/rfc3986.html] before being merged.
209
255
  #
210
- # == Usage
211
- #
212
- # require 'uri'
256
+ # Examples:
213
257
  #
214
258
  # URI.join("http://example.com/","main.rbx")
215
259
  # # => #<URI::HTTP http://example.com/main.rbx>
@@ -227,7 +271,7 @@ module URI
227
271
  # # => #<URI::HTTP http://example.com/foo/bar>
228
272
  #
229
273
  def self.join(*str)
230
- RFC3986_PARSER.join(*str)
274
+ DEFAULT_PARSER.join(*str)
231
275
  end
232
276
 
233
277
  #
@@ -254,9 +298,9 @@ module URI
254
298
  # URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
255
299
  # # => ["http://foo.example.com/bla", "mailto:test@example.com"]
256
300
  #
257
- def self.extract(str, schemes = nil, &block)
301
+ def self.extract(str, schemes = nil, &block) # :nodoc:
258
302
  warn "URI.extract is obsolete", uplevel: 1 if $VERBOSE
259
- DEFAULT_PARSER.extract(str, schemes, &block)
303
+ PARSER.extract(str, schemes, &block)
260
304
  end
261
305
 
262
306
  #
@@ -291,16 +335,16 @@ module URI
291
335
  # p $&
292
336
  # end
293
337
  #
294
- def self.regexp(schemes = nil)
338
+ def self.regexp(schemes = nil)# :nodoc:
295
339
  warn "URI.regexp is obsolete", uplevel: 1 if $VERBOSE
296
- DEFAULT_PARSER.make_regexp(schemes)
340
+ PARSER.make_regexp(schemes)
297
341
  end
298
342
 
299
343
  TBLENCWWWCOMP_ = {} # :nodoc:
300
344
  256.times do |i|
301
345
  TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i)
302
346
  end
303
- TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze
347
+ TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze # :nodoc:
304
348
  TBLENCWWWCOMP_[' '] = '+'
305
349
  TBLENCWWWCOMP_.freeze
306
350
  TBLDECWWWCOMP_ = {} # :nodoc:
@@ -314,44 +358,92 @@ module URI
314
358
  TBLDECWWWCOMP_['+'] = ' '
315
359
  TBLDECWWWCOMP_.freeze
316
360
 
317
- # Encodes given +str+ to URL-encoded form data.
361
+ # Returns a URL-encoded string derived from the given string +str+.
362
+ #
363
+ # The returned string:
364
+ #
365
+ # - Preserves:
366
+ #
367
+ # - Characters <tt>'*'</tt>, <tt>'.'</tt>, <tt>'-'</tt>, and <tt>'_'</tt>.
368
+ # - Characters in ranges <tt>'a'..'z'</tt>, <tt>'A'..'Z'</tt>,
369
+ # and <tt>'0'..'9'</tt>.
370
+ #
371
+ # Example:
372
+ #
373
+ # URI.encode_www_form_component('*.-_azAZ09')
374
+ # # => "*.-_azAZ09"
375
+ #
376
+ # - Converts:
377
+ #
378
+ # - Character <tt>' '</tt> to character <tt>'+'</tt>.
379
+ # - Any other character to "percent notation";
380
+ # the percent notation for character <i>c</i> is <tt>'%%%02X' % c.ord</tt>.
381
+ #
382
+ # Example:
383
+ #
384
+ # URI.encode_www_form_component('Here are some punctuation characters: ,;?:')
385
+ # # => "Here+are+some+punctuation+characters%3A+%2C%3B%3F%3A"
318
386
  #
319
- # This method doesn't convert *, -, ., 0-9, A-Z, _, a-z, but does convert SP
320
- # (ASCII space) to + and converts others to %XX.
387
+ # Encoding:
321
388
  #
322
- # If +enc+ is given, convert +str+ to the encoding before percent encoding.
389
+ # - If +str+ has encoding Encoding::ASCII_8BIT, argument +enc+ is ignored.
390
+ # - Otherwise +str+ is converted first to Encoding::UTF_8
391
+ # (with suitable character replacements),
392
+ # and then to encoding +enc+.
323
393
  #
324
- # This is an implementation of
325
- # https://www.w3.org/TR/2013/CR-html5-20130806/forms.html#url-encoded-form-data.
394
+ # In either case, the returned string has forced encoding Encoding::US_ASCII.
326
395
  #
327
- # See URI.decode_www_form_component, URI.encode_www_form.
396
+ # Related: URI.encode_uri_component (encodes <tt>' '</tt> as <tt>'%20'</tt>).
328
397
  def self.encode_www_form_component(str, enc=nil)
329
398
  _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_, str, enc)
330
399
  end
331
400
 
332
- # Decodes given +str+ of URL-encoded form data.
401
+ # Returns a string decoded from the given \URL-encoded string +str+.
333
402
  #
334
- # This decodes + to SP.
403
+ # The given string is first encoded as Encoding::ASCII_8BIT (using String#b),
404
+ # then decoded (as below), and finally force-encoded to the given encoding +enc+.
335
405
  #
336
- # See URI.encode_www_form_component, URI.decode_www_form.
406
+ # The returned string:
407
+ #
408
+ # - Preserves:
409
+ #
410
+ # - Characters <tt>'*'</tt>, <tt>'.'</tt>, <tt>'-'</tt>, and <tt>'_'</tt>.
411
+ # - Characters in ranges <tt>'a'..'z'</tt>, <tt>'A'..'Z'</tt>,
412
+ # and <tt>'0'..'9'</tt>.
413
+ #
414
+ # Example:
415
+ #
416
+ # URI.decode_www_form_component('*.-_azAZ09')
417
+ # # => "*.-_azAZ09"
418
+ #
419
+ # - Converts:
420
+ #
421
+ # - Character <tt>'+'</tt> to character <tt>' '</tt>.
422
+ # - Each "percent notation" to an ASCII character.
423
+ #
424
+ # Example:
425
+ #
426
+ # URI.decode_www_form_component('Here+are+some+punctuation+characters%3A+%2C%3B%3F%3A')
427
+ # # => "Here are some punctuation characters: ,;?:"
428
+ #
429
+ # Related: URI.decode_uri_component (preserves <tt>'+'</tt>).
337
430
  def self.decode_www_form_component(str, enc=Encoding::UTF_8)
338
431
  _decode_uri_component(/\+|%\h\h/, str, enc)
339
432
  end
340
433
 
341
- # Encodes +str+ using URL encoding
342
- #
343
- # This encodes SP to %20 instead of +.
434
+ # Like URI.encode_www_form_component, except that <tt>' '</tt> (space)
435
+ # is encoded as <tt>'%20'</tt> (instead of <tt>'+'</tt>).
344
436
  def self.encode_uri_component(str, enc=nil)
345
437
  _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCURICOMP_, str, enc)
346
438
  end
347
439
 
348
- # Decodes given +str+ of URL-encoded data.
349
- #
350
- # This does not decode + to SP.
440
+ # Like URI.decode_www_form_component, except that <tt>'+'</tt> is preserved.
351
441
  def self.decode_uri_component(str, enc=Encoding::UTF_8)
352
442
  _decode_uri_component(/%\h\h/, str, enc)
353
443
  end
354
444
 
445
+ # Returns a string derived from the given string +str+ with
446
+ # URI-encoded characters matching +regexp+ according to +table+.
355
447
  def self._encode_uri_component(regexp, table, str, enc)
356
448
  str = str.to_s.dup
357
449
  if str.encoding != Encoding::ASCII_8BIT
@@ -366,39 +458,112 @@ module URI
366
458
  end
367
459
  private_class_method :_encode_uri_component
368
460
 
461
+ # Returns a string decoding characters matching +regexp+ from the
462
+ # given \URL-encoded string +str+.
369
463
  def self._decode_uri_component(regexp, str, enc)
370
464
  raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str)
371
465
  str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc)
372
466
  end
373
467
  private_class_method :_decode_uri_component
374
468
 
375
- # Generates URL-encoded form data from given +enum+.
469
+ # Returns a URL-encoded string derived from the given
470
+ # {Enumerable}[https://docs.ruby-lang.org/en/master/Enumerable.html#module-Enumerable-label-Enumerable+in+Ruby+Classes]
471
+ # +enum+.
472
+ #
473
+ # The result is suitable for use as form data
474
+ # for an \HTTP request whose <tt>Content-Type</tt> is
475
+ # <tt>'application/x-www-form-urlencoded'</tt>.
476
+ #
477
+ # The returned string consists of the elements of +enum+,
478
+ # each converted to one or more URL-encoded strings,
479
+ # and all joined with character <tt>'&'</tt>.
480
+ #
481
+ # Simple examples:
482
+ #
483
+ # URI.encode_www_form([['foo', 0], ['bar', 1], ['baz', 2]])
484
+ # # => "foo=0&bar=1&baz=2"
485
+ # URI.encode_www_form({foo: 0, bar: 1, baz: 2})
486
+ # # => "foo=0&bar=1&baz=2"
376
487
  #
377
- # This generates application/x-www-form-urlencoded data defined in HTML5
378
- # from given an Enumerable object.
488
+ # The returned string is formed using method URI.encode_www_form_component,
489
+ # which converts certain characters:
379
490
  #
380
- # This internally uses URI.encode_www_form_component(str).
491
+ # URI.encode_www_form('f#o': '/', 'b-r': '$', 'b z': '@')
492
+ # # => "f%23o=%2F&b-r=%24&b+z=%40"
381
493
  #
382
- # This method doesn't convert the encoding of given items, so convert them
383
- # before calling this method if you want to send data as other than original
384
- # encoding or mixed encoding data. (Strings which are encoded in an HTML5
385
- # ASCII incompatible encoding are converted to UTF-8.)
494
+ # When +enum+ is Array-like, each element +ele+ is converted to a field:
386
495
  #
387
- # This method doesn't handle files. When you send a file, use
388
- # multipart/form-data.
496
+ # - If +ele+ is an array of two or more elements,
497
+ # the field is formed from its first two elements
498
+ # (and any additional elements are ignored):
389
499
  #
390
- # This refers https://url.spec.whatwg.org/#concept-urlencoded-serializer
500
+ # name = URI.encode_www_form_component(ele[0], enc)
501
+ # value = URI.encode_www_form_component(ele[1], enc)
502
+ # "#{name}=#{value}"
391
503
  #
392
- # URI.encode_www_form([["q", "ruby"], ["lang", "en"]])
393
- # #=> "q=ruby&lang=en"
394
- # URI.encode_www_form("q" => "ruby", "lang" => "en")
395
- # #=> "q=ruby&lang=en"
396
- # URI.encode_www_form("q" => ["ruby", "perl"], "lang" => "en")
397
- # #=> "q=ruby&q=perl&lang=en"
398
- # URI.encode_www_form([["q", "ruby"], ["q", "perl"], ["lang", "en"]])
399
- # #=> "q=ruby&q=perl&lang=en"
504
+ # Examples:
505
+ #
506
+ # URI.encode_www_form([%w[foo bar], %w[baz bat bah]])
507
+ # # => "foo=bar&baz=bat"
508
+ # URI.encode_www_form([['foo', 0], ['bar', :baz, 'bat']])
509
+ # # => "foo=0&bar=baz"
510
+ #
511
+ # - If +ele+ is an array of one element,
512
+ # the field is formed from <tt>ele[0]</tt>:
513
+ #
514
+ # URI.encode_www_form_component(ele[0])
515
+ #
516
+ # Example:
517
+ #
518
+ # URI.encode_www_form([['foo'], [:bar], [0]])
519
+ # # => "foo&bar&0"
520
+ #
521
+ # - Otherwise the field is formed from +ele+:
522
+ #
523
+ # URI.encode_www_form_component(ele)
524
+ #
525
+ # Example:
526
+ #
527
+ # URI.encode_www_form(['foo', :bar, 0])
528
+ # # => "foo&bar&0"
529
+ #
530
+ # The elements of an Array-like +enum+ may be a mixture:
531
+ #
532
+ # URI.encode_www_form([['foo', 0], ['bar', 1, 2], ['baz'], :bat])
533
+ # # => "foo=0&bar=1&baz&bat"
534
+ #
535
+ # When +enum+ is Hash-like,
536
+ # each +key+/+value+ pair is converted to one or more fields:
537
+ #
538
+ # - If +value+ is
539
+ # {Array-convertible}[https://docs.ruby-lang.org/en/master/implicit_conversion_rdoc.html#label-Array-Convertible+Objects],
540
+ # each element +ele+ in +value+ is paired with +key+ to form a field:
541
+ #
542
+ # name = URI.encode_www_form_component(key, enc)
543
+ # value = URI.encode_www_form_component(ele, enc)
544
+ # "#{name}=#{value}"
545
+ #
546
+ # Example:
547
+ #
548
+ # URI.encode_www_form({foo: [:bar, 1], baz: [:bat, :bam, 2]})
549
+ # # => "foo=bar&foo=1&baz=bat&baz=bam&baz=2"
550
+ #
551
+ # - Otherwise, +key+ and +value+ are paired to form a field:
552
+ #
553
+ # name = URI.encode_www_form_component(key, enc)
554
+ # value = URI.encode_www_form_component(value, enc)
555
+ # "#{name}=#{value}"
556
+ #
557
+ # Example:
558
+ #
559
+ # URI.encode_www_form({foo: 0, bar: 1, baz: 2})
560
+ # # => "foo=0&bar=1&baz=2"
561
+ #
562
+ # The elements of a Hash-like +enum+ may be a mixture:
563
+ #
564
+ # URI.encode_www_form({foo: [0, 1], bar: 2})
565
+ # # => "foo=0&foo=1&bar=2"
400
566
  #
401
- # See URI.encode_www_form_component, URI.decode_www_form.
402
567
  def self.encode_www_form(enum, enc=nil)
403
568
  enum.map do |k,v|
404
569
  if v.nil?
@@ -419,22 +584,39 @@ module URI
419
584
  end.join('&')
420
585
  end
421
586
 
422
- # Decodes URL-encoded form data from given +str+.
587
+ # Returns name/value pairs derived from the given string +str+,
588
+ # which must be an ASCII string.
589
+ #
590
+ # The method may be used to decode the body of Net::HTTPResponse object +res+
591
+ # for which <tt>res['Content-Type']</tt> is <tt>'application/x-www-form-urlencoded'</tt>.
592
+ #
593
+ # The returned data is an array of 2-element subarrays;
594
+ # each subarray is a name/value pair (both are strings).
595
+ # Each returned string has encoding +enc+,
596
+ # and has had invalid characters removed via
597
+ # {String#scrub}[https://docs.ruby-lang.org/en/master/String.html#method-i-scrub].
423
598
  #
424
- # This decodes application/x-www-form-urlencoded data
425
- # and returns an array of key-value arrays.
599
+ # A simple example:
426
600
  #
427
- # This refers http://url.spec.whatwg.org/#concept-urlencoded-parser,
428
- # so this supports only &-separator, and doesn't support ;-separator.
601
+ # URI.decode_www_form('foo=0&bar=1&baz')
602
+ # # => [["foo", "0"], ["bar", "1"], ["baz", ""]]
429
603
  #
430
- # ary = URI.decode_www_form("a=1&a=2&b=3")
431
- # ary #=> [['a', '1'], ['a', '2'], ['b', '3']]
432
- # ary.assoc('a').last #=> '1'
433
- # ary.assoc('b').last #=> '3'
434
- # ary.rassoc('a').last #=> '2'
435
- # Hash[ary] #=> {"a"=>"2", "b"=>"3"}
604
+ # The returned strings have certain conversions,
605
+ # similar to those performed in URI.decode_www_form_component:
606
+ #
607
+ # URI.decode_www_form('f%23o=%2F&b-r=%24&b+z=%40')
608
+ # # => [["f#o", "/"], ["b-r", "$"], ["b z", "@"]]
609
+ #
610
+ # The given string may contain consecutive separators:
611
+ #
612
+ # URI.decode_www_form('foo=0&&bar=1&&baz=2')
613
+ # # => [["foo", "0"], ["", ""], ["bar", "1"], ["", ""], ["baz", "2"]]
614
+ #
615
+ # A different separator may be specified:
616
+ #
617
+ # URI.decode_www_form('foo=0--bar=1--baz', separator: '--')
618
+ # # => [["foo", "0"], ["bar", "1"], ["baz", ""]]
436
619
  #
437
- # See URI.decode_www_form_component, URI.encode_www_form.
438
620
  def self.decode_www_form(str, enc=Encoding::UTF_8, separator: '&', use__charset_: false, isindex: false)
439
621
  raise ArgumentError, "the input of #{self.name}.#{__method__} must be ASCII only string" unless str.ascii_only?
440
622
  ary = []
@@ -713,7 +895,18 @@ end # module URI
713
895
  module Kernel
714
896
 
715
897
  #
716
- # Returns +uri+ converted to an URI object.
898
+ # Returns a \URI object derived from the given +uri+,
899
+ # which may be a \URI string or an existing \URI object:
900
+ #
901
+ # require 'uri'
902
+ # # Returns a new URI.
903
+ # uri = URI('http://github.com/ruby/ruby')
904
+ # # => #<URI::HTTP http://github.com/ruby/ruby>
905
+ # # Returns the given URI.
906
+ # URI(uri)
907
+ # # => #<URI::HTTP http://github.com/ruby/ruby>
908
+ #
909
+ # You must require 'uri' to use this method.
717
910
  #
718
911
  def URI(uri)
719
912
  if uri.is_a?(URI::Generic)