rubysl-uri 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9dc1de2a61a2cb733ce807462f966d2011278ba6
4
- data.tar.gz: fc2994a37404922968e36219d439a21e2d906e6d
3
+ metadata.gz: 1cdbb4d6b18672fb1ef0eea47a1b42be33ba153c
4
+ data.tar.gz: a83c4da2e152500850a8eef0b8a93c9e795bdb58
5
5
  SHA512:
6
- metadata.gz: 4d7d26cb2bcdf7a8dbe255b37012717439d9b79c901a056c0f3e6e6c4457807f3552be29252472883d15ec2ea9e27dfc80e255a36edf467c04c36d405499579c
7
- data.tar.gz: 7915d126fdea40be02cde056bde73a3b51f400502c29099e172c3db08a67025f95103933e1c71ad94f78b2425060c8f40feea91fd39a066a4d96de8f97dd880d
6
+ metadata.gz: ff5396567881b630b8e5cf085c9d4f0ba53bd98023ac2c3bcdd5e722d7f438eb4cca350b298b1b390e8f2080d96bfbd630c5245273e4c0a8ab1bd412e2edbf3f
7
+ data.tar.gz: 0eae37ca085e4c68a2f54848311226c7064deafbd7bbe9f8aebb5a0bf2d42eef30f7f8cb6058eeef5d7163ef9c6b843f11d0476185107922bdc2971273a2aeae
@@ -3,5 +3,5 @@ env:
3
3
  - RUBYLIB=lib
4
4
  script: bundle exec mspec
5
5
  rvm:
6
- - 1.8.7
7
- - rbx-nightly-18mode
6
+ - 1.9.3
7
+ - rbx-nightly-19mode
@@ -1,2 +1,2 @@
1
- require "rubysl/uri/uri"
2
1
  require "rubysl/uri/version"
2
+ require "rubysl/uri/uri"
@@ -1,14 +1,96 @@
1
+ # URI is a module providing classes to handle Uniform Resource Identifiers
2
+ # (RFC2396[http://tools.ietf.org/html/rfc2396])
1
3
  #
2
- # URI support for Ruby
4
+ # == Features
5
+ #
6
+ # * Uniform way of handling URIs
7
+ # * Flexibility to introduce custom URI schemes
8
+ # * Flexibility to have an alternate URI::Parser (or just different patterns
9
+ # and regexp's)
10
+ #
11
+ # == Basic example
12
+ #
13
+ # require 'uri'
14
+ #
15
+ # uri = URI("http://foo.com/posts?id=30&limit=5#time=1305298413")
16
+ # #=> #<URI::HTTP:0x00000000b14880
17
+ # URL:http://foo.com/posts?id=30&limit=5#time=1305298413>
18
+ # uri.scheme
19
+ # #=> "http"
20
+ # uri.host
21
+ # #=> "foo.com"
22
+ # uri.path
23
+ # #=> "/posts"
24
+ # uri.query
25
+ # #=> "id=30&limit=5"
26
+ # uri.fragment
27
+ # #=> "time=1305298413"
28
+ #
29
+ # uri.to_s
30
+ # #=> "http://foo.com/posts?id=30&limit=5#time=1305298413"
31
+ #
32
+ # == Adding custom URIs
33
+ #
34
+ # module URI
35
+ # class RSYNC < Generic
36
+ # DEFAULT_PORT = 873
37
+ # end
38
+ # @@schemes['RSYNC'] = RSYNC
39
+ # end
40
+ # #=> URI::RSYNC
41
+ #
42
+ # URI.scheme_list
43
+ # #=> {"FTP"=>URI::FTP, "HTTP"=>URI::HTTP, "HTTPS"=>URI::HTTPS,
44
+ # "LDAP"=>URI::LDAP, "LDAPS"=>URI::LDAPS, "MAILTO"=>URI::MailTo,
45
+ # "RSYNC"=>URI::RSYNC}
46
+ #
47
+ # uri = URI("rsync://rsync.foo.com")
48
+ # #=> #<URI::RSYNC:0x00000000f648c8 URL:rsync://rsync.foo.com>
49
+ #
50
+ # == RFC References
51
+ #
52
+ # A good place to view an RFC spec is http://www.ietf.org/rfc.html
53
+ #
54
+ # Here is a list of all related RFC's.
55
+ # - RFC822[http://tools.ietf.org/html/rfc822]
56
+ # - RFC1738[http://tools.ietf.org/html/rfc1738]
57
+ # - RFC2255[http://tools.ietf.org/html/rfc2255]
58
+ # - RFC2368[http://tools.ietf.org/html/rfc2368]
59
+ # - RFC2373[http://tools.ietf.org/html/rfc2373]
60
+ # - RFC2396[http://tools.ietf.org/html/rfc2396]
61
+ # - RFC2732[http://tools.ietf.org/html/rfc2732]
62
+ # - RFC3986[http://tools.ietf.org/html/rfc3986]
63
+ #
64
+ # == Class tree
65
+ #
66
+ # - URI::Generic (in uri/generic.rb)
67
+ # - URI::FTP - (in uri/ftp.rb)
68
+ # - URI::HTTP - (in uri/http.rb)
69
+ # - URI::HTTPS - (in uri/https.rb)
70
+ # - URI::LDAP - (in uri/ldap.rb)
71
+ # - URI::LDAPS - (in uri/ldaps.rb)
72
+ # - URI::MailTo - (in uri/mailto.rb)
73
+ # - URI::Parser - (in uri/common.rb)
74
+ # - URI::REGEXP - (in uri/common.rb)
75
+ # - URI::REGEXP::PATTERN - (in uri/common.rb)
76
+ # - URI::Util - (in uri/common.rb)
77
+ # - URI::Escape - (in uri/common.rb)
78
+ # - URI::Error - (in uri/common.rb)
79
+ # - URI::InvalidURIError - (in uri/common.rb)
80
+ # - URI::InvalidComponentError - (in uri/common.rb)
81
+ # - URI::BadURIError - (in uri/common.rb)
82
+ #
83
+ # == Copyright Info
3
84
  #
4
85
  # Author:: Akira Yamada <akira@ruby-lang.org>
5
- # Documentation:: Akira Yamada <akira@ruby-lang.org>, Dmitry V. Sabanin <sdmitry@lrn.ru>
6
- # License::
86
+ # Documentation::
87
+ # Akira Yamada <akira@ruby-lang.org>
88
+ # Dmitry V. Sabanin <sdmitry@lrn.ru>
89
+ # Vincent Batts <vbatts@hashbangbash.com>
90
+ # License::
7
91
  # Copyright (c) 2001 akira yamada <akira@ruby-lang.org>
8
92
  # You can redistribute it and/or modify it under the same term as Ruby.
9
- # Revision:: $Id: uri.rb 16038 2008-04-15 09:41:47Z kazu $
10
- #
11
- # See URI for documentation
93
+ # Revision:: $Id$
12
94
  #
13
95
 
14
96
  module URI
@@ -1,5 +1,5 @@
1
1
  module RubySL
2
2
  module URI
3
- VERSION = "1.0.0"
3
+ VERSION = "2.0.0"
4
4
  end
5
5
  end
@@ -1,12 +1,18 @@
1
+ #--
1
2
  # = uri/common.rb
2
3
  #
3
4
  # Author:: Akira Yamada <akira@ruby-lang.org>
4
- # Revision:: $Id: common.rb 14178 2007-12-10 09:31:55Z matz $
5
- # License::
5
+ # Revision:: $Id$
6
+ # License::
6
7
  # You can redistribute it and/or modify it under the same term as Ruby.
7
8
  #
9
+ # See URI for general documentation
10
+ #
8
11
 
9
12
  module URI
13
+ #
14
+ # Includes URI::REGEXP::PATTERN
15
+ #
10
16
  module REGEXP
11
17
  #
12
18
  # Patterns used to parse URI's
@@ -31,29 +37,336 @@ module URI
31
37
  # mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
32
38
  # "(" | ")"
33
39
  # unreserved = alphanum | mark
34
- UNRESERVED = "-_.!~*'()#{ALNUM}"
40
+ UNRESERVED = "\\-_.!~*'()#{ALNUM}"
35
41
  # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
36
42
  # "$" | ","
37
- # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
43
+ # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
38
44
  # "$" | "," | "[" | "]" (RFC 2732)
39
45
  RESERVED = ";/?:@&=+$,\\[\\]"
40
46
 
47
+ # domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
48
+ DOMLABEL = "(?:[#{ALNUM}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
49
+ # toplabel = alpha | alpha *( alphanum | "-" ) alphanum
50
+ TOPLABEL = "(?:[#{ALPHA}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
51
+ # hostname = *( domainlabel "." ) toplabel [ "." ]
52
+ HOSTNAME = "(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?"
53
+
54
+ # :startdoc:
55
+ end # PATTERN
56
+
57
+ # :startdoc:
58
+ end # REGEXP
59
+
60
+ # Class that parses Strings into URIs.
61
+ #
62
+ # It contains a Hash set of patterns and Regexp's that match and validate.
63
+ #
64
+ class Parser
65
+ include REGEXP
66
+
67
+ #
68
+ # == Synopsis
69
+ #
70
+ # URI::Parser.new([opts])
71
+ #
72
+ # == Args
73
+ #
74
+ # The constructor accepts a hash as options for parser.
75
+ # Keys of options are pattern names of URI components
76
+ # and values of options are pattern strings.
77
+ # The constructor generates a set of regexps for parsing URIs.
78
+ #
79
+ # You can use the following keys:
80
+ #
81
+ # * :ESCAPED (URI::PATTERN::ESCAPED in default)
82
+ # * :UNRESERVED (URI::PATTERN::UNRESERVED in default)
83
+ # * :DOMLABEL (URI::PATTERN::DOMLABEL in default)
84
+ # * :TOPLABEL (URI::PATTERN::TOPLABEL in default)
85
+ # * :HOSTNAME (URI::PATTERN::HOSTNAME in default)
86
+ #
87
+ # == Examples
88
+ #
89
+ # p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})")
90
+ # u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP:0xb78cf4f8 URL:http://example.jp/%uABCD>
91
+ # URI.parse(u.to_s) #=> raises URI::InvalidURIError
92
+ #
93
+ # s = "http://example.com/ABCD"
94
+ # u1 = p.parse(s) #=> #<URI::HTTP:0xb78c3220 URL:http://example.com/ABCD>
95
+ # u2 = URI.parse(s) #=> #<URI::HTTP:0xb78b6d54 URL:http://example.com/ABCD>
96
+ # u1 == u2 #=> true
97
+ # u1.eql?(u2) #=> false
98
+ #
99
+ def initialize(opts = {})
100
+ @pattern = initialize_pattern(opts)
101
+ @pattern.each_value {|v| v.freeze}
102
+ @pattern.freeze
103
+
104
+ @regexp = initialize_regexp(@pattern)
105
+ @regexp.each_value {|v| v.freeze}
106
+ @regexp.freeze
107
+ end
108
+
109
+ # The Hash of patterns.
110
+ #
111
+ # see also URI::Parser.initialize_pattern
112
+ attr_reader :pattern
113
+
114
+ # The Hash of Regexp
115
+ #
116
+ # see also URI::Parser.initialize_regexp
117
+ attr_reader :regexp
118
+
119
+ # Returns a split URI against regexp[:ABS_URI]
120
+ def split(uri)
121
+ case uri
122
+ when ''
123
+ # null uri
124
+
125
+ when @regexp[:ABS_URI]
126
+ scheme, opaque, userinfo, host, port,
127
+ registry, path, query, fragment = $~[1..-1]
128
+
129
+ # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
130
+
131
+ # absoluteURI = scheme ":" ( hier_part | opaque_part )
132
+ # hier_part = ( net_path | abs_path ) [ "?" query ]
133
+ # opaque_part = uric_no_slash *uric
134
+
135
+ # abs_path = "/" path_segments
136
+ # net_path = "//" authority [ abs_path ]
137
+
138
+ # authority = server | reg_name
139
+ # server = [ [ userinfo "@" ] hostport ]
140
+
141
+ if !scheme
142
+ raise InvalidURIError,
143
+ "bad URI(absolute but no scheme): #{uri}"
144
+ end
145
+ if !opaque && (!path && (!host && !registry))
146
+ raise InvalidURIError,
147
+ "bad URI(absolute but no path): #{uri}"
148
+ end
149
+
150
+ when @regexp[:REL_URI]
151
+ scheme = nil
152
+ opaque = nil
153
+
154
+ userinfo, host, port, registry,
155
+ rel_segment, abs_path, query, fragment = $~[1..-1]
156
+ if rel_segment && abs_path
157
+ path = rel_segment + abs_path
158
+ elsif rel_segment
159
+ path = rel_segment
160
+ elsif abs_path
161
+ path = abs_path
162
+ end
163
+
164
+ # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
165
+
166
+ # relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
167
+
168
+ # net_path = "//" authority [ abs_path ]
169
+ # abs_path = "/" path_segments
170
+ # rel_path = rel_segment [ abs_path ]
171
+
172
+ # authority = server | reg_name
173
+ # server = [ [ userinfo "@" ] hostport ]
174
+
175
+ else
176
+ raise InvalidURIError, "bad URI(is not URI?): #{uri}"
177
+ end
178
+
179
+ path = '' if !path && !opaque # (see RFC2396 Section 5.2)
180
+ ret = [
181
+ scheme,
182
+ userinfo, host, port, # X
183
+ registry, # X
184
+ path, # Y
185
+ opaque, # Y
186
+ query,
187
+ fragment
188
+ ]
189
+ return ret
190
+ end
191
+
192
+ #
193
+ # == Args
194
+ #
195
+ # +uri+::
196
+ # String
197
+ #
198
+ # == Description
199
+ #
200
+ # parses +uri+ and constructs either matching URI scheme object
201
+ # (FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or URI::Generic
202
+ #
203
+ # == Usage
204
+ #
205
+ # p = URI::Parser.new
206
+ # p.parse("ldap://ldap.example.com/dc=example?user=john")
207
+ # #=> #<URI::LDAP:0x00000000b9e7e8 URL:ldap://ldap.example.com/dc=example?user=john>
208
+ #
209
+ def parse(uri)
210
+ scheme, userinfo, host, port,
211
+ registry, path, opaque, query, fragment = self.split(uri)
212
+
213
+ if scheme && URI.scheme_list.include?(scheme.upcase)
214
+ URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port,
215
+ registry, path, opaque, query,
216
+ fragment, self)
217
+ else
218
+ Generic.new(scheme, userinfo, host, port,
219
+ registry, path, opaque, query,
220
+ fragment, self)
221
+ end
222
+ end
223
+
224
+
225
+ #
226
+ # == Args
227
+ #
228
+ # +uris+::
229
+ # an Array of Strings
230
+ #
231
+ # == Description
232
+ #
233
+ # Attempts to parse and merge a set of URIs
234
+ #
235
+ def join(*uris)
236
+ uris[0] = convert_to_uri(uris[0])
237
+ uris.inject :merge
238
+ end
239
+
240
+ #
241
+ # :call-seq:
242
+ # extract( str )
243
+ # extract( str, schemes )
244
+ # extract( str, schemes ) {|item| block }
245
+ #
246
+ # == Args
247
+ #
248
+ # +str+::
249
+ # String to search
250
+ # +schemes+::
251
+ # Patterns to apply to +str+
252
+ #
253
+ # == Description
254
+ #
255
+ # Scans +str+ for URIs, optionally limited to the given +schemes+.
256
+ # If no +block+ is given, then returns the result,
257
+ # else it calls +block+ for each element in result.
258
+ #
259
+ # see also URI::Parser.make_regexp
260
+ #
261
+ def extract(str, schemes = nil)
262
+ if block_given?
263
+ str.scan(make_regexp(schemes)) { yield $& }
264
+ nil
265
+ else
266
+ result = []
267
+ str.scan(make_regexp(schemes)) { result.push $& }
268
+ result
269
+ end
270
+ end
271
+
272
+ # returns Regexp that is default self.regexp[:ABS_URI_REF],
273
+ # unless +schemes+ is provided. Then it is a Regexp.union with self.pattern[:X_ABS_URI]
274
+ def make_regexp(schemes = nil)
275
+ unless schemes
276
+ @regexp[:ABS_URI_REF]
277
+ else
278
+ /(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/x
279
+ end
280
+ end
281
+
282
+ #
283
+ # :call-seq:
284
+ # escape( str )
285
+ # escape( str, unsafe )
286
+ #
287
+ # == Args
288
+ #
289
+ # +str+::
290
+ # String to make safe
291
+ # +unsafe+::
292
+ # Regexp to apply. Defaults to self.regexp[:UNSAFE]
293
+ #
294
+ # == Description
295
+ #
296
+ # constructs a safe String from +str+, removing unsafe characters,
297
+ # replacing them with codes.
298
+ #
299
+ def escape(str, unsafe = @regexp[:UNSAFE])
300
+ unless unsafe.kind_of?(Regexp)
301
+ # perhaps unsafe is String object
302
+ unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false)
303
+ end
304
+ str.gsub(unsafe) do
305
+ us = $&
306
+ tmp = ''
307
+ us.each_byte do |uc|
308
+ tmp << sprintf('%%%02X', uc)
309
+ end
310
+ tmp
311
+ end.force_encoding(Encoding::US_ASCII)
312
+ end
313
+
314
+ #
315
+ # :call-seq:
316
+ # unescape( str )
317
+ # unescape( str, escaped )
318
+ #
319
+ # == Args
320
+ #
321
+ # +str+::
322
+ # String to remove escapes from
323
+ # +escaped+::
324
+ # Regexp to apply. Defaults to self.regexp[:ESCAPED]
325
+ #
326
+ # == Description
327
+ #
328
+ # Removes escapes from +str+
329
+ #
330
+ def unescape(str, escaped = @regexp[:ESCAPED])
331
+ str.gsub(escaped) { [$&[1, 2].hex].pack('C') }.force_encoding(str.encoding)
332
+ end
333
+
334
+ @@to_s = Kernel.instance_method(:to_s)
335
+ def inspect
336
+ @@to_s.bind(self).call
337
+ end
338
+
339
+ private
340
+
341
+ # Constructs the default Hash of patterns
342
+ def initialize_pattern(opts = {})
343
+ ret = {}
344
+ ret[:ESCAPED] = escaped = (opts.delete(:ESCAPED) || PATTERN::ESCAPED)
345
+ ret[:UNRESERVED] = unreserved = opts.delete(:UNRESERVED) || PATTERN::UNRESERVED
346
+ ret[:RESERVED] = reserved = opts.delete(:RESERVED) || PATTERN::RESERVED
347
+ ret[:DOMLABEL] = opts.delete(:DOMLABEL) || PATTERN::DOMLABEL
348
+ ret[:TOPLABEL] = opts.delete(:TOPLABEL) || PATTERN::TOPLABEL
349
+ ret[:HOSTNAME] = hostname = opts.delete(:HOSTNAME)
350
+
351
+ # RFC 2396 (URI Generic Syntax)
352
+ # RFC 2732 (IPv6 Literal Addresses in URL's)
353
+ # RFC 2373 (IPv6 Addressing Architecture)
354
+
41
355
  # uric = reserved | unreserved | escaped
42
- URIC = "(?:[#{UNRESERVED}#{RESERVED}]|#{ESCAPED})"
356
+ ret[:URIC] = uric = "(?:[#{unreserved}#{reserved}]|#{escaped})"
43
357
  # uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
44
358
  # "&" | "=" | "+" | "$" | ","
45
- URIC_NO_SLASH = "(?:[#{UNRESERVED};?:@&=+$,]|#{ESCAPED})"
359
+ ret[:URIC_NO_SLASH] = uric_no_slash = "(?:[#{unreserved};?:@&=+$,]|#{escaped})"
46
360
  # query = *uric
47
- QUERY = "#{URIC}*"
361
+ ret[:QUERY] = query = "#{uric}*"
48
362
  # fragment = *uric
49
- FRAGMENT = "#{URIC}*"
363
+ ret[:FRAGMENT] = fragment = "#{uric}*"
50
364
 
51
- # domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
52
- DOMLABEL = "(?:[#{ALNUM}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
53
- # toplabel = alpha | alpha *( alphanum | "-" ) alphanum
54
- TOPLABEL = "(?:[#{ALPHA}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
55
365
  # hostname = *( domainlabel "." ) toplabel [ "." ]
56
- HOSTNAME = "(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?"
366
+ # reg-name = *( unreserved / pct-encoded / sub-delims ) # RFC3986
367
+ unless hostname
368
+ ret[:HOSTNAME] = hostname = "(?:[a-zA-Z0-9\\-.]|%\\h\\h)+"
369
+ end
57
370
 
58
371
  # RFC 2373, APPENDIX B:
59
372
  # IPv6address = hexpart [ ":" IPv4address ]
@@ -66,153 +379,178 @@ module URI
66
379
  # allowed too. Here is a replacement.
67
380
  #
68
381
  # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
69
- IPV4ADDR = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
382
+ ret[:IPV4ADDR] = ipv4addr = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
70
383
  # hex4 = 1*4HEXDIG
71
- HEX4 = "[#{HEX}]{1,4}"
384
+ hex4 = "[#{PATTERN::HEX}]{1,4}"
72
385
  # lastpart = hex4 | IPv4address
73
- LASTPART = "(?:#{HEX4}|#{IPV4ADDR})"
386
+ lastpart = "(?:#{hex4}|#{ipv4addr})"
74
387
  # hexseq1 = *( hex4 ":" ) hex4
75
- HEXSEQ1 = "(?:#{HEX4}:)*#{HEX4}"
388
+ hexseq1 = "(?:#{hex4}:)*#{hex4}"
76
389
  # hexseq2 = *( hex4 ":" ) lastpart
77
- HEXSEQ2 = "(?:#{HEX4}:)*#{LASTPART}"
390
+ hexseq2 = "(?:#{hex4}:)*#{lastpart}"
78
391
  # IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ]
79
- IPV6ADDR = "(?:#{HEXSEQ2}|(?:#{HEXSEQ1})?::(?:#{HEXSEQ2})?)"
392
+ ret[:IPV6ADDR] = ipv6addr = "(?:#{hexseq2}|(?:#{hexseq1})?::(?:#{hexseq2})?)"
80
393
 
81
394
  # IPv6prefix = ( hexseq1 | [ hexseq1 ] "::" [ hexseq1 ] ) "/" 1*2DIGIT
82
395
  # unused
83
396
 
84
397
  # ipv6reference = "[" IPv6address "]" (RFC 2732)
85
- IPV6REF = "\\[#{IPV6ADDR}\\]"
398
+ ret[:IPV6REF] = ipv6ref = "\\[#{ipv6addr}\\]"
86
399
 
87
400
  # host = hostname | IPv4address
88
401
  # host = hostname | IPv4address | IPv6reference (RFC 2732)
89
- HOST = "(?:#{HOSTNAME}|#{IPV4ADDR}|#{IPV6REF})"
402
+ ret[:HOST] = host = "(?:#{hostname}|#{ipv4addr}|#{ipv6ref})"
90
403
  # port = *digit
91
- PORT = '\d*'
404
+ port = '\d*'
92
405
  # hostport = host [ ":" port ]
93
- HOSTPORT = "#{HOST}(?::#{PORT})?"
406
+ ret[:HOSTPORT] = hostport = "#{host}(?::#{port})?"
94
407
 
95
408
  # userinfo = *( unreserved | escaped |
96
409
  # ";" | ":" | "&" | "=" | "+" | "$" | "," )
97
- USERINFO = "(?:[#{UNRESERVED};:&=+$,]|#{ESCAPED})*"
410
+ ret[:USERINFO] = userinfo = "(?:[#{unreserved};:&=+$,]|#{escaped})*"
98
411
 
99
412
  # pchar = unreserved | escaped |
100
413
  # ":" | "@" | "&" | "=" | "+" | "$" | ","
101
- PCHAR = "(?:[#{UNRESERVED}:@&=+$,]|#{ESCAPED})"
414
+ pchar = "(?:[#{unreserved}:@&=+$,]|#{escaped})"
102
415
  # param = *pchar
103
- PARAM = "#{PCHAR}*"
416
+ param = "#{pchar}*"
104
417
  # segment = *pchar *( ";" param )
105
- SEGMENT = "#{PCHAR}*(?:;#{PARAM})*"
418
+ segment = "#{pchar}*(?:;#{param})*"
106
419
  # path_segments = segment *( "/" segment )
107
- PATH_SEGMENTS = "#{SEGMENT}(?:/#{SEGMENT})*"
420
+ ret[:PATH_SEGMENTS] = path_segments = "#{segment}(?:/#{segment})*"
108
421
 
109
422
  # server = [ [ userinfo "@" ] hostport ]
110
- SERVER = "(?:#{USERINFO}@)?#{HOSTPORT}"
423
+ server = "(?:#{userinfo}@)?#{hostport}"
111
424
  # reg_name = 1*( unreserved | escaped | "$" | "," |
112
425
  # ";" | ":" | "@" | "&" | "=" | "+" )
113
- REG_NAME = "(?:[#{UNRESERVED}$,;:@&=+]|#{ESCAPED})+"
426
+ ret[:REG_NAME] = reg_name = "(?:[#{unreserved}$,;:@&=+]|#{escaped})+"
114
427
  # authority = server | reg_name
115
- AUTHORITY = "(?:#{SERVER}|#{REG_NAME})"
428
+ authority = "(?:#{server}|#{reg_name})"
116
429
 
117
430
  # rel_segment = 1*( unreserved | escaped |
118
431
  # ";" | "@" | "&" | "=" | "+" | "$" | "," )
119
- REL_SEGMENT = "(?:[#{UNRESERVED};@&=+$,]|#{ESCAPED})+"
432
+ ret[:REL_SEGMENT] = rel_segment = "(?:[#{unreserved};@&=+$,]|#{escaped})+"
120
433
 
121
434
  # scheme = alpha *( alpha | digit | "+" | "-" | "." )
122
- SCHEME = "[#{ALPHA}][-+.#{ALPHA}\\d]*"
435
+ ret[:SCHEME] = scheme = "[#{PATTERN::ALPHA}][\\-+.#{PATTERN::ALPHA}\\d]*"
123
436
 
124
437
  # abs_path = "/" path_segments
125
- ABS_PATH = "/#{PATH_SEGMENTS}"
438
+ ret[:ABS_PATH] = abs_path = "/#{path_segments}"
126
439
  # rel_path = rel_segment [ abs_path ]
127
- REL_PATH = "#{REL_SEGMENT}(?:#{ABS_PATH})?"
440
+ ret[:REL_PATH] = rel_path = "#{rel_segment}(?:#{abs_path})?"
128
441
  # net_path = "//" authority [ abs_path ]
129
- NET_PATH = "//#{AUTHORITY}(?:#{ABS_PATH})?"
442
+ ret[:NET_PATH] = net_path = "//#{authority}(?:#{abs_path})?"
130
443
 
131
444
  # hier_part = ( net_path | abs_path ) [ "?" query ]
132
- HIER_PART = "(?:#{NET_PATH}|#{ABS_PATH})(?:\\?(?:#{QUERY}))?"
445
+ ret[:HIER_PART] = hier_part = "(?:#{net_path}|#{abs_path})(?:\\?(?:#{query}))?"
133
446
  # opaque_part = uric_no_slash *uric
134
- OPAQUE_PART = "#{URIC_NO_SLASH}#{URIC}*"
447
+ ret[:OPAQUE_PART] = opaque_part = "#{uric_no_slash}#{uric}*"
135
448
 
136
449
  # absoluteURI = scheme ":" ( hier_part | opaque_part )
137
- ABS_URI = "#{SCHEME}:(?:#{HIER_PART}|#{OPAQUE_PART})"
450
+ ret[:ABS_URI] = abs_uri = "#{scheme}:(?:#{hier_part}|#{opaque_part})"
138
451
  # relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
139
- REL_URI = "(?:#{NET_PATH}|#{ABS_PATH}|#{REL_PATH})(?:\\?#{QUERY})?"
452
+ ret[:REL_URI] = rel_uri = "(?:#{net_path}|#{abs_path}|#{rel_path})(?:\\?#{query})?"
140
453
 
141
454
  # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
142
- URI_REF = "(?:#{ABS_URI}|#{REL_URI})?(?:##{FRAGMENT})?"
455
+ ret[:URI_REF] = "(?:#{abs_uri}|#{rel_uri})?(?:##{fragment})?"
143
456
 
144
- # XXX:
145
- X_ABS_URI = "
146
- (#{PATTERN::SCHEME}): (?# 1: scheme)
457
+ ret[:X_ABS_URI] = "
458
+ (#{scheme}): (?# 1: scheme)
147
459
  (?:
148
- (#{PATTERN::OPAQUE_PART}) (?# 2: opaque)
460
+ (#{opaque_part}) (?# 2: opaque)
149
461
  |
150
462
  (?:(?:
151
463
  //(?:
152
- (?:(?:(#{PATTERN::USERINFO})@)? (?# 3: userinfo)
153
- (?:(#{PATTERN::HOST})(?::(\\d*))?))?(?# 4: host, 5: port)
464
+ (?:(?:(#{userinfo})@)? (?# 3: userinfo)
465
+ (?:(#{host})(?::(\\d*))?))? (?# 4: host, 5: port)
154
466
  |
155
- (#{PATTERN::REG_NAME}) (?# 6: registry)
467
+ (#{reg_name}) (?# 6: registry)
156
468
  )
157
469
  |
158
- (?!//)) (?# XXX: '//' is the mark for hostport)
159
- (#{PATTERN::ABS_PATH})? (?# 7: path)
160
- )(?:\\?(#{PATTERN::QUERY}))? (?# 8: query)
470
+ (?!//)) (?# XXX: '//' is the mark for hostport)
471
+ (#{abs_path})? (?# 7: path)
472
+ )(?:\\?(#{query}))? (?# 8: query)
161
473
  )
162
- (?:\\#(#{PATTERN::FRAGMENT}))? (?# 9: fragment)
474
+ (?:\\#(#{fragment}))? (?# 9: fragment)
163
475
  "
164
- X_REL_URI = "
476
+
477
+ ret[:X_REL_URI] = "
165
478
  (?:
166
479
  (?:
167
480
  //
168
481
  (?:
169
- (?:(#{PATTERN::USERINFO})@)? (?# 1: userinfo)
170
- (#{PATTERN::HOST})?(?::(\\d*))? (?# 2: host, 3: port)
482
+ (?:(#{userinfo})@)? (?# 1: userinfo)
483
+ (#{host})?(?::(\\d*))? (?# 2: host, 3: port)
171
484
  |
172
- (#{PATTERN::REG_NAME}) (?# 4: registry)
485
+ (#{reg_name}) (?# 4: registry)
173
486
  )
174
487
  )
175
488
  |
176
- (#{PATTERN::REL_SEGMENT}) (?# 5: rel_segment)
489
+ (#{rel_segment}) (?# 5: rel_segment)
177
490
  )?
178
- (#{PATTERN::ABS_PATH})? (?# 6: abs_path)
179
- (?:\\?(#{PATTERN::QUERY}))? (?# 7: query)
180
- (?:\\#(#{PATTERN::FRAGMENT}))? (?# 8: fragment)
491
+ (#{abs_path})? (?# 6: abs_path)
492
+ (?:\\?(#{query}))? (?# 7: query)
493
+ (?:\\#(#{fragment}))? (?# 8: fragment)
181
494
  "
182
- # :startdoc:
183
- end # PATTERN
184
495
 
185
- # :stopdoc:
186
-
187
- # for URI::split
188
- ABS_URI = Regexp.new('^' + PATTERN::X_ABS_URI + '$', #'
189
- Regexp::EXTENDED, 'N').freeze
190
- REL_URI = Regexp.new('^' + PATTERN::X_REL_URI + '$', #'
191
- Regexp::EXTENDED, 'N').freeze
192
-
193
- # for URI::extract
194
- URI_REF = Regexp.new(PATTERN::URI_REF, false, 'N').freeze
195
- ABS_URI_REF = Regexp.new(PATTERN::X_ABS_URI, Regexp::EXTENDED, 'N').freeze
196
- REL_URI_REF = Regexp.new(PATTERN::X_REL_URI, Regexp::EXTENDED, 'N').freeze
197
-
198
- # for URI::escape/unescape
199
- ESCAPED = Regexp.new(PATTERN::ESCAPED, false, 'N').freeze
200
- UNSAFE = Regexp.new("[^#{PATTERN::UNRESERVED}#{PATTERN::RESERVED}]",
201
- false, 'N').freeze
202
-
203
- # for Generic#initialize
204
- SCHEME = Regexp.new("^#{PATTERN::SCHEME}$", false, 'N').freeze #"
205
- USERINFO = Regexp.new("^#{PATTERN::USERINFO}$", false, 'N').freeze #"
206
- HOST = Regexp.new("^#{PATTERN::HOST}$", false, 'N').freeze #"
207
- PORT = Regexp.new("^#{PATTERN::PORT}$", false, 'N').freeze #"
208
- OPAQUE = Regexp.new("^#{PATTERN::OPAQUE_PART}$", false, 'N').freeze #"
209
- REGISTRY = Regexp.new("^#{PATTERN::REG_NAME}$", false, 'N').freeze #"
210
- ABS_PATH = Regexp.new("^#{PATTERN::ABS_PATH}$", false, 'N').freeze #"
211
- REL_PATH = Regexp.new("^#{PATTERN::REL_PATH}$", false, 'N').freeze #"
212
- QUERY = Regexp.new("^#{PATTERN::QUERY}$", false, 'N').freeze #"
213
- FRAGMENT = Regexp.new("^#{PATTERN::FRAGMENT}$", false, 'N').freeze #"
214
- # :startdoc:
215
- end # REGEXP
496
+ ret
497
+ end
498
+
499
+ # Constructs the default Hash of Regexp's
500
+ def initialize_regexp(pattern)
501
+ ret = {}
502
+
503
+ # for URI::split
504
+ ret[:ABS_URI] = Regexp.new('\A\s*' + pattern[:X_ABS_URI] + '\s*\z', Regexp::EXTENDED)
505
+ ret[:REL_URI] = Regexp.new('\A\s*' + pattern[:X_REL_URI] + '\s*\z', Regexp::EXTENDED)
506
+
507
+ # for URI::extract
508
+ ret[:URI_REF] = Regexp.new(pattern[:URI_REF])
509
+ ret[:ABS_URI_REF] = Regexp.new(pattern[:X_ABS_URI], Regexp::EXTENDED)
510
+ ret[:REL_URI_REF] = Regexp.new(pattern[:X_REL_URI], Regexp::EXTENDED)
511
+
512
+ # for URI::escape/unescape
513
+ ret[:ESCAPED] = Regexp.new(pattern[:ESCAPED])
514
+ ret[:UNSAFE] = Regexp.new("[^#{pattern[:UNRESERVED]}#{pattern[:RESERVED]}]")
515
+
516
+ # for Generic#initialize
517
+ ret[:SCHEME] = Regexp.new("\\A#{pattern[:SCHEME]}\\z")
518
+ ret[:USERINFO] = Regexp.new("\\A#{pattern[:USERINFO]}\\z")
519
+ ret[:HOST] = Regexp.new("\\A#{pattern[:HOST]}\\z")
520
+ ret[:PORT] = Regexp.new("\\A#{pattern[:PORT]}\\z")
521
+ ret[:OPAQUE] = Regexp.new("\\A#{pattern[:OPAQUE_PART]}\\z")
522
+ ret[:REGISTRY] = Regexp.new("\\A#{pattern[:REG_NAME]}\\z")
523
+ ret[:ABS_PATH] = Regexp.new("\\A#{pattern[:ABS_PATH]}\\z")
524
+ ret[:REL_PATH] = Regexp.new("\\A#{pattern[:REL_PATH]}\\z")
525
+ ret[:QUERY] = Regexp.new("\\A#{pattern[:QUERY]}\\z")
526
+ ret[:FRAGMENT] = Regexp.new("\\A#{pattern[:FRAGMENT]}\\z")
527
+
528
+ ret
529
+ end
530
+
531
+ def convert_to_uri(uri)
532
+ if uri.is_a?(URI::Generic)
533
+ uri
534
+ elsif uri = String.try_convert(uri)
535
+ parse(uri)
536
+ else
537
+ raise ArgumentError,
538
+ "bad argument (expected URI object or URI string)"
539
+ end
540
+ end
541
+
542
+ end # class Parser
543
+
544
+ # URI::Parser.new
545
+ DEFAULT_PARSER = Parser.new
546
+ DEFAULT_PARSER.pattern.each_pair do |sym, str|
547
+ unless REGEXP::PATTERN.const_defined?(sym)
548
+ REGEXP::PATTERN.const_set(sym, str)
549
+ end
550
+ end
551
+ DEFAULT_PARSER.regexp.each_pair do |sym, str|
552
+ const_set(sym, str)
553
+ end
216
554
 
217
555
  module Util # :nodoc:
218
556
  def make_components_hash(klass, array_hash)
@@ -236,7 +574,7 @@ module URI
236
574
  end
237
575
  end
238
576
  else
239
- raise ArgumentError,
577
+ raise ArgumentError,
240
578
  "expected Array of or Hash of components of #{klass.to_s} (#{klass.component[1..-1].join(', ')})"
241
579
  end
242
580
  tmp[:scheme] = klass.to_s.sub(/\A.*::/, '').downcase
@@ -246,9 +584,8 @@ module URI
246
584
  module_function :make_components_hash
247
585
  end
248
586
 
587
+ # module for escaping unsafe characters with codes.
249
588
  module Escape
250
- include REGEXP
251
-
252
589
  #
253
590
  # == Synopsis
254
591
  #
@@ -281,18 +618,9 @@ module URI
281
618
  # p URI.escape("@?@!", "!?")
282
619
  # # => "@%3F@%21"
283
620
  #
284
- def escape(str, unsafe = UNSAFE)
285
- unless unsafe.kind_of?(Regexp)
286
- # perhaps unsafe is String object
287
- unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false, 'N')
288
- end
289
- str.gsub(unsafe) do |us|
290
- tmp = ''
291
- us.each_byte do |uc|
292
- tmp << sprintf('%%%02X', uc)
293
- end
294
- tmp
295
- end
621
+ def escape(*arg)
622
+ warn "#{caller(1)[0]}: warning: URI.escape is obsolete" if $VERBOSE
623
+ DEFAULT_PARSER.escape(*arg)
296
624
  end
297
625
  alias encode escape
298
626
  #
@@ -316,19 +644,22 @@ module URI
316
644
  # p URI.unescape(enc_uri)
317
645
  # # => "http://example.com/?a=\t\r"
318
646
  #
319
- def unescape(str)
320
- str.gsub(ESCAPED) do
321
- $&[1,2].hex.chr
322
- end
647
+ def unescape(*arg)
648
+ warn "#{caller(1)[0]}: warning: URI.unescape is obsolete" if $VERBOSE
649
+ DEFAULT_PARSER.unescape(*arg)
323
650
  end
324
651
  alias decode unescape
325
- end
652
+ end # module Escape
326
653
 
327
- include REGEXP
328
654
  extend Escape
655
+ include REGEXP
329
656
 
330
657
  @@schemes = {}
331
-
658
+ # Returns a Hash of the defined schemes
659
+ def self.scheme_list
660
+ @@schemes
661
+ end
662
+
332
663
  #
333
664
  # Base class for all URI exceptions.
334
665
  #
@@ -369,7 +700,7 @@ module URI
369
700
  # * Opaque
370
701
  # * Query
371
702
  # * Fragment
372
- #
703
+ #
373
704
  # == Usage
374
705
  #
375
706
  # require 'uri'
@@ -378,75 +709,7 @@ module URI
378
709
  # # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]
379
710
  #
380
711
  def self.split(uri)
381
- case uri
382
- when ''
383
- # null uri
384
-
385
- when ABS_URI
386
- scheme, opaque, userinfo, host, port,
387
- registry, path, query, fragment = $~[1..-1]
388
-
389
- # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
390
-
391
- # absoluteURI = scheme ":" ( hier_part | opaque_part )
392
- # hier_part = ( net_path | abs_path ) [ "?" query ]
393
- # opaque_part = uric_no_slash *uric
394
-
395
- # abs_path = "/" path_segments
396
- # net_path = "//" authority [ abs_path ]
397
-
398
- # authority = server | reg_name
399
- # server = [ [ userinfo "@" ] hostport ]
400
-
401
- if !scheme
402
- raise InvalidURIError,
403
- "bad URI(absolute but no scheme): #{uri}"
404
- end
405
- if !opaque && (!path && (!host && !registry))
406
- raise InvalidURIError,
407
- "bad URI(absolute but no path): #{uri}"
408
- end
409
-
410
- when REL_URI
411
- scheme = nil
412
- opaque = nil
413
-
414
- userinfo, host, port, registry,
415
- rel_segment, abs_path, query, fragment = $~[1..-1]
416
- if rel_segment && abs_path
417
- path = rel_segment + abs_path
418
- elsif rel_segment
419
- path = rel_segment
420
- elsif abs_path
421
- path = abs_path
422
- end
423
-
424
- # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
425
-
426
- # relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
427
-
428
- # net_path = "//" authority [ abs_path ]
429
- # abs_path = "/" path_segments
430
- # rel_path = rel_segment [ abs_path ]
431
-
432
- # authority = server | reg_name
433
- # server = [ [ userinfo "@" ] hostport ]
434
-
435
- else
436
- raise InvalidURIError, "bad URI(is not URI?): #{uri}"
437
- end
438
-
439
- path = '' if !path && !opaque # (see RFC2396 Section 5.2)
440
- ret = [
441
- scheme,
442
- userinfo, host, port, # X
443
- registry, # X
444
- path, # Y
445
- opaque, # Y
446
- query,
447
- fragment
448
- ]
449
- return ret
712
+ DEFAULT_PARSER.split(uri)
450
713
  end
451
714
 
452
715
  #
@@ -462,7 +725,7 @@ module URI
462
725
  # == Description
463
726
  #
464
727
  # Creates one of the URI's subclasses instance from the string.
465
- #
728
+ #
466
729
  # == Raises
467
730
  #
468
731
  # URI::InvalidURIError
@@ -475,24 +738,13 @@ module URI
475
738
  # uri = URI.parse("http://www.ruby-lang.org/")
476
739
  # p uri
477
740
  # # => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/>
478
- # p uri.scheme
479
- # # => "http"
480
- # p uri.host
481
- # # => "www.ruby-lang.org"
482
- #
741
+ # p uri.scheme
742
+ # # => "http"
743
+ # p uri.host
744
+ # # => "www.ruby-lang.org"
745
+ #
483
746
  def self.parse(uri)
484
- scheme, userinfo, host, port,
485
- registry, path, opaque, query, fragment = self.split(uri)
486
-
487
- if scheme && @@schemes.include?(scheme.upcase)
488
- @@schemes[scheme.upcase].new(scheme, userinfo, host, port,
489
- registry, path, opaque, query,
490
- fragment)
491
- else
492
- Generic.new(scheme, userinfo, host, port,
493
- registry, path, opaque, query,
494
- fragment)
495
- end
747
+ DEFAULT_PARSER.parse(uri)
496
748
  end
497
749
 
498
750
  #
@@ -513,15 +765,24 @@ module URI
513
765
  #
514
766
  # require 'uri'
515
767
  #
516
- # p URI.join("http://localhost/","main.rbx")
768
+ # p URI.join("http://example.com/","main.rbx")
517
769
  # # => #<URI::HTTP:0x2022ac02 URL:http://example.com/main.rbx>
518
770
  #
771
+ # p URI.join('http://example.com', 'foo')
772
+ # # => #<URI::HTTP:0x01ab80a0 URL:http://example.com/foo>
773
+ #
774
+ # p URI.join('http://example.com', '/foo', '/bar')
775
+ # # => #<URI::HTTP:0x01aaf0b0 URL:http://example.com/bar>
776
+ #
777
+ # p URI.join('http://example.com', '/foo', 'bar')
778
+ # # => #<URI::HTTP:0x801a92af0 URL:http://example.com/bar>
779
+ #
780
+ # p URI.join('http://example.com', '/foo/', 'bar')
781
+ # # => #<URI::HTTP:0x80135a3a0 URL:http://example.com/foo/bar>
782
+ #
783
+ #
519
784
  def self.join(*str)
520
- u = self.parse(str[0])
521
- str[1 .. -1].each do |x|
522
- u = u.merge(x)
523
- end
524
- u
785
+ DEFAULT_PARSER.join(*str)
525
786
  end
526
787
 
527
788
  #
@@ -531,7 +792,7 @@ module URI
531
792
  #
532
793
  # == Args
533
794
  #
534
- # +str+::
795
+ # +str+::
535
796
  # String to extract URIs from.
536
797
  # +schemes+::
537
798
  # Limit URI matching to specific schemes.
@@ -549,14 +810,7 @@ module URI
549
810
  # # => ["http://foo.example.com/bla", "mailto:test@example.com"]
550
811
  #
551
812
  def self.extract(str, schemes = nil, &block)
552
- if block_given?
553
- str.scan(regexp(schemes)) { yield $& }
554
- nil
555
- else
556
- result = []
557
- str.scan(regexp(schemes)) { result.push $& }
558
- result
559
- end
813
+ DEFAULT_PARSER.extract(str, schemes, &block)
560
814
  end
561
815
 
562
816
  #
@@ -566,48 +820,182 @@ module URI
566
820
  #
567
821
  # == Args
568
822
  #
569
- # +match_schemes+::
823
+ # +match_schemes+::
570
824
  # Array of schemes. If given, the resulting regexp matches URIs
571
825
  # whose scheme is one of the match_schemes.
572
- #
826
+ #
573
827
  # == Description
574
828
  # Returns a Regexp object which matches URI-like strings.
575
829
  # The Regexp object returned by this method includes an arbitrary
576
830
  # number of capture groups (parentheses). Never rely on their number.
577
- #
831
+ #
578
832
  # == Usage
579
833
  #
580
834
  # require 'uri'
581
835
  #
582
836
  # # extract first URI from html_string
583
837
  # html_string.slice(URI.regexp)
584
- #
838
+ #
585
839
  # # remove ftp URIs
586
840
  # html_string.sub(URI.regexp(['ftp']), '')
587
- #
841
+ #
588
842
  # # You should not rely on the number of parentheses
589
843
  # html_string.scan(URI.regexp) do |*matches|
590
844
  # p $&
591
845
  # end
592
846
  #
593
847
  def self.regexp(schemes = nil)
594
- unless schemes
595
- ABS_URI_REF
848
+ DEFAULT_PARSER.make_regexp(schemes)
849
+ end
850
+
851
+ TBLENCWWWCOMP_ = {} # :nodoc:
852
+ 256.times do |i|
853
+ TBLENCWWWCOMP_[i.chr] = '%%%02X' % i
854
+ end
855
+ TBLENCWWWCOMP_[' '] = '+'
856
+ TBLENCWWWCOMP_.freeze
857
+ TBLDECWWWCOMP_ = {} # :nodoc:
858
+ 256.times do |i|
859
+ h, l = i>>4, i&15
860
+ TBLDECWWWCOMP_['%%%X%X' % [h, l]] = i.chr
861
+ TBLDECWWWCOMP_['%%%x%X' % [h, l]] = i.chr
862
+ TBLDECWWWCOMP_['%%%X%x' % [h, l]] = i.chr
863
+ TBLDECWWWCOMP_['%%%x%x' % [h, l]] = i.chr
864
+ end
865
+ TBLDECWWWCOMP_['+'] = ' '
866
+ TBLDECWWWCOMP_.freeze
867
+
868
+ HTML5ASCIIINCOMPAT = [Encoding::UTF_7, Encoding::UTF_16BE, Encoding::UTF_16LE,
869
+ Encoding::UTF_32BE, Encoding::UTF_32LE] # :nodoc:
870
+
871
+ # Encode given +str+ to URL-encoded form data.
872
+ #
873
+ # This method doesn't convert *, -, ., 0-9, A-Z, _, a-z, but does convert SP
874
+ # (ASCII space) to + and converts others to %XX.
875
+ #
876
+ # This is an implementation of
877
+ # http://www.w3.org/TR/html5/association-of-controls-and-forms.html#url-encoded-form-data
878
+ #
879
+ # See URI.decode_www_form_component, URI.encode_www_form
880
+ def self.encode_www_form_component(str)
881
+ str = str.to_s
882
+ if HTML5ASCIIINCOMPAT.include?(str.encoding)
883
+ str = str.encode(Encoding::UTF_8)
596
884
  else
597
- /(?=#{Regexp.union(*schemes)}:)#{PATTERN::X_ABS_URI}/xn
885
+ str = str.dup
598
886
  end
887
+ str.force_encoding(Encoding::ASCII_8BIT)
888
+ str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
889
+ str.force_encoding(Encoding::US_ASCII)
599
890
  end
600
891
 
601
- end
892
+ # Decode given +str+ of URL-encoded form data.
893
+ #
894
+ # This decodes + to SP.
895
+ #
896
+ # See URI.encode_www_form_component, URI.decode_www_form
897
+ def self.decode_www_form_component(str, enc=Encoding::UTF_8)
898
+ raise ArgumentError, "invalid %-encoding (#{str})" unless /\A[^%]*(?:%\h\h[^%]*)*\z/ =~ str
899
+ str.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
900
+ end
602
901
 
603
- module Kernel
604
- # alias for URI.parse.
902
+ # Generate URL-encoded form data from given +enum+.
903
+ #
904
+ # This generates application/x-www-form-urlencoded data defined in HTML5
905
+ # from a given Enumerable object.
906
+ #
907
+ # This internally uses URI.encode_www_form_component(str).
908
+ #
909
+ # This method doesn't convert the encoding of given items, so convert them
910
+ # before calling this method if you want to send data as other than original
911
+ # encoding or mixed encoding data. (Strings which are encoded in an HTML5
912
+ # ASCII incompatible encoding are converted to UTF-8.)
913
+ #
914
+ # This method doesn't handle files. When you send a file, use
915
+ # multipart/form-data.
916
+ #
917
+ # This is an implementation of
918
+ # http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
919
+ #
920
+ # URI.encode_www_form([["q", "ruby"], ["lang", "en"]])
921
+ # #=> "q=ruby&lang=en"
922
+ # URI.encode_www_form("q" => "ruby", "lang" => "en")
923
+ # #=> "q=ruby&lang=en"
924
+ # URI.encode_www_form("q" => ["ruby", "perl"], "lang" => "en")
925
+ # #=> "q=ruby&q=perl&lang=en"
926
+ # URI.encode_www_form([["q", "ruby"], ["q", "perl"], ["lang", "en"]])
927
+ # #=> "q=ruby&q=perl&lang=en"
928
+ #
929
+ # See URI.encode_www_form_component, URI.decode_www_form
930
+ def self.encode_www_form(enum)
931
+ enum.map do |k,v|
932
+ if v.nil?
933
+ encode_www_form_component(k)
934
+ elsif v.respond_to?(:to_ary)
935
+ v.to_ary.map do |w|
936
+ str = encode_www_form_component(k)
937
+ unless w.nil?
938
+ str << '='
939
+ str << encode_www_form_component(w)
940
+ end
941
+ end.join('&')
942
+ else
943
+ str = encode_www_form_component(k)
944
+ str << '='
945
+ str << encode_www_form_component(v)
946
+ end
947
+ end.join('&')
948
+ end
949
+
950
+ WFKV_ = '(?:[^%#=;&]*(?:%\h\h[^%#=;&]*)*)' # :nodoc:
951
+
952
+ # Decode URL-encoded form data from given +str+.
953
+ #
954
+ # This decodes application/x-www-form-urlencoded data
955
+ # and returns array of key-value array.
956
+ # This internally uses URI.decode_www_form_component.
605
957
  #
606
- # This method is introduced at 1.8.2.
607
- def URI(uri_str) # :doc:
608
- return uri_str if uri_str.is_a? URI
958
+ # _charset_ hack is not supported now because the mapping from given charset
959
+ # to Ruby's encoding is not clear yet.
960
+ # see also http://www.w3.org/TR/html5/syntax.html#character-encodings-0
961
+ #
962
+ # This refers to http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
963
+ #
964
+ # ary = URI.decode_www_form("a=1&a=2&b=3")
965
+ # p ary #=> [['a', '1'], ['a', '2'], ['b', '3']]
966
+ # p ary.assoc('a').last #=> '1'
967
+ # p ary.assoc('b').last #=> '3'
968
+ # p ary.rassoc('2').first #=> 'a'
969
+ # p Hash[ary] # => {"a"=>"2", "b"=>"3"}
970
+ #
971
+ # See URI.decode_www_form_component, URI.encode_www_form
972
+ def self.decode_www_form(str, enc=Encoding::UTF_8)
973
+ return [] if str.empty?
974
+ unless /\A#{WFKV_}=#{WFKV_}(?:[;&]#{WFKV_}=#{WFKV_})*\z/o =~ str
975
+ raise ArgumentError, "invalid data of application/x-www-form-urlencoded (#{str})"
976
+ end
977
+ ary = []
978
+ $&.scan(/([^=;&]+)=([^;&]*)/) do
979
+ ary << [decode_www_form_component($1, enc), decode_www_form_component($2, enc)]
980
+ end
981
+ ary
982
+ end
983
+ end # module URI
984
+
985
+ module Kernel
609
986
 
610
- URI.parse(uri_str)
987
+ #
988
+ # Returns +uri+ converted to a URI object.
989
+ #
990
+ def URI(uri)
991
+ if uri.is_a?(URI::Generic)
992
+ uri
993
+ elsif uri = String.try_convert(uri)
994
+ URI.parse(uri)
995
+ else
996
+ raise ArgumentError,
997
+ "bad argument (expected URI object or URI string)"
998
+ end
611
999
  end
612
1000
  module_function :URI
613
1001
  end