rubysl-uri 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9dc1de2a61a2cb733ce807462f966d2011278ba6
4
- data.tar.gz: fc2994a37404922968e36219d439a21e2d906e6d
3
+ metadata.gz: 1cdbb4d6b18672fb1ef0eea47a1b42be33ba153c
4
+ data.tar.gz: a83c4da2e152500850a8eef0b8a93c9e795bdb58
5
5
  SHA512:
6
- metadata.gz: 4d7d26cb2bcdf7a8dbe255b37012717439d9b79c901a056c0f3e6e6c4457807f3552be29252472883d15ec2ea9e27dfc80e255a36edf467c04c36d405499579c
7
- data.tar.gz: 7915d126fdea40be02cde056bde73a3b51f400502c29099e172c3db08a67025f95103933e1c71ad94f78b2425060c8f40feea91fd39a066a4d96de8f97dd880d
6
+ metadata.gz: ff5396567881b630b8e5cf085c9d4f0ba53bd98023ac2c3bcdd5e722d7f438eb4cca350b298b1b390e8f2080d96bfbd630c5245273e4c0a8ab1bd412e2edbf3f
7
+ data.tar.gz: 0eae37ca085e4c68a2f54848311226c7064deafbd7bbe9f8aebb5a0bf2d42eef30f7f8cb6058eeef5d7163ef9c6b843f11d0476185107922bdc2971273a2aeae
@@ -3,5 +3,5 @@ env:
3
3
  - RUBYLIB=lib
4
4
  script: bundle exec mspec
5
5
  rvm:
6
- - 1.8.7
7
- - rbx-nightly-18mode
6
+ - 1.9.3
7
+ - rbx-nightly-19mode
@@ -1,2 +1,2 @@
1
- require "rubysl/uri/uri"
2
1
  require "rubysl/uri/version"
2
+ require "rubysl/uri/uri"
@@ -1,14 +1,96 @@
1
+ # URI is a module providing classes to handle Uniform Resource Identifiers
2
+ # (RFC2396[http://tools.ietf.org/html/rfc2396])
1
3
  #
2
- # URI support for Ruby
4
+ # == Features
5
+ #
6
+ # * Uniform way of handling URIs
7
+ # * Flexibility to introduce custom URI schemes
8
+ # * Flexibility to have an alternate URI::Parser (or just different patterns
9
+ # and regexp's)
10
+ #
11
+ # == Basic example
12
+ #
13
+ # require 'uri'
14
+ #
15
+ # uri = URI("http://foo.com/posts?id=30&limit=5#time=1305298413")
16
+ # #=> #<URI::HTTP:0x00000000b14880
17
+ # URL:http://foo.com/posts?id=30&limit=5#time=1305298413>
18
+ # uri.scheme
19
+ # #=> "http"
20
+ # uri.host
21
+ # #=> "foo.com"
22
+ # uri.path
23
+ # #=> "/posts"
24
+ # uri.query
25
+ # #=> "id=30&limit=5"
26
+ # uri.fragment
27
+ # #=> "time=1305298413"
28
+ #
29
+ # uri.to_s
30
+ # #=> "http://foo.com/posts?id=30&limit=5#time=1305298413"
31
+ #
32
+ # == Adding custom URIs
33
+ #
34
+ # module URI
35
+ # class RSYNC < Generic
36
+ # DEFAULT_PORT = 873
37
+ # end
38
+ # @@schemes['RSYNC'] = RSYNC
39
+ # end
40
+ # #=> URI::RSYNC
41
+ #
42
+ # URI.scheme_list
43
+ # #=> {"FTP"=>URI::FTP, "HTTP"=>URI::HTTP, "HTTPS"=>URI::HTTPS,
44
+ # "LDAP"=>URI::LDAP, "LDAPS"=>URI::LDAPS, "MAILTO"=>URI::MailTo,
45
+ # "RSYNC"=>URI::RSYNC}
46
+ #
47
+ # uri = URI("rsync://rsync.foo.com")
48
+ # #=> #<URI::RSYNC:0x00000000f648c8 URL:rsync://rsync.foo.com>
49
+ #
50
+ # == RFC References
51
+ #
52
+ # A good place to view an RFC spec is http://www.ietf.org/rfc.html
53
+ #
54
+ # Here is a list of all related RFC's.
55
+ # - RFC822[http://tools.ietf.org/html/rfc822]
56
+ # - RFC1738[http://tools.ietf.org/html/rfc1738]
57
+ # - RFC2255[http://tools.ietf.org/html/rfc2255]
58
+ # - RFC2368[http://tools.ietf.org/html/rfc2368]
59
+ # - RFC2373[http://tools.ietf.org/html/rfc2373]
60
+ # - RFC2396[http://tools.ietf.org/html/rfc2396]
61
+ # - RFC2732[http://tools.ietf.org/html/rfc2732]
62
+ # - RFC3986[http://tools.ietf.org/html/rfc3986]
63
+ #
64
+ # == Class tree
65
+ #
66
+ # - URI::Generic (in uri/generic.rb)
67
+ # - URI::FTP - (in uri/ftp.rb)
68
+ # - URI::HTTP - (in uri/http.rb)
69
+ # - URI::HTTPS - (in uri/https.rb)
70
+ # - URI::LDAP - (in uri/ldap.rb)
71
+ # - URI::LDAPS - (in uri/ldaps.rb)
72
+ # - URI::MailTo - (in uri/mailto.rb)
73
+ # - URI::Parser - (in uri/common.rb)
74
+ # - URI::REGEXP - (in uri/common.rb)
75
+ # - URI::REGEXP::PATTERN - (in uri/common.rb)
76
+ # - URI::Util - (in uri/common.rb)
77
+ # - URI::Escape - (in uri/common.rb)
78
+ # - URI::Error - (in uri/common.rb)
79
+ # - URI::InvalidURIError - (in uri/common.rb)
80
+ # - URI::InvalidComponentError - (in uri/common.rb)
81
+ # - URI::BadURIError - (in uri/common.rb)
82
+ #
83
+ # == Copyright Info
3
84
  #
4
85
  # Author:: Akira Yamada <akira@ruby-lang.org>
5
- # Documentation:: Akira Yamada <akira@ruby-lang.org>, Dmitry V. Sabanin <sdmitry@lrn.ru>
6
- # License::
86
+ # Documentation::
87
+ # Akira Yamada <akira@ruby-lang.org>
88
+ # Dmitry V. Sabanin <sdmitry@lrn.ru>
89
+ # Vincent Batts <vbatts@hashbangbash.com>
90
+ # License::
7
91
  # Copyright (c) 2001 akira yamada <akira@ruby-lang.org>
8
92
  # You can redistribute it and/or modify it under the same term as Ruby.
9
- # Revision:: $Id: uri.rb 16038 2008-04-15 09:41:47Z kazu $
10
- #
11
- # See URI for documentation
93
+ # Revision:: $Id$
12
94
  #
13
95
 
14
96
  module URI
@@ -1,5 +1,5 @@
1
1
  module RubySL
2
2
  module URI
3
- VERSION = "1.0.0"
3
+ VERSION = "2.0.0"
4
4
  end
5
5
  end
@@ -1,12 +1,18 @@
1
+ #--
1
2
  # = uri/common.rb
2
3
  #
3
4
  # Author:: Akira Yamada <akira@ruby-lang.org>
4
- # Revision:: $Id: common.rb 14178 2007-12-10 09:31:55Z matz $
5
- # License::
5
+ # Revision:: $Id$
6
+ # License::
6
7
  # You can redistribute it and/or modify it under the same term as Ruby.
7
8
  #
9
+ # See URI for general documentation
10
+ #
8
11
 
9
12
  module URI
13
+ #
14
+ # Includes URI::REGEXP::PATTERN
15
+ #
10
16
  module REGEXP
11
17
  #
12
18
  # Patterns used to parse URI's
@@ -31,29 +37,336 @@ module URI
31
37
  # mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
32
38
  # "(" | ")"
33
39
  # unreserved = alphanum | mark
34
- UNRESERVED = "-_.!~*'()#{ALNUM}"
40
+ UNRESERVED = "\\-_.!~*'()#{ALNUM}"
35
41
  # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
36
42
  # "$" | ","
37
- # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
43
+ # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
38
44
  # "$" | "," | "[" | "]" (RFC 2732)
39
45
  RESERVED = ";/?:@&=+$,\\[\\]"
40
46
 
47
+ # domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
48
+ DOMLABEL = "(?:[#{ALNUM}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
49
+ # toplabel = alpha | alpha *( alphanum | "-" ) alphanum
50
+ TOPLABEL = "(?:[#{ALPHA}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
51
+ # hostname = *( domainlabel "." ) toplabel [ "." ]
52
+ HOSTNAME = "(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?"
53
+
54
+ # :startdoc:
55
+ end # PATTERN
56
+
57
+ # :startdoc:
58
+ end # REGEXP
59
+
60
+ # class that Parses String's into URI's
61
+ #
62
+ # It contains a Hash set of patterns and Regexp's that match and validate.
63
+ #
64
+ class Parser
65
+ include REGEXP
66
+
67
+ #
68
+ # == Synopsis
69
+ #
70
+ # URI::Parser.new([opts])
71
+ #
72
+ # == Args
73
+ #
74
+ # The constructor accepts a hash as options for parser.
75
+ # Keys of options are pattern names of URI components
76
+ # and values of options are pattern strings.
77
+ # The constructor generates a set of regexps for parsing URIs.
78
+ #
79
+ # You can use the following keys:
80
+ #
81
+ # * :ESCAPED (URI::PATTERN::ESCAPED in default)
82
+ # * :UNRESERVED (URI::PATTERN::UNRESERVED in default)
83
+ # * :DOMLABEL (URI::PATTERN::DOMLABEL in default)
84
+ # * :TOPLABEL (URI::PATTERN::TOPLABEL in default)
85
+ # * :HOSTNAME (URI::PATTERN::HOSTNAME in default)
86
+ #
87
+ # == Examples
88
+ #
89
+ # p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})")
90
+ # u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP:0xb78cf4f8 URL:http://example.jp/%uABCD>
91
+ # URI.parse(u.to_s) #=> raises URI::InvalidURIError
92
+ #
93
+ # s = "http://example.com/ABCD"
94
+ # u1 = p.parse(s) #=> #<URI::HTTP:0xb78c3220 URL:http://example.com/ABCD>
95
+ # u2 = URI.parse(s) #=> #<URI::HTTP:0xb78b6d54 URL:http://example.com/ABCD>
96
+ # u1 == u2 #=> true
97
+ # u1.eql?(u2) #=> false
98
+ #
99
+ def initialize(opts = {})
100
+ @pattern = initialize_pattern(opts)
101
+ @pattern.each_value {|v| v.freeze}
102
+ @pattern.freeze
103
+
104
+ @regexp = initialize_regexp(@pattern)
105
+ @regexp.each_value {|v| v.freeze}
106
+ @regexp.freeze
107
+ end
108
+
109
+ # The Hash of patterns.
110
+ #
111
+ # see also URI::Parser.initialize_pattern
112
+ attr_reader :pattern
113
+
114
+ # The Hash of Regexp
115
+ #
116
+ # see also URI::Parser.initialize_regexp
117
+ attr_reader :regexp
118
+
119
+ # Returns a split URI against regexp[:ABS_URI]
120
+ def split(uri)
121
+ case uri
122
+ when ''
123
+ # null uri
124
+
125
+ when @regexp[:ABS_URI]
126
+ scheme, opaque, userinfo, host, port,
127
+ registry, path, query, fragment = $~[1..-1]
128
+
129
+ # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
130
+
131
+ # absoluteURI = scheme ":" ( hier_part | opaque_part )
132
+ # hier_part = ( net_path | abs_path ) [ "?" query ]
133
+ # opaque_part = uric_no_slash *uric
134
+
135
+ # abs_path = "/" path_segments
136
+ # net_path = "//" authority [ abs_path ]
137
+
138
+ # authority = server | reg_name
139
+ # server = [ [ userinfo "@" ] hostport ]
140
+
141
+ if !scheme
142
+ raise InvalidURIError,
143
+ "bad URI(absolute but no scheme): #{uri}"
144
+ end
145
+ if !opaque && (!path && (!host && !registry))
146
+ raise InvalidURIError,
147
+ "bad URI(absolute but no path): #{uri}"
148
+ end
149
+
150
+ when @regexp[:REL_URI]
151
+ scheme = nil
152
+ opaque = nil
153
+
154
+ userinfo, host, port, registry,
155
+ rel_segment, abs_path, query, fragment = $~[1..-1]
156
+ if rel_segment && abs_path
157
+ path = rel_segment + abs_path
158
+ elsif rel_segment
159
+ path = rel_segment
160
+ elsif abs_path
161
+ path = abs_path
162
+ end
163
+
164
+ # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
165
+
166
+ # relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
167
+
168
+ # net_path = "//" authority [ abs_path ]
169
+ # abs_path = "/" path_segments
170
+ # rel_path = rel_segment [ abs_path ]
171
+
172
+ # authority = server | reg_name
173
+ # server = [ [ userinfo "@" ] hostport ]
174
+
175
+ else
176
+ raise InvalidURIError, "bad URI(is not URI?): #{uri}"
177
+ end
178
+
179
+ path = '' if !path && !opaque # (see RFC2396 Section 5.2)
180
+ ret = [
181
+ scheme,
182
+ userinfo, host, port, # X
183
+ registry, # X
184
+ path, # Y
185
+ opaque, # Y
186
+ query,
187
+ fragment
188
+ ]
189
+ return ret
190
+ end
191
+
192
+ #
193
+ # == Args
194
+ #
195
+ # +uri+::
196
+ # String
197
+ #
198
+ # == Description
199
+ #
200
+ # parses +uri+ and constructs either matching URI scheme object
201
+ # (FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or URI::Generic
202
+ #
203
+ # == Usage
204
+ #
205
+ # p = URI::Parser.new
206
+ # p.parse("ldap://ldap.example.com/dc=example?user=john")
207
+ # #=> #<URI::LDAP:0x00000000b9e7e8 URL:ldap://ldap.example.com/dc=example?user=john>
208
+ #
209
+ def parse(uri)
210
+ scheme, userinfo, host, port,
211
+ registry, path, opaque, query, fragment = self.split(uri)
212
+
213
+ if scheme && URI.scheme_list.include?(scheme.upcase)
214
+ URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port,
215
+ registry, path, opaque, query,
216
+ fragment, self)
217
+ else
218
+ Generic.new(scheme, userinfo, host, port,
219
+ registry, path, opaque, query,
220
+ fragment, self)
221
+ end
222
+ end
223
+
224
+
225
+ #
226
+ # == Args
227
+ #
228
+ # +uris+::
229
+ # an Array of Strings
230
+ #
231
+ # == Description
232
+ #
233
+ # Attempts to parse and merge a set of URIs
234
+ #
235
+ def join(*uris)
236
+ uris[0] = convert_to_uri(uris[0])
237
+ uris.inject :merge
238
+ end
239
+
240
+ #
241
+ # :call-seq:
242
+ # extract( str )
243
+ # extract( str, schemes )
244
+ # extract( str, schemes ) {|item| block }
245
+ #
246
+ # == Args
247
+ #
248
+ # +str+::
249
+ # String to search
250
+ # +schemes+::
251
+ # Patterns to apply to +str+
252
+ #
253
+ # == Description
254
+ #
255
+ # Extracts URIs from +str+, optionally limited to +schemes+.
256
+ # If no +block+ is given, then returns the result,
257
+ # else it calls +block+ for each element in result.
258
+ #
259
+ # see also URI::Parser.make_regexp
260
+ #
261
+ def extract(str, schemes = nil)
262
+ if block_given?
263
+ str.scan(make_regexp(schemes)) { yield $& }
264
+ nil
265
+ else
266
+ result = []
267
+ str.scan(make_regexp(schemes)) { result.push $& }
268
+ result
269
+ end
270
+ end
271
+
272
+ # returns Regexp that is default self.regexp[:ABS_URI_REF],
273
+ # unless +schemes+ is provided. Then it is a Regexp.union with self.pattern[:X_ABS_URI]
274
+ def make_regexp(schemes = nil)
275
+ unless schemes
276
+ @regexp[:ABS_URI_REF]
277
+ else
278
+ /(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/x
279
+ end
280
+ end
281
+
282
+ #
283
+ # :call-seq:
284
+ # escape( str )
285
+ # escape( str, unsafe )
286
+ #
287
+ # == Args
288
+ #
289
+ # +str+::
290
+ # String to make safe
291
+ # +unsafe+::
292
+ # Regexp to apply. Defaults to self.regexp[:UNSAFE]
293
+ #
294
+ # == Description
295
+ #
296
+ # constructs a safe String from +str+, removing unsafe characters,
297
+ # replacing them with codes.
298
+ #
299
+ def escape(str, unsafe = @regexp[:UNSAFE])
300
+ unless unsafe.kind_of?(Regexp)
301
+ # perhaps unsafe is String object
302
+ unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false)
303
+ end
304
+ str.gsub(unsafe) do
305
+ us = $&
306
+ tmp = ''
307
+ us.each_byte do |uc|
308
+ tmp << sprintf('%%%02X', uc)
309
+ end
310
+ tmp
311
+ end.force_encoding(Encoding::US_ASCII)
312
+ end
313
+
314
+ #
315
+ # :call-seq:
316
+ # unescape( str )
317
+ # unescape( str, escaped )
318
+ #
319
+ # == Args
320
+ #
321
+ # +str+::
322
+ # String to remove escapes from
323
+ # +escaped+::
324
+ # Regexp to apply. Defaults to self.regexp[:ESCAPED]
325
+ #
326
+ # == Description
327
+ #
328
+ # Removes escapes from +str+
329
+ #
330
+ def unescape(str, escaped = @regexp[:ESCAPED])
331
+ str.gsub(escaped) { [$&[1, 2].hex].pack('C') }.force_encoding(str.encoding)
332
+ end
333
+
334
+ @@to_s = Kernel.instance_method(:to_s)
335
+ def inspect
336
+ @@to_s.bind(self).call
337
+ end
338
+
339
+ private
340
+
341
+ # Constructs the default Hash of patterns
342
+ def initialize_pattern(opts = {})
343
+ ret = {}
344
+ ret[:ESCAPED] = escaped = (opts.delete(:ESCAPED) || PATTERN::ESCAPED)
345
+ ret[:UNRESERVED] = unreserved = opts.delete(:UNRESERVED) || PATTERN::UNRESERVED
346
+ ret[:RESERVED] = reserved = opts.delete(:RESERVED) || PATTERN::RESERVED
347
+ ret[:DOMLABEL] = opts.delete(:DOMLABEL) || PATTERN::DOMLABEL
348
+ ret[:TOPLABEL] = opts.delete(:TOPLABEL) || PATTERN::TOPLABEL
349
+ ret[:HOSTNAME] = hostname = opts.delete(:HOSTNAME)
350
+
351
+ # RFC 2396 (URI Generic Syntax)
352
+ # RFC 2732 (IPv6 Literal Addresses in URL's)
353
+ # RFC 2373 (IPv6 Addressing Architecture)
354
+
41
355
  # uric = reserved | unreserved | escaped
42
- URIC = "(?:[#{UNRESERVED}#{RESERVED}]|#{ESCAPED})"
356
+ ret[:URIC] = uric = "(?:[#{unreserved}#{reserved}]|#{escaped})"
43
357
  # uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
44
358
  # "&" | "=" | "+" | "$" | ","
45
- URIC_NO_SLASH = "(?:[#{UNRESERVED};?:@&=+$,]|#{ESCAPED})"
359
+ ret[:URIC_NO_SLASH] = uric_no_slash = "(?:[#{unreserved};?:@&=+$,]|#{escaped})"
46
360
  # query = *uric
47
- QUERY = "#{URIC}*"
361
+ ret[:QUERY] = query = "#{uric}*"
48
362
  # fragment = *uric
49
- FRAGMENT = "#{URIC}*"
363
+ ret[:FRAGMENT] = fragment = "#{uric}*"
50
364
 
51
- # domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
52
- DOMLABEL = "(?:[#{ALNUM}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
53
- # toplabel = alpha | alpha *( alphanum | "-" ) alphanum
54
- TOPLABEL = "(?:[#{ALPHA}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
55
365
  # hostname = *( domainlabel "." ) toplabel [ "." ]
56
- HOSTNAME = "(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?"
366
+ # reg-name = *( unreserved / pct-encoded / sub-delims ) # RFC3986
367
+ unless hostname
368
+ ret[:HOSTNAME] = hostname = "(?:[a-zA-Z0-9\\-.]|%\\h\\h)+"
369
+ end
57
370
 
58
371
  # RFC 2373, APPENDIX B:
59
372
  # IPv6address = hexpart [ ":" IPv4address ]
@@ -66,153 +379,178 @@ module URI
66
379
  # allowed too. Here is a replacement.
67
380
  #
68
381
  # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
69
- IPV4ADDR = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
382
+ ret[:IPV4ADDR] = ipv4addr = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
70
383
  # hex4 = 1*4HEXDIG
71
- HEX4 = "[#{HEX}]{1,4}"
384
+ hex4 = "[#{PATTERN::HEX}]{1,4}"
72
385
  # lastpart = hex4 | IPv4address
73
- LASTPART = "(?:#{HEX4}|#{IPV4ADDR})"
386
+ lastpart = "(?:#{hex4}|#{ipv4addr})"
74
387
  # hexseq1 = *( hex4 ":" ) hex4
75
- HEXSEQ1 = "(?:#{HEX4}:)*#{HEX4}"
388
+ hexseq1 = "(?:#{hex4}:)*#{hex4}"
76
389
  # hexseq2 = *( hex4 ":" ) lastpart
77
- HEXSEQ2 = "(?:#{HEX4}:)*#{LASTPART}"
390
+ hexseq2 = "(?:#{hex4}:)*#{lastpart}"
78
391
  # IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ]
79
- IPV6ADDR = "(?:#{HEXSEQ2}|(?:#{HEXSEQ1})?::(?:#{HEXSEQ2})?)"
392
+ ret[:IPV6ADDR] = ipv6addr = "(?:#{hexseq2}|(?:#{hexseq1})?::(?:#{hexseq2})?)"
80
393
 
81
394
  # IPv6prefix = ( hexseq1 | [ hexseq1 ] "::" [ hexseq1 ] ) "/" 1*2DIGIT
82
395
  # unused
83
396
 
84
397
  # ipv6reference = "[" IPv6address "]" (RFC 2732)
85
- IPV6REF = "\\[#{IPV6ADDR}\\]"
398
+ ret[:IPV6REF] = ipv6ref = "\\[#{ipv6addr}\\]"
86
399
 
87
400
  # host = hostname | IPv4address
88
401
  # host = hostname | IPv4address | IPv6reference (RFC 2732)
89
- HOST = "(?:#{HOSTNAME}|#{IPV4ADDR}|#{IPV6REF})"
402
+ ret[:HOST] = host = "(?:#{hostname}|#{ipv4addr}|#{ipv6ref})"
90
403
  # port = *digit
91
- PORT = '\d*'
404
+ port = '\d*'
92
405
  # hostport = host [ ":" port ]
93
- HOSTPORT = "#{HOST}(?::#{PORT})?"
406
+ ret[:HOSTPORT] = hostport = "#{host}(?::#{port})?"
94
407
 
95
408
  # userinfo = *( unreserved | escaped |
96
409
  # ";" | ":" | "&" | "=" | "+" | "$" | "," )
97
- USERINFO = "(?:[#{UNRESERVED};:&=+$,]|#{ESCAPED})*"
410
+ ret[:USERINFO] = userinfo = "(?:[#{unreserved};:&=+$,]|#{escaped})*"
98
411
 
99
412
  # pchar = unreserved | escaped |
100
413
  # ":" | "@" | "&" | "=" | "+" | "$" | ","
101
- PCHAR = "(?:[#{UNRESERVED}:@&=+$,]|#{ESCAPED})"
414
+ pchar = "(?:[#{unreserved}:@&=+$,]|#{escaped})"
102
415
  # param = *pchar
103
- PARAM = "#{PCHAR}*"
416
+ param = "#{pchar}*"
104
417
  # segment = *pchar *( ";" param )
105
- SEGMENT = "#{PCHAR}*(?:;#{PARAM})*"
418
+ segment = "#{pchar}*(?:;#{param})*"
106
419
  # path_segments = segment *( "/" segment )
107
- PATH_SEGMENTS = "#{SEGMENT}(?:/#{SEGMENT})*"
420
+ ret[:PATH_SEGMENTS] = path_segments = "#{segment}(?:/#{segment})*"
108
421
 
109
422
  # server = [ [ userinfo "@" ] hostport ]
110
- SERVER = "(?:#{USERINFO}@)?#{HOSTPORT}"
423
+ server = "(?:#{userinfo}@)?#{hostport}"
111
424
  # reg_name = 1*( unreserved | escaped | "$" | "," |
112
425
  # ";" | ":" | "@" | "&" | "=" | "+" )
113
- REG_NAME = "(?:[#{UNRESERVED}$,;:@&=+]|#{ESCAPED})+"
426
+ ret[:REG_NAME] = reg_name = "(?:[#{unreserved}$,;:@&=+]|#{escaped})+"
114
427
  # authority = server | reg_name
115
- AUTHORITY = "(?:#{SERVER}|#{REG_NAME})"
428
+ authority = "(?:#{server}|#{reg_name})"
116
429
 
117
430
  # rel_segment = 1*( unreserved | escaped |
118
431
  # ";" | "@" | "&" | "=" | "+" | "$" | "," )
119
- REL_SEGMENT = "(?:[#{UNRESERVED};@&=+$,]|#{ESCAPED})+"
432
+ ret[:REL_SEGMENT] = rel_segment = "(?:[#{unreserved};@&=+$,]|#{escaped})+"
120
433
 
121
434
  # scheme = alpha *( alpha | digit | "+" | "-" | "." )
122
- SCHEME = "[#{ALPHA}][-+.#{ALPHA}\\d]*"
435
+ ret[:SCHEME] = scheme = "[#{PATTERN::ALPHA}][\\-+.#{PATTERN::ALPHA}\\d]*"
123
436
 
124
437
  # abs_path = "/" path_segments
125
- ABS_PATH = "/#{PATH_SEGMENTS}"
438
+ ret[:ABS_PATH] = abs_path = "/#{path_segments}"
126
439
  # rel_path = rel_segment [ abs_path ]
127
- REL_PATH = "#{REL_SEGMENT}(?:#{ABS_PATH})?"
440
+ ret[:REL_PATH] = rel_path = "#{rel_segment}(?:#{abs_path})?"
128
441
  # net_path = "//" authority [ abs_path ]
129
- NET_PATH = "//#{AUTHORITY}(?:#{ABS_PATH})?"
442
+ ret[:NET_PATH] = net_path = "//#{authority}(?:#{abs_path})?"
130
443
 
131
444
  # hier_part = ( net_path | abs_path ) [ "?" query ]
132
- HIER_PART = "(?:#{NET_PATH}|#{ABS_PATH})(?:\\?(?:#{QUERY}))?"
445
+ ret[:HIER_PART] = hier_part = "(?:#{net_path}|#{abs_path})(?:\\?(?:#{query}))?"
133
446
  # opaque_part = uric_no_slash *uric
134
- OPAQUE_PART = "#{URIC_NO_SLASH}#{URIC}*"
447
+ ret[:OPAQUE_PART] = opaque_part = "#{uric_no_slash}#{uric}*"
135
448
 
136
449
  # absoluteURI = scheme ":" ( hier_part | opaque_part )
137
- ABS_URI = "#{SCHEME}:(?:#{HIER_PART}|#{OPAQUE_PART})"
450
+ ret[:ABS_URI] = abs_uri = "#{scheme}:(?:#{hier_part}|#{opaque_part})"
138
451
  # relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
139
- REL_URI = "(?:#{NET_PATH}|#{ABS_PATH}|#{REL_PATH})(?:\\?#{QUERY})?"
452
+ ret[:REL_URI] = rel_uri = "(?:#{net_path}|#{abs_path}|#{rel_path})(?:\\?#{query})?"
140
453
 
141
454
  # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
142
- URI_REF = "(?:#{ABS_URI}|#{REL_URI})?(?:##{FRAGMENT})?"
455
+ ret[:URI_REF] = "(?:#{abs_uri}|#{rel_uri})?(?:##{fragment})?"
143
456
 
144
- # XXX:
145
- X_ABS_URI = "
146
- (#{PATTERN::SCHEME}): (?# 1: scheme)
457
+ ret[:X_ABS_URI] = "
458
+ (#{scheme}): (?# 1: scheme)
147
459
  (?:
148
- (#{PATTERN::OPAQUE_PART}) (?# 2: opaque)
460
+ (#{opaque_part}) (?# 2: opaque)
149
461
  |
150
462
  (?:(?:
151
463
  //(?:
152
- (?:(?:(#{PATTERN::USERINFO})@)? (?# 3: userinfo)
153
- (?:(#{PATTERN::HOST})(?::(\\d*))?))?(?# 4: host, 5: port)
464
+ (?:(?:(#{userinfo})@)? (?# 3: userinfo)
465
+ (?:(#{host})(?::(\\d*))?))? (?# 4: host, 5: port)
154
466
  |
155
- (#{PATTERN::REG_NAME}) (?# 6: registry)
467
+ (#{reg_name}) (?# 6: registry)
156
468
  )
157
469
  |
158
- (?!//)) (?# XXX: '//' is the mark for hostport)
159
- (#{PATTERN::ABS_PATH})? (?# 7: path)
160
- )(?:\\?(#{PATTERN::QUERY}))? (?# 8: query)
470
+ (?!//)) (?# XXX: '//' is the mark for hostport)
471
+ (#{abs_path})? (?# 7: path)
472
+ )(?:\\?(#{query}))? (?# 8: query)
161
473
  )
162
- (?:\\#(#{PATTERN::FRAGMENT}))? (?# 9: fragment)
474
+ (?:\\#(#{fragment}))? (?# 9: fragment)
163
475
  "
164
- X_REL_URI = "
476
+
477
+ ret[:X_REL_URI] = "
165
478
  (?:
166
479
  (?:
167
480
  //
168
481
  (?:
169
- (?:(#{PATTERN::USERINFO})@)? (?# 1: userinfo)
170
- (#{PATTERN::HOST})?(?::(\\d*))? (?# 2: host, 3: port)
482
+ (?:(#{userinfo})@)? (?# 1: userinfo)
483
+ (#{host})?(?::(\\d*))? (?# 2: host, 3: port)
171
484
  |
172
- (#{PATTERN::REG_NAME}) (?# 4: registry)
485
+ (#{reg_name}) (?# 4: registry)
173
486
  )
174
487
  )
175
488
  |
176
- (#{PATTERN::REL_SEGMENT}) (?# 5: rel_segment)
489
+ (#{rel_segment}) (?# 5: rel_segment)
177
490
  )?
178
- (#{PATTERN::ABS_PATH})? (?# 6: abs_path)
179
- (?:\\?(#{PATTERN::QUERY}))? (?# 7: query)
180
- (?:\\#(#{PATTERN::FRAGMENT}))? (?# 8: fragment)
491
+ (#{abs_path})? (?# 6: abs_path)
492
+ (?:\\?(#{query}))? (?# 7: query)
493
+ (?:\\#(#{fragment}))? (?# 8: fragment)
181
494
  "
182
- # :startdoc:
183
- end # PATTERN
184
495
 
185
- # :stopdoc:
186
-
187
- # for URI::split
188
- ABS_URI = Regexp.new('^' + PATTERN::X_ABS_URI + '$', #'
189
- Regexp::EXTENDED, 'N').freeze
190
- REL_URI = Regexp.new('^' + PATTERN::X_REL_URI + '$', #'
191
- Regexp::EXTENDED, 'N').freeze
192
-
193
- # for URI::extract
194
- URI_REF = Regexp.new(PATTERN::URI_REF, false, 'N').freeze
195
- ABS_URI_REF = Regexp.new(PATTERN::X_ABS_URI, Regexp::EXTENDED, 'N').freeze
196
- REL_URI_REF = Regexp.new(PATTERN::X_REL_URI, Regexp::EXTENDED, 'N').freeze
197
-
198
- # for URI::escape/unescape
199
- ESCAPED = Regexp.new(PATTERN::ESCAPED, false, 'N').freeze
200
- UNSAFE = Regexp.new("[^#{PATTERN::UNRESERVED}#{PATTERN::RESERVED}]",
201
- false, 'N').freeze
202
-
203
- # for Generic#initialize
204
- SCHEME = Regexp.new("^#{PATTERN::SCHEME}$", false, 'N').freeze #"
205
- USERINFO = Regexp.new("^#{PATTERN::USERINFO}$", false, 'N').freeze #"
206
- HOST = Regexp.new("^#{PATTERN::HOST}$", false, 'N').freeze #"
207
- PORT = Regexp.new("^#{PATTERN::PORT}$", false, 'N').freeze #"
208
- OPAQUE = Regexp.new("^#{PATTERN::OPAQUE_PART}$", false, 'N').freeze #"
209
- REGISTRY = Regexp.new("^#{PATTERN::REG_NAME}$", false, 'N').freeze #"
210
- ABS_PATH = Regexp.new("^#{PATTERN::ABS_PATH}$", false, 'N').freeze #"
211
- REL_PATH = Regexp.new("^#{PATTERN::REL_PATH}$", false, 'N').freeze #"
212
- QUERY = Regexp.new("^#{PATTERN::QUERY}$", false, 'N').freeze #"
213
- FRAGMENT = Regexp.new("^#{PATTERN::FRAGMENT}$", false, 'N').freeze #"
214
- # :startdoc:
215
- end # REGEXP
496
+ ret
497
+ end
498
+
499
+ # Constructs the default Hash of Regexp's
500
+ def initialize_regexp(pattern)
501
+ ret = {}
502
+
503
+ # for URI::split
504
+ ret[:ABS_URI] = Regexp.new('\A\s*' + pattern[:X_ABS_URI] + '\s*\z', Regexp::EXTENDED)
505
+ ret[:REL_URI] = Regexp.new('\A\s*' + pattern[:X_REL_URI] + '\s*\z', Regexp::EXTENDED)
506
+
507
+ # for URI::extract
508
+ ret[:URI_REF] = Regexp.new(pattern[:URI_REF])
509
+ ret[:ABS_URI_REF] = Regexp.new(pattern[:X_ABS_URI], Regexp::EXTENDED)
510
+ ret[:REL_URI_REF] = Regexp.new(pattern[:X_REL_URI], Regexp::EXTENDED)
511
+
512
+ # for URI::escape/unescape
513
+ ret[:ESCAPED] = Regexp.new(pattern[:ESCAPED])
514
+ ret[:UNSAFE] = Regexp.new("[^#{pattern[:UNRESERVED]}#{pattern[:RESERVED]}]")
515
+
516
+ # for Generic#initialize
517
+ ret[:SCHEME] = Regexp.new("\\A#{pattern[:SCHEME]}\\z")
518
+ ret[:USERINFO] = Regexp.new("\\A#{pattern[:USERINFO]}\\z")
519
+ ret[:HOST] = Regexp.new("\\A#{pattern[:HOST]}\\z")
520
+ ret[:PORT] = Regexp.new("\\A#{pattern[:PORT]}\\z")
521
+ ret[:OPAQUE] = Regexp.new("\\A#{pattern[:OPAQUE_PART]}\\z")
522
+ ret[:REGISTRY] = Regexp.new("\\A#{pattern[:REG_NAME]}\\z")
523
+ ret[:ABS_PATH] = Regexp.new("\\A#{pattern[:ABS_PATH]}\\z")
524
+ ret[:REL_PATH] = Regexp.new("\\A#{pattern[:REL_PATH]}\\z")
525
+ ret[:QUERY] = Regexp.new("\\A#{pattern[:QUERY]}\\z")
526
+ ret[:FRAGMENT] = Regexp.new("\\A#{pattern[:FRAGMENT]}\\z")
527
+
528
+ ret
529
+ end
530
+
531
+ def convert_to_uri(uri)
532
+ if uri.is_a?(URI::Generic)
533
+ uri
534
+ elsif uri = String.try_convert(uri)
535
+ parse(uri)
536
+ else
537
+ raise ArgumentError,
538
+ "bad argument (expected URI object or URI string)"
539
+ end
540
+ end
541
+
542
+ end # class Parser
543
+
544
+ # URI::Parser.new
545
+ DEFAULT_PARSER = Parser.new
546
+ DEFAULT_PARSER.pattern.each_pair do |sym, str|
547
+ unless REGEXP::PATTERN.const_defined?(sym)
548
+ REGEXP::PATTERN.const_set(sym, str)
549
+ end
550
+ end
551
+ DEFAULT_PARSER.regexp.each_pair do |sym, str|
552
+ const_set(sym, str)
553
+ end
216
554
 
217
555
  module Util # :nodoc:
218
556
  def make_components_hash(klass, array_hash)
@@ -236,7 +574,7 @@ module URI
236
574
  end
237
575
  end
238
576
  else
239
- raise ArgumentError,
577
+ raise ArgumentError,
240
578
  "expected Array of or Hash of components of #{klass.to_s} (#{klass.component[1..-1].join(', ')})"
241
579
  end
242
580
  tmp[:scheme] = klass.to_s.sub(/\A.*::/, '').downcase
@@ -246,9 +584,8 @@ module URI
246
584
  module_function :make_components_hash
247
585
  end
248
586
 
587
+ # module for escaping unsafe characters with codes.
249
588
  module Escape
250
- include REGEXP
251
-
252
589
  #
253
590
  # == Synopsis
254
591
  #
@@ -281,18 +618,9 @@ module URI
281
618
  # p URI.escape("@?@!", "!?")
282
619
  # # => "@%3F@%21"
283
620
  #
284
- def escape(str, unsafe = UNSAFE)
285
- unless unsafe.kind_of?(Regexp)
286
- # perhaps unsafe is String object
287
- unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false, 'N')
288
- end
289
- str.gsub(unsafe) do |us|
290
- tmp = ''
291
- us.each_byte do |uc|
292
- tmp << sprintf('%%%02X', uc)
293
- end
294
- tmp
295
- end
621
+ def escape(*arg)
622
+ warn "#{caller(1)[0]}: warning: URI.escape is obsolete" if $VERBOSE
623
+ DEFAULT_PARSER.escape(*arg)
296
624
  end
297
625
  alias encode escape
298
626
  #
@@ -316,19 +644,22 @@ module URI
316
644
  # p URI.unescape(enc_uri)
317
645
  # # => "http://example.com/?a=\t\r"
318
646
  #
319
- def unescape(str)
320
- str.gsub(ESCAPED) do
321
- $&[1,2].hex.chr
322
- end
647
+ def unescape(*arg)
648
+ warn "#{caller(1)[0]}: warning: URI.unescape is obsolete" if $VERBOSE
649
+ DEFAULT_PARSER.unescape(*arg)
323
650
  end
324
651
  alias decode unescape
325
- end
652
+ end # module Escape
326
653
 
327
- include REGEXP
328
654
  extend Escape
655
+ include REGEXP
329
656
 
330
657
  @@schemes = {}
331
-
658
+ # Returns a Hash of the defined schemes
659
+ def self.scheme_list
660
+ @@schemes
661
+ end
662
+
332
663
  #
333
664
  # Base class for all URI exceptions.
334
665
  #
@@ -369,7 +700,7 @@ module URI
369
700
  # * Opaque
370
701
  # * Query
371
702
  # * Fragment
372
- #
703
+ #
373
704
  # == Usage
374
705
  #
375
706
  # require 'uri'
@@ -378,75 +709,7 @@ module URI
378
709
  # # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]
379
710
  #
380
711
  def self.split(uri)
381
- case uri
382
- when ''
383
- # null uri
384
-
385
- when ABS_URI
386
- scheme, opaque, userinfo, host, port,
387
- registry, path, query, fragment = $~[1..-1]
388
-
389
- # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
390
-
391
- # absoluteURI = scheme ":" ( hier_part | opaque_part )
392
- # hier_part = ( net_path | abs_path ) [ "?" query ]
393
- # opaque_part = uric_no_slash *uric
394
-
395
- # abs_path = "/" path_segments
396
- # net_path = "//" authority [ abs_path ]
397
-
398
- # authority = server | reg_name
399
- # server = [ [ userinfo "@" ] hostport ]
400
-
401
- if !scheme
402
- raise InvalidURIError,
403
- "bad URI(absolute but no scheme): #{uri}"
404
- end
405
- if !opaque && (!path && (!host && !registry))
406
- raise InvalidURIError,
407
- "bad URI(absolute but no path): #{uri}"
408
- end
409
-
410
- when REL_URI
411
- scheme = nil
412
- opaque = nil
413
-
414
- userinfo, host, port, registry,
415
- rel_segment, abs_path, query, fragment = $~[1..-1]
416
- if rel_segment && abs_path
417
- path = rel_segment + abs_path
418
- elsif rel_segment
419
- path = rel_segment
420
- elsif abs_path
421
- path = abs_path
422
- end
423
-
424
- # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
425
-
426
- # relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
427
-
428
- # net_path = "//" authority [ abs_path ]
429
- # abs_path = "/" path_segments
430
- # rel_path = rel_segment [ abs_path ]
431
-
432
- # authority = server | reg_name
433
- # server = [ [ userinfo "@" ] hostport ]
434
-
435
- else
436
- raise InvalidURIError, "bad URI(is not URI?): #{uri}"
437
- end
438
-
439
- path = '' if !path && !opaque # (see RFC2396 Section 5.2)
440
- ret = [
441
- scheme,
442
- userinfo, host, port, # X
443
- registry, # X
444
- path, # Y
445
- opaque, # Y
446
- query,
447
- fragment
448
- ]
449
- return ret
712
+ DEFAULT_PARSER.split(uri)
450
713
  end
451
714
 
452
715
  #
@@ -462,7 +725,7 @@ module URI
462
725
  # == Description
463
726
  #
464
727
  # Creates one of the URI's subclasses instance from the string.
465
- #
728
+ #
466
729
  # == Raises
467
730
  #
468
731
  # URI::InvalidURIError
@@ -475,24 +738,13 @@ module URI
475
738
  # uri = URI.parse("http://www.ruby-lang.org/")
476
739
  # p uri
477
740
  # # => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/>
478
- # p uri.scheme
479
- # # => "http"
480
- # p uri.host
481
- # # => "www.ruby-lang.org"
482
- #
741
+ # p uri.scheme
742
+ # # => "http"
743
+ # p uri.host
744
+ # # => "www.ruby-lang.org"
745
+ #
483
746
  def self.parse(uri)
484
- scheme, userinfo, host, port,
485
- registry, path, opaque, query, fragment = self.split(uri)
486
-
487
- if scheme && @@schemes.include?(scheme.upcase)
488
- @@schemes[scheme.upcase].new(scheme, userinfo, host, port,
489
- registry, path, opaque, query,
490
- fragment)
491
- else
492
- Generic.new(scheme, userinfo, host, port,
493
- registry, path, opaque, query,
494
- fragment)
495
- end
747
+ DEFAULT_PARSER.parse(uri)
496
748
  end
497
749
 
498
750
  #
@@ -513,15 +765,24 @@ module URI
513
765
  #
514
766
  # require 'uri'
515
767
  #
516
- # p URI.join("http://localhost/","main.rbx")
768
+ # p URI.join("http://example.com/","main.rbx")
517
769
  # # => #<URI::HTTP:0x2022ac02 URL:http://example.com/main.rbx>
518
770
  #
771
+ # p URI.join('http://example.com', 'foo')
772
+ # # => #<URI::HTTP:0x01ab80a0 URL:http://example.com/foo>
773
+ #
774
+ # p URI.join('http://example.com', '/foo', '/bar')
775
+ # # => #<URI::HTTP:0x01aaf0b0 URL:http://example.com/bar>
776
+ #
777
+ # p URI.join('http://example.com', '/foo', 'bar')
778
+ # # => #<URI::HTTP:0x801a92af0 URL:http://example.com/bar>
779
+ #
780
+ # p URI.join('http://example.com', '/foo/', 'bar')
781
+ # # => #<URI::HTTP:0x80135a3a0 URL:http://example.com/foo/bar>
782
+ #
783
+ #
519
784
  def self.join(*str)
520
- u = self.parse(str[0])
521
- str[1 .. -1].each do |x|
522
- u = u.merge(x)
523
- end
524
- u
785
+ DEFAULT_PARSER.join(*str)
525
786
  end
526
787
 
527
788
  #
@@ -531,7 +792,7 @@ module URI
531
792
  #
532
793
  # == Args
533
794
  #
534
- # +str+::
795
+ # +str+::
535
796
  # String to extract URIs from.
536
797
  # +schemes+::
537
798
  # Limit URI matching to specific schemes.
@@ -549,14 +810,7 @@ module URI
549
810
  # # => ["http://foo.example.com/bla", "mailto:test@example.com"]
550
811
  #
551
812
  def self.extract(str, schemes = nil, &block)
552
- if block_given?
553
- str.scan(regexp(schemes)) { yield $& }
554
- nil
555
- else
556
- result = []
557
- str.scan(regexp(schemes)) { result.push $& }
558
- result
559
- end
813
+ DEFAULT_PARSER.extract(str, schemes, &block)
560
814
  end
561
815
 
562
816
  #
@@ -566,48 +820,182 @@ module URI
566
820
  #
567
821
  # == Args
568
822
  #
569
- # +match_schemes+::
823
+ # +match_schemes+::
570
824
  # Array of schemes. If given, resulting regexp matches to URIs
571
825
  # whose scheme is one of the match_schemes.
572
- #
826
+ #
573
827
  # == Description
574
828
  # Returns a Regexp object which matches to URI-like strings.
575
829
  # The Regexp object returned by this method includes arbitrary
576
830
  # number of capture groups (parentheses). Never rely on their number.
577
- #
831
+ #
578
832
  # == Usage
579
833
  #
580
834
  # require 'uri'
581
835
  #
582
836
  # # extract first URI from html_string
583
837
  # html_string.slice(URI.regexp)
584
- #
838
+ #
585
839
  # # remove ftp URIs
586
840
  # html_string.sub(URI.regexp(['ftp']), '')
587
- #
841
+ #
588
842
  # # You should not rely on the number of parentheses
589
843
  # html_string.scan(URI.regexp) do |*matches|
590
844
  # p $&
591
845
  # end
592
846
  #
593
847
  def self.regexp(schemes = nil)
594
- unless schemes
595
- ABS_URI_REF
848
+ DEFAULT_PARSER.make_regexp(schemes)
849
+ end
850
+
851
+ TBLENCWWWCOMP_ = {} # :nodoc:
852
+ 256.times do |i|
853
+ TBLENCWWWCOMP_[i.chr] = '%%%02X' % i
854
+ end
855
+ TBLENCWWWCOMP_[' '] = '+'
856
+ TBLENCWWWCOMP_.freeze
857
+ TBLDECWWWCOMP_ = {} # :nodoc:
858
+ 256.times do |i|
859
+ h, l = i>>4, i&15
860
+ TBLDECWWWCOMP_['%%%X%X' % [h, l]] = i.chr
861
+ TBLDECWWWCOMP_['%%%x%X' % [h, l]] = i.chr
862
+ TBLDECWWWCOMP_['%%%X%x' % [h, l]] = i.chr
863
+ TBLDECWWWCOMP_['%%%x%x' % [h, l]] = i.chr
864
+ end
865
+ TBLDECWWWCOMP_['+'] = ' '
866
+ TBLDECWWWCOMP_.freeze
867
+
868
+ HTML5ASCIIINCOMPAT = [Encoding::UTF_7, Encoding::UTF_16BE, Encoding::UTF_16LE,
869
+ Encoding::UTF_32BE, Encoding::UTF_32LE] # :nodoc:
870
+
871
+ # Encode given +str+ to URL-encoded form data.
872
+ #
873
+ # This method doesn't convert *, -, ., 0-9, A-Z, _, a-z, but does convert SP
874
+ # (ASCII space) to + and converts others to %XX.
875
+ #
876
+ # This is an implementation of
877
+ # http://www.w3.org/TR/html5/association-of-controls-and-forms.html#url-encoded-form-data
878
+ #
879
+ # See URI.decode_www_form_component, URI.encode_www_form
880
+ def self.encode_www_form_component(str)
881
+ str = str.to_s
882
+ if HTML5ASCIIINCOMPAT.include?(str.encoding)
883
+ str = str.encode(Encoding::UTF_8)
596
884
  else
597
- /(?=#{Regexp.union(*schemes)}:)#{PATTERN::X_ABS_URI}/xn
885
+ str = str.dup
598
886
  end
887
+ str.force_encoding(Encoding::ASCII_8BIT)
888
+ str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
889
+ str.force_encoding(Encoding::US_ASCII)
599
890
  end
600
891
 
601
- end
892
+ # Decode given +str+ of URL-encoded form data.
893
+ #
894
+ # This decodes + to SP.
895
+ #
896
+ # See URI.encode_www_form_component, URI.decode_www_form
897
+ def self.decode_www_form_component(str, enc=Encoding::UTF_8)
898
+ raise ArgumentError, "invalid %-encoding (#{str})" unless /\A[^%]*(?:%\h\h[^%]*)*\z/ =~ str
899
+ str.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
900
+ end
602
901
 
603
- module Kernel
604
- # alias for URI.parse.
902
+ # Generate URL-encoded form data from given +enum+.
903
+ #
904
+ # This generates application/x-www-form-urlencoded data defined in HTML5
905
+ # from a given Enumerable object.
906
+ #
907
+ # This internally uses URI.encode_www_form_component(str).
908
+ #
909
+ # This method doesn't convert the encoding of given items, so convert them
910
+ # before calling this method if you want to send data as other than original
911
+ # encoding or mixed encoding data. (Strings which are encoded in an HTML5
912
+ # ASCII incompatible encoding are converted to UTF-8.)
913
+ #
914
+ # This method doesn't handle files. When you send a file, use
915
+ # multipart/form-data.
916
+ #
917
+ # This is an implementation of
918
+ # http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
919
+ #
920
+ # URI.encode_www_form([["q", "ruby"], ["lang", "en"]])
921
+ # #=> "q=ruby&lang=en"
922
+ # URI.encode_www_form("q" => "ruby", "lang" => "en")
923
+ # #=> "q=ruby&lang=en"
924
+ # URI.encode_www_form("q" => ["ruby", "perl"], "lang" => "en")
925
+ # #=> "q=ruby&q=perl&lang=en"
926
+ # URI.encode_www_form([["q", "ruby"], ["q", "perl"], ["lang", "en"]])
927
+ # #=> "q=ruby&q=perl&lang=en"
928
+ #
929
+ # See URI.encode_www_form_component, URI.decode_www_form
930
+ def self.encode_www_form(enum)
931
+ enum.map do |k,v|
932
+ if v.nil?
933
+ encode_www_form_component(k)
934
+ elsif v.respond_to?(:to_ary)
935
+ v.to_ary.map do |w|
936
+ str = encode_www_form_component(k)
937
+ unless w.nil?
938
+ str << '='
939
+ str << encode_www_form_component(w)
940
+ end
941
+ end.join('&')
942
+ else
943
+ str = encode_www_form_component(k)
944
+ str << '='
945
+ str << encode_www_form_component(v)
946
+ end
947
+ end.join('&')
948
+ end
949
+
950
+ WFKV_ = '(?:[^%#=;&]*(?:%\h\h[^%#=;&]*)*)' # :nodoc:
951
+
952
+ # Decode URL-encoded form data from given +str+.
953
+ #
954
+ # This decodes application/x-www-form-urlencoded data
955
+ # and returns array of key-value array.
956
+ # This internally uses URI.decode_www_form_component.
605
957
  #
606
- # This method is introduced at 1.8.2.
607
- def URI(uri_str) # :doc:
608
- return uri_str if uri_str.is_a? URI
958
+ # _charset_ hack is not supported now because the mapping from given charset
959
+ # to Ruby's encoding is not clear yet.
960
+ # see also http://www.w3.org/TR/html5/syntax.html#character-encodings-0
961
+ #
962
+ # This refers to http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
963
+ #
964
+ # ary = URI.decode_www_form("a=1&a=2&b=3")
965
+ # p ary #=> [['a', '1'], ['a', '2'], ['b', '3']]
966
+ # p ary.assoc('a').last #=> '1'
967
+ # p ary.assoc('b').last #=> '3'
968
+ # p ary.rassoc('2').first #=> 'a'
969
+ # p Hash[ary] # => {"a"=>"2", "b"=>"3"}
970
+ #
971
+ # See URI.decode_www_form_component, URI.encode_www_form
972
+ def self.decode_www_form(str, enc=Encoding::UTF_8)
973
+ return [] if str.empty?
974
+ unless /\A#{WFKV_}=#{WFKV_}(?:[;&]#{WFKV_}=#{WFKV_})*\z/o =~ str
975
+ raise ArgumentError, "invalid data of application/x-www-form-urlencoded (#{str})"
976
+ end
977
+ ary = []
978
+ $&.scan(/([^=;&]+)=([^;&]*)/) do
979
+ ary << [decode_www_form_component($1, enc), decode_www_form_component($2, enc)]
980
+ end
981
+ ary
982
+ end
983
+ end # module URI
984
+
985
+ module Kernel
609
986
 
610
- URI.parse(uri_str)
987
+ #
988
+ # Returns +uri+ converted to a URI object.
989
+ #
990
+ def URI(uri)
991
+ if uri.is_a?(URI::Generic)
992
+ uri
993
+ elsif uri = String.try_convert(uri)
994
+ URI.parse(uri)
995
+ else
996
+ raise ArgumentError,
997
+ "bad argument (expected URI object or URI string)"
998
+ end
611
999
  end
612
1000
  module_function :URI
613
1001
  end