rubysl-uri 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -2
- data/lib/rubysl/uri.rb +1 -1
- data/lib/rubysl/uri/uri.rb +88 -6
- data/lib/rubysl/uri/version.rb +1 -1
- data/lib/uri/common.rb +621 -233
- data/lib/uri/ftp.rb +81 -22
- data/lib/uri/generic.rb +665 -115
- data/lib/uri/http.rb +25 -19
- data/lib/uri/https.rb +4 -2
- data/lib/uri/ldap.rb +75 -5
- data/lib/uri/ldaps.rb +8 -0
- data/lib/uri/mailto.rb +36 -22
- data/rubysl-uri.gemspec +2 -0
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1cdbb4d6b18672fb1ef0eea47a1b42be33ba153c
|
4
|
+
data.tar.gz: a83c4da2e152500850a8eef0b8a93c9e795bdb58
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff5396567881b630b8e5cf085c9d4f0ba53bd98023ac2c3bcdd5e722d7f438eb4cca350b298b1b390e8f2080d96bfbd630c5245273e4c0a8ab1bd412e2edbf3f
|
7
|
+
data.tar.gz: 0eae37ca085e4c68a2f54848311226c7064deafbd7bbe9f8aebb5a0bf2d42eef30f7f8cb6058eeef5d7163ef9c6b843f11d0476185107922bdc2971273a2aeae
|
data/.travis.yml
CHANGED
data/lib/rubysl/uri.rb
CHANGED
data/lib/rubysl/uri/uri.rb
CHANGED
@@ -1,14 +1,96 @@
|
|
1
|
+
# URI is a module providing classes to handle Uniform Resource Identifiers
|
2
|
+
# (RFC2396[http://tools.ietf.org/html/rfc2396])
|
1
3
|
#
|
2
|
-
#
|
4
|
+
# == Features
|
5
|
+
#
|
6
|
+
# * Uniform handling of URIs
|
7
|
+
# * Flexibility to introduce custom URI schemes
|
8
|
+
# * Flexibility to have an alternate URI::Parser (or just different patterns
|
9
|
+
# and regexp's)
|
10
|
+
#
|
11
|
+
# == Basic example
|
12
|
+
#
|
13
|
+
# require 'uri'
|
14
|
+
#
|
15
|
+
# uri = URI("http://foo.com/posts?id=30&limit=5#time=1305298413")
|
16
|
+
# #=> #<URI::HTTP:0x00000000b14880
|
17
|
+
# URL:http://foo.com/posts?id=30&limit=5#time=1305298413>
|
18
|
+
# uri.scheme
|
19
|
+
# #=> "http"
|
20
|
+
# uri.host
|
21
|
+
# #=> "foo.com"
|
22
|
+
# uri.path
|
23
|
+
# #=> "/posts"
|
24
|
+
# uri.query
|
25
|
+
# #=> "id=30&limit=5"
|
26
|
+
# uri.fragment
|
27
|
+
# #=> "time=1305298413"
|
28
|
+
#
|
29
|
+
# uri.to_s
|
30
|
+
# #=> "http://foo.com/posts?id=30&limit=5#time=1305298413"
|
31
|
+
#
|
32
|
+
# == Adding custom URIs
|
33
|
+
#
|
34
|
+
# module URI
|
35
|
+
# class RSYNC < Generic
|
36
|
+
# DEFAULT_PORT = 873
|
37
|
+
# end
|
38
|
+
# @@schemes['RSYNC'] = RSYNC
|
39
|
+
# end
|
40
|
+
# #=> URI::RSYNC
|
41
|
+
#
|
42
|
+
# URI.scheme_list
|
43
|
+
# #=> {"FTP"=>URI::FTP, "HTTP"=>URI::HTTP, "HTTPS"=>URI::HTTPS,
|
44
|
+
# "LDAP"=>URI::LDAP, "LDAPS"=>URI::LDAPS, "MAILTO"=>URI::MailTo,
|
45
|
+
# "RSYNC"=>URI::RSYNC}
|
46
|
+
#
|
47
|
+
# uri = URI("rsync://rsync.foo.com")
|
48
|
+
# #=> #<URI::RSYNC:0x00000000f648c8 URL:rsync://rsync.foo.com>
|
49
|
+
#
|
50
|
+
# == RFC References
|
51
|
+
#
|
52
|
+
# A good place to view an RFC spec is http://www.ietf.org/rfc.html
|
53
|
+
#
|
54
|
+
# Here is a list of all related RFC's.
|
55
|
+
# - RFC822[http://tools.ietf.org/html/rfc822]
|
56
|
+
# - RFC1738[http://tools.ietf.org/html/rfc1738]
|
57
|
+
# - RFC2255[http://tools.ietf.org/html/rfc2255]
|
58
|
+
# - RFC2368[http://tools.ietf.org/html/rfc2368]
|
59
|
+
# - RFC2373[http://tools.ietf.org/html/rfc2373]
|
60
|
+
# - RFC2396[http://tools.ietf.org/html/rfc2396]
|
61
|
+
# - RFC2732[http://tools.ietf.org/html/rfc2732]
|
62
|
+
# - RFC3986[http://tools.ietf.org/html/rfc3986]
|
63
|
+
#
|
64
|
+
# == Class tree
|
65
|
+
#
|
66
|
+
# - URI::Generic (in uri/generic.rb)
|
67
|
+
# - URI::FTP - (in uri/ftp.rb)
|
68
|
+
# - URI::HTTP - (in uri/http.rb)
|
69
|
+
# - URI::HTTPS - (in uri/https.rb)
|
70
|
+
# - URI::LDAP - (in uri/ldap.rb)
|
71
|
+
# - URI::LDAPS - (in uri/ldaps.rb)
|
72
|
+
# - URI::MailTo - (in uri/mailto.rb)
|
73
|
+
# - URI::Parser - (in uri/common.rb)
|
74
|
+
# - URI::REGEXP - (in uri/common.rb)
|
75
|
+
# - URI::REGEXP::PATTERN - (in uri/common.rb)
|
76
|
+
# - URI::Util - (in uri/common.rb)
|
77
|
+
# - URI::Escape - (in uri/common.rb)
|
78
|
+
# - URI::Error - (in uri/common.rb)
|
79
|
+
# - URI::InvalidURIError - (in uri/common.rb)
|
80
|
+
# - URI::InvalidComponentError - (in uri/common.rb)
|
81
|
+
# - URI::BadURIError - (in uri/common.rb)
|
82
|
+
#
|
83
|
+
# == Copyright Info
|
3
84
|
#
|
4
85
|
# Author:: Akira Yamada <akira@ruby-lang.org>
|
5
|
-
# Documentation::
|
6
|
-
#
|
86
|
+
# Documentation::
|
87
|
+
# Akira Yamada <akira@ruby-lang.org>
|
88
|
+
# Dmitry V. Sabanin <sdmitry@lrn.ru>
|
89
|
+
# Vincent Batts <vbatts@hashbangbash.com>
|
90
|
+
# License::
|
7
91
|
# Copyright (c) 2001 akira yamada <akira@ruby-lang.org>
|
8
92
|
# You can redistribute it and/or modify it under the same term as Ruby.
|
9
|
-
# Revision:: $Id
|
10
|
-
#
|
11
|
-
# See URI for documentation
|
93
|
+
# Revision:: $Id$
|
12
94
|
#
|
13
95
|
|
14
96
|
module URI
|
data/lib/rubysl/uri/version.rb
CHANGED
data/lib/uri/common.rb
CHANGED
@@ -1,12 +1,18 @@
|
|
1
|
+
#--
|
1
2
|
# = uri/common.rb
|
2
3
|
#
|
3
4
|
# Author:: Akira Yamada <akira@ruby-lang.org>
|
4
|
-
# Revision:: $Id
|
5
|
-
# License::
|
5
|
+
# Revision:: $Id$
|
6
|
+
# License::
|
6
7
|
# You can redistribute it and/or modify it under the same term as Ruby.
|
7
8
|
#
|
9
|
+
# See URI for general documentation
|
10
|
+
#
|
8
11
|
|
9
12
|
module URI
|
13
|
+
#
|
14
|
+
# Includes URI::REGEXP::PATTERN
|
15
|
+
#
|
10
16
|
module REGEXP
|
11
17
|
#
|
12
18
|
# Patterns used to parse URI's
|
@@ -31,29 +37,336 @@ module URI
|
|
31
37
|
# mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
|
32
38
|
# "(" | ")"
|
33
39
|
# unreserved = alphanum | mark
|
34
|
-
UNRESERVED = "
|
40
|
+
UNRESERVED = "\\-_.!~*'()#{ALNUM}"
|
35
41
|
# reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
|
36
42
|
# "$" | ","
|
37
|
-
# reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
|
43
|
+
# reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
|
38
44
|
# "$" | "," | "[" | "]" (RFC 2732)
|
39
45
|
RESERVED = ";/?:@&=+$,\\[\\]"
|
40
46
|
|
47
|
+
# domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
|
48
|
+
DOMLABEL = "(?:[#{ALNUM}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
|
49
|
+
# toplabel = alpha | alpha *( alphanum | "-" ) alphanum
|
50
|
+
TOPLABEL = "(?:[#{ALPHA}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
|
51
|
+
# hostname = *( domainlabel "." ) toplabel [ "." ]
|
52
|
+
HOSTNAME = "(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?"
|
53
|
+
|
54
|
+
# :startdoc:
|
55
|
+
end # PATTERN
|
56
|
+
|
57
|
+
# :startdoc:
|
58
|
+
end # REGEXP
|
59
|
+
|
60
|
+
# Class that parses Strings into URIs
|
61
|
+
#
|
62
|
+
# It contains a Hash set of patterns and Regexp's that match and validate.
|
63
|
+
#
|
64
|
+
class Parser
|
65
|
+
include REGEXP
|
66
|
+
|
67
|
+
#
|
68
|
+
# == Synopsis
|
69
|
+
#
|
70
|
+
# URI::Parser.new([opts])
|
71
|
+
#
|
72
|
+
# == Args
|
73
|
+
#
|
74
|
+
# The constructor accepts a hash as options for parser.
|
75
|
+
# Keys of options are pattern names of URI components
|
76
|
+
# and values of options are pattern strings.
|
77
|
+
# The constructor generates a set of regexps for parsing URIs.
|
78
|
+
#
|
79
|
+
# You can use the following keys:
|
80
|
+
#
|
81
|
+
# * :ESCAPED (URI::PATTERN::ESCAPED in default)
|
82
|
+
# * :UNRESERVED (URI::PATTERN::UNRESERVED in default)
|
83
|
+
# * :DOMLABEL (URI::PATTERN::DOMLABEL in default)
|
84
|
+
# * :TOPLABEL (URI::PATTERN::TOPLABEL in default)
|
85
|
+
# * :HOSTNAME (URI::PATTERN::HOSTNAME in default)
|
86
|
+
#
|
87
|
+
# == Examples
|
88
|
+
#
|
89
|
+
# p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})")
|
90
|
+
# u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP:0xb78cf4f8 URL:http://example.jp/%uABCD>
|
91
|
+
# URI.parse(u.to_s) #=> raises URI::InvalidURIError
|
92
|
+
#
|
93
|
+
# s = "http://example.com/ABCD"
|
94
|
+
# u1 = p.parse(s) #=> #<URI::HTTP:0xb78c3220 URL:http://example.com/ABCD>
|
95
|
+
# u2 = URI.parse(s) #=> #<URI::HTTP:0xb78b6d54 URL:http://example.com/ABCD>
|
96
|
+
# u1 == u2 #=> true
|
97
|
+
# u1.eql?(u2) #=> false
|
98
|
+
#
|
99
|
+
def initialize(opts = {})
|
100
|
+
@pattern = initialize_pattern(opts)
|
101
|
+
@pattern.each_value {|v| v.freeze}
|
102
|
+
@pattern.freeze
|
103
|
+
|
104
|
+
@regexp = initialize_regexp(@pattern)
|
105
|
+
@regexp.each_value {|v| v.freeze}
|
106
|
+
@regexp.freeze
|
107
|
+
end
|
108
|
+
|
109
|
+
# The Hash of patterns.
|
110
|
+
#
|
111
|
+
# see also URI::Parser.initialize_pattern
|
112
|
+
attr_reader :pattern
|
113
|
+
|
114
|
+
# The Hash of Regexp
|
115
|
+
#
|
116
|
+
# see also URI::Parser.initialize_regexp
|
117
|
+
attr_reader :regexp
|
118
|
+
|
119
|
+
# Returns a split URI against regexp[:ABS_URI]
|
120
|
+
def split(uri)
|
121
|
+
case uri
|
122
|
+
when ''
|
123
|
+
# null uri
|
124
|
+
|
125
|
+
when @regexp[:ABS_URI]
|
126
|
+
scheme, opaque, userinfo, host, port,
|
127
|
+
registry, path, query, fragment = $~[1..-1]
|
128
|
+
|
129
|
+
# URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
|
130
|
+
|
131
|
+
# absoluteURI = scheme ":" ( hier_part | opaque_part )
|
132
|
+
# hier_part = ( net_path | abs_path ) [ "?" query ]
|
133
|
+
# opaque_part = uric_no_slash *uric
|
134
|
+
|
135
|
+
# abs_path = "/" path_segments
|
136
|
+
# net_path = "//" authority [ abs_path ]
|
137
|
+
|
138
|
+
# authority = server | reg_name
|
139
|
+
# server = [ [ userinfo "@" ] hostport ]
|
140
|
+
|
141
|
+
if !scheme
|
142
|
+
raise InvalidURIError,
|
143
|
+
"bad URI(absolute but no scheme): #{uri}"
|
144
|
+
end
|
145
|
+
if !opaque && (!path && (!host && !registry))
|
146
|
+
raise InvalidURIError,
|
147
|
+
"bad URI(absolute but no path): #{uri}"
|
148
|
+
end
|
149
|
+
|
150
|
+
when @regexp[:REL_URI]
|
151
|
+
scheme = nil
|
152
|
+
opaque = nil
|
153
|
+
|
154
|
+
userinfo, host, port, registry,
|
155
|
+
rel_segment, abs_path, query, fragment = $~[1..-1]
|
156
|
+
if rel_segment && abs_path
|
157
|
+
path = rel_segment + abs_path
|
158
|
+
elsif rel_segment
|
159
|
+
path = rel_segment
|
160
|
+
elsif abs_path
|
161
|
+
path = abs_path
|
162
|
+
end
|
163
|
+
|
164
|
+
# URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
|
165
|
+
|
166
|
+
# relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
|
167
|
+
|
168
|
+
# net_path = "//" authority [ abs_path ]
|
169
|
+
# abs_path = "/" path_segments
|
170
|
+
# rel_path = rel_segment [ abs_path ]
|
171
|
+
|
172
|
+
# authority = server | reg_name
|
173
|
+
# server = [ [ userinfo "@" ] hostport ]
|
174
|
+
|
175
|
+
else
|
176
|
+
raise InvalidURIError, "bad URI(is not URI?): #{uri}"
|
177
|
+
end
|
178
|
+
|
179
|
+
path = '' if !path && !opaque # (see RFC2396 Section 5.2)
|
180
|
+
ret = [
|
181
|
+
scheme,
|
182
|
+
userinfo, host, port, # X
|
183
|
+
registry, # X
|
184
|
+
path, # Y
|
185
|
+
opaque, # Y
|
186
|
+
query,
|
187
|
+
fragment
|
188
|
+
]
|
189
|
+
return ret
|
190
|
+
end
|
191
|
+
|
192
|
+
#
|
193
|
+
# == Args
|
194
|
+
#
|
195
|
+
# +uri+::
|
196
|
+
# String
|
197
|
+
#
|
198
|
+
# == Description
|
199
|
+
#
|
200
|
+
# parses +uri+ and constructs either matching URI scheme object
|
201
|
+
# (FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or URI::Generic
|
202
|
+
#
|
203
|
+
# == Usage
|
204
|
+
#
|
205
|
+
# p = URI::Parser.new
|
206
|
+
# p.parse("ldap://ldap.example.com/dc=example?user=john")
|
207
|
+
# #=> #<URI::LDAP:0x00000000b9e7e8 URL:ldap://ldap.example.com/dc=example?user=john>
|
208
|
+
#
|
209
|
+
def parse(uri)
|
210
|
+
scheme, userinfo, host, port,
|
211
|
+
registry, path, opaque, query, fragment = self.split(uri)
|
212
|
+
|
213
|
+
if scheme && URI.scheme_list.include?(scheme.upcase)
|
214
|
+
URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port,
|
215
|
+
registry, path, opaque, query,
|
216
|
+
fragment, self)
|
217
|
+
else
|
218
|
+
Generic.new(scheme, userinfo, host, port,
|
219
|
+
registry, path, opaque, query,
|
220
|
+
fragment, self)
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
|
225
|
+
#
|
226
|
+
# == Args
|
227
|
+
#
|
228
|
+
# +uris+::
|
229
|
+
# an Array of Strings
|
230
|
+
#
|
231
|
+
# == Description
|
232
|
+
#
|
233
|
+
# Attempts to parse and merge a set of URIs
|
234
|
+
#
|
235
|
+
def join(*uris)
|
236
|
+
uris[0] = convert_to_uri(uris[0])
|
237
|
+
uris.inject :merge
|
238
|
+
end
|
239
|
+
|
240
|
+
#
|
241
|
+
# :call-seq:
|
242
|
+
# extract( str )
|
243
|
+
# extract( str, schemes )
|
244
|
+
# extract( str, schemes ) {|item| block }
|
245
|
+
#
|
246
|
+
# == Args
|
247
|
+
#
|
248
|
+
# +str+::
|
249
|
+
# String to search
|
250
|
+
# +schemes+::
|
251
|
+
# Patterns to apply to +str+
|
252
|
+
#
|
253
|
+
# == Description
|
254
|
+
#
|
255
|
+
# Scans +str+ for URIs (optionally limited to +schemes+) and extracts them.
|
256
|
+
# If no +block+ is given, then returns the result,
|
257
|
+
# else it calls +block+ for each element in result.
|
258
|
+
#
|
259
|
+
# see also URI::Parser.make_regexp
|
260
|
+
#
|
261
|
+
def extract(str, schemes = nil)
|
262
|
+
if block_given?
|
263
|
+
str.scan(make_regexp(schemes)) { yield $& }
|
264
|
+
nil
|
265
|
+
else
|
266
|
+
result = []
|
267
|
+
str.scan(make_regexp(schemes)) { result.push $& }
|
268
|
+
result
|
269
|
+
end
|
270
|
+
end
|
271
|
+
|
272
|
+
# returns Regexp that is default self.regexp[:ABS_URI_REF],
|
273
|
+
# unless +schemes+ is provided. Then it is a Regexp.union with self.pattern[:X_ABS_URI]
|
274
|
+
def make_regexp(schemes = nil)
|
275
|
+
unless schemes
|
276
|
+
@regexp[:ABS_URI_REF]
|
277
|
+
else
|
278
|
+
/(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/x
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
#
|
283
|
+
# :call-seq:
|
284
|
+
# escape( str )
|
285
|
+
# escape( str, unsafe )
|
286
|
+
#
|
287
|
+
# == Args
|
288
|
+
#
|
289
|
+
# +str+::
|
290
|
+
# String to make safe
|
291
|
+
# +unsafe+::
|
292
|
+
# Regexp to apply. Defaults to self.regexp[:UNSAFE]
|
293
|
+
#
|
294
|
+
# == Description
|
295
|
+
#
|
296
|
+
# constructs a safe String from +str+, removing unsafe characters,
|
297
|
+
# replacing them with codes.
|
298
|
+
#
|
299
|
+
def escape(str, unsafe = @regexp[:UNSAFE])
|
300
|
+
unless unsafe.kind_of?(Regexp)
|
301
|
+
# perhaps unsafe is String object
|
302
|
+
unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false)
|
303
|
+
end
|
304
|
+
str.gsub(unsafe) do
|
305
|
+
us = $&
|
306
|
+
tmp = ''
|
307
|
+
us.each_byte do |uc|
|
308
|
+
tmp << sprintf('%%%02X', uc)
|
309
|
+
end
|
310
|
+
tmp
|
311
|
+
end.force_encoding(Encoding::US_ASCII)
|
312
|
+
end
|
313
|
+
|
314
|
+
#
|
315
|
+
# :call-seq:
|
316
|
+
# unescape( str )
|
317
|
+
# unescape( str, escaped )
|
318
|
+
#
|
319
|
+
# == Args
|
320
|
+
#
|
321
|
+
# +str+::
|
322
|
+
# String to remove escapes from
|
323
|
+
# +escaped+::
|
324
|
+
# Regexp to apply. Defaults to self.regexp[:ESCAPED]
|
325
|
+
#
|
326
|
+
# == Description
|
327
|
+
#
|
328
|
+
# Removes escapes from +str+
|
329
|
+
#
|
330
|
+
def unescape(str, escaped = @regexp[:ESCAPED])
|
331
|
+
str.gsub(escaped) { [$&[1, 2].hex].pack('C') }.force_encoding(str.encoding)
|
332
|
+
end
|
333
|
+
|
334
|
+
@@to_s = Kernel.instance_method(:to_s)
|
335
|
+
def inspect
|
336
|
+
@@to_s.bind(self).call
|
337
|
+
end
|
338
|
+
|
339
|
+
private
|
340
|
+
|
341
|
+
# Constructs the default Hash of patterns
|
342
|
+
def initialize_pattern(opts = {})
|
343
|
+
ret = {}
|
344
|
+
ret[:ESCAPED] = escaped = (opts.delete(:ESCAPED) || PATTERN::ESCAPED)
|
345
|
+
ret[:UNRESERVED] = unreserved = opts.delete(:UNRESERVED) || PATTERN::UNRESERVED
|
346
|
+
ret[:RESERVED] = reserved = opts.delete(:RESERVED) || PATTERN::RESERVED
|
347
|
+
ret[:DOMLABEL] = opts.delete(:DOMLABEL) || PATTERN::DOMLABEL
|
348
|
+
ret[:TOPLABEL] = opts.delete(:TOPLABEL) || PATTERN::TOPLABEL
|
349
|
+
ret[:HOSTNAME] = hostname = opts.delete(:HOSTNAME)
|
350
|
+
|
351
|
+
# RFC 2396 (URI Generic Syntax)
|
352
|
+
# RFC 2732 (IPv6 Literal Addresses in URL's)
|
353
|
+
# RFC 2373 (IPv6 Addressing Architecture)
|
354
|
+
|
41
355
|
# uric = reserved | unreserved | escaped
|
42
|
-
URIC = "(?:[#{
|
356
|
+
ret[:URIC] = uric = "(?:[#{unreserved}#{reserved}]|#{escaped})"
|
43
357
|
# uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
|
44
358
|
# "&" | "=" | "+" | "$" | ","
|
45
|
-
URIC_NO_SLASH = "(?:[#{
|
359
|
+
ret[:URIC_NO_SLASH] = uric_no_slash = "(?:[#{unreserved};?:@&=+$,]|#{escaped})"
|
46
360
|
# query = *uric
|
47
|
-
QUERY = "#{
|
361
|
+
ret[:QUERY] = query = "#{uric}*"
|
48
362
|
# fragment = *uric
|
49
|
-
FRAGMENT = "#{
|
363
|
+
ret[:FRAGMENT] = fragment = "#{uric}*"
|
50
364
|
|
51
|
-
# domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
|
52
|
-
DOMLABEL = "(?:[#{ALNUM}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
|
53
|
-
# toplabel = alpha | alpha *( alphanum | "-" ) alphanum
|
54
|
-
TOPLABEL = "(?:[#{ALPHA}](?:[-#{ALNUM}]*[#{ALNUM}])?)"
|
55
365
|
# hostname = *( domainlabel "." ) toplabel [ "." ]
|
56
|
-
|
366
|
+
# reg-name = *( unreserved / pct-encoded / sub-delims ) # RFC3986
|
367
|
+
unless hostname
|
368
|
+
ret[:HOSTNAME] = hostname = "(?:[a-zA-Z0-9\\-.]|%\\h\\h)+"
|
369
|
+
end
|
57
370
|
|
58
371
|
# RFC 2373, APPENDIX B:
|
59
372
|
# IPv6address = hexpart [ ":" IPv4address ]
|
@@ -66,153 +379,178 @@ module URI
|
|
66
379
|
# allowed too. Here is a replacement.
|
67
380
|
#
|
68
381
|
# IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
|
69
|
-
IPV4ADDR = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
|
382
|
+
ret[:IPV4ADDR] = ipv4addr = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"
|
70
383
|
# hex4 = 1*4HEXDIG
|
71
|
-
|
384
|
+
hex4 = "[#{PATTERN::HEX}]{1,4}"
|
72
385
|
# lastpart = hex4 | IPv4address
|
73
|
-
|
386
|
+
lastpart = "(?:#{hex4}|#{ipv4addr})"
|
74
387
|
# hexseq1 = *( hex4 ":" ) hex4
|
75
|
-
|
388
|
+
hexseq1 = "(?:#{hex4}:)*#{hex4}"
|
76
389
|
# hexseq2 = *( hex4 ":" ) lastpart
|
77
|
-
|
390
|
+
hexseq2 = "(?:#{hex4}:)*#{lastpart}"
|
78
391
|
# IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ]
|
79
|
-
IPV6ADDR = "(?:#{
|
392
|
+
ret[:IPV6ADDR] = ipv6addr = "(?:#{hexseq2}|(?:#{hexseq1})?::(?:#{hexseq2})?)"
|
80
393
|
|
81
394
|
# IPv6prefix = ( hexseq1 | [ hexseq1 ] "::" [ hexseq1 ] ) "/" 1*2DIGIT
|
82
395
|
# unused
|
83
396
|
|
84
397
|
# ipv6reference = "[" IPv6address "]" (RFC 2732)
|
85
|
-
IPV6REF = "\\[#{
|
398
|
+
ret[:IPV6REF] = ipv6ref = "\\[#{ipv6addr}\\]"
|
86
399
|
|
87
400
|
# host = hostname | IPv4address
|
88
401
|
# host = hostname | IPv4address | IPv6reference (RFC 2732)
|
89
|
-
HOST = "(?:#{
|
402
|
+
ret[:HOST] = host = "(?:#{hostname}|#{ipv4addr}|#{ipv6ref})"
|
90
403
|
# port = *digit
|
91
|
-
|
404
|
+
port = '\d*'
|
92
405
|
# hostport = host [ ":" port ]
|
93
|
-
HOSTPORT = "#{
|
406
|
+
ret[:HOSTPORT] = hostport = "#{host}(?::#{port})?"
|
94
407
|
|
95
408
|
# userinfo = *( unreserved | escaped |
|
96
409
|
# ";" | ":" | "&" | "=" | "+" | "$" | "," )
|
97
|
-
USERINFO = "(?:[#{
|
410
|
+
ret[:USERINFO] = userinfo = "(?:[#{unreserved};:&=+$,]|#{escaped})*"
|
98
411
|
|
99
412
|
# pchar = unreserved | escaped |
|
100
413
|
# ":" | "@" | "&" | "=" | "+" | "$" | ","
|
101
|
-
|
414
|
+
pchar = "(?:[#{unreserved}:@&=+$,]|#{escaped})"
|
102
415
|
# param = *pchar
|
103
|
-
|
416
|
+
param = "#{pchar}*"
|
104
417
|
# segment = *pchar *( ";" param )
|
105
|
-
|
418
|
+
segment = "#{pchar}*(?:;#{param})*"
|
106
419
|
# path_segments = segment *( "/" segment )
|
107
|
-
PATH_SEGMENTS = "#{
|
420
|
+
ret[:PATH_SEGMENTS] = path_segments = "#{segment}(?:/#{segment})*"
|
108
421
|
|
109
422
|
# server = [ [ userinfo "@" ] hostport ]
|
110
|
-
|
423
|
+
server = "(?:#{userinfo}@)?#{hostport}"
|
111
424
|
# reg_name = 1*( unreserved | escaped | "$" | "," |
|
112
425
|
# ";" | ":" | "@" | "&" | "=" | "+" )
|
113
|
-
REG_NAME = "(?:[#{
|
426
|
+
ret[:REG_NAME] = reg_name = "(?:[#{unreserved}$,;:@&=+]|#{escaped})+"
|
114
427
|
# authority = server | reg_name
|
115
|
-
|
428
|
+
authority = "(?:#{server}|#{reg_name})"
|
116
429
|
|
117
430
|
# rel_segment = 1*( unreserved | escaped |
|
118
431
|
# ";" | "@" | "&" | "=" | "+" | "$" | "," )
|
119
|
-
REL_SEGMENT = "(?:[#{
|
432
|
+
ret[:REL_SEGMENT] = rel_segment = "(?:[#{unreserved};@&=+$,]|#{escaped})+"
|
120
433
|
|
121
434
|
# scheme = alpha *( alpha | digit | "+" | "-" | "." )
|
122
|
-
SCHEME = "[#{ALPHA}][
|
435
|
+
ret[:SCHEME] = scheme = "[#{PATTERN::ALPHA}][\\-+.#{PATTERN::ALPHA}\\d]*"
|
123
436
|
|
124
437
|
# abs_path = "/" path_segments
|
125
|
-
ABS_PATH = "/#{
|
438
|
+
ret[:ABS_PATH] = abs_path = "/#{path_segments}"
|
126
439
|
# rel_path = rel_segment [ abs_path ]
|
127
|
-
REL_PATH = "#{
|
440
|
+
ret[:REL_PATH] = rel_path = "#{rel_segment}(?:#{abs_path})?"
|
128
441
|
# net_path = "//" authority [ abs_path ]
|
129
|
-
NET_PATH
|
442
|
+
ret[:NET_PATH] = net_path = "//#{authority}(?:#{abs_path})?"
|
130
443
|
|
131
444
|
# hier_part = ( net_path | abs_path ) [ "?" query ]
|
132
|
-
HIER_PART
|
445
|
+
ret[:HIER_PART] = hier_part = "(?:#{net_path}|#{abs_path})(?:\\?(?:#{query}))?"
|
133
446
|
# opaque_part = uric_no_slash *uric
|
134
|
-
OPAQUE_PART = "#{
|
447
|
+
ret[:OPAQUE_PART] = opaque_part = "#{uric_no_slash}#{uric}*"
|
135
448
|
|
136
449
|
# absoluteURI = scheme ":" ( hier_part | opaque_part )
|
137
|
-
ABS_URI
|
450
|
+
ret[:ABS_URI] = abs_uri = "#{scheme}:(?:#{hier_part}|#{opaque_part})"
|
138
451
|
# relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
|
139
|
-
REL_URI = "(?:#{
|
452
|
+
ret[:REL_URI] = rel_uri = "(?:#{net_path}|#{abs_path}|#{rel_path})(?:\\?#{query})?"
|
140
453
|
|
141
454
|
# URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
|
142
|
-
URI_REF = "(?:#{
|
455
|
+
ret[:URI_REF] = "(?:#{abs_uri}|#{rel_uri})?(?:##{fragment})?"
|
143
456
|
|
144
|
-
|
145
|
-
|
146
|
-
(#{PATTERN::SCHEME}): (?# 1: scheme)
|
457
|
+
ret[:X_ABS_URI] = "
|
458
|
+
(#{scheme}): (?# 1: scheme)
|
147
459
|
(?:
|
148
|
-
(#{
|
460
|
+
(#{opaque_part}) (?# 2: opaque)
|
149
461
|
|
|
150
462
|
(?:(?:
|
151
463
|
//(?:
|
152
|
-
(?:(?:(#{
|
153
|
-
(?:(#{
|
464
|
+
(?:(?:(#{userinfo})@)? (?# 3: userinfo)
|
465
|
+
(?:(#{host})(?::(\\d*))?))? (?# 4: host, 5: port)
|
154
466
|
|
|
155
|
-
(#{
|
467
|
+
(#{reg_name}) (?# 6: registry)
|
156
468
|
)
|
157
469
|
|
|
158
|
-
(?!//))
|
159
|
-
(#{
|
160
|
-
)(?:\\?(#{
|
470
|
+
(?!//)) (?# XXX: '//' is the mark for hostport)
|
471
|
+
(#{abs_path})? (?# 7: path)
|
472
|
+
)(?:\\?(#{query}))? (?# 8: query)
|
161
473
|
)
|
162
|
-
(?:\\#(#{
|
474
|
+
(?:\\#(#{fragment}))? (?# 9: fragment)
|
163
475
|
"
|
164
|
-
|
476
|
+
|
477
|
+
ret[:X_REL_URI] = "
|
165
478
|
(?:
|
166
479
|
(?:
|
167
480
|
//
|
168
481
|
(?:
|
169
|
-
(?:(#{
|
170
|
-
(#{
|
482
|
+
(?:(#{userinfo})@)? (?# 1: userinfo)
|
483
|
+
(#{host})?(?::(\\d*))? (?# 2: host, 3: port)
|
171
484
|
|
|
172
|
-
(#{
|
485
|
+
(#{reg_name}) (?# 4: registry)
|
173
486
|
)
|
174
487
|
)
|
175
488
|
|
|
176
|
-
(#{
|
489
|
+
(#{rel_segment}) (?# 5: rel_segment)
|
177
490
|
)?
|
178
|
-
(#{
|
179
|
-
(?:\\?(#{
|
180
|
-
(?:\\#(#{
|
491
|
+
(#{abs_path})? (?# 6: abs_path)
|
492
|
+
(?:\\?(#{query}))? (?# 7: query)
|
493
|
+
(?:\\#(#{fragment}))? (?# 8: fragment)
|
181
494
|
"
|
182
|
-
# :startdoc:
|
183
|
-
end # PATTERN
|
184
495
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
#
|
215
|
-
|
496
|
+
ret
|
497
|
+
end
|
498
|
+
|
499
|
+
# Constructs the default Hash of Regexp's
|
500
|
+
def initialize_regexp(pattern)
|
501
|
+
ret = {}
|
502
|
+
|
503
|
+
# for URI::split
|
504
|
+
ret[:ABS_URI] = Regexp.new('\A\s*' + pattern[:X_ABS_URI] + '\s*\z', Regexp::EXTENDED)
|
505
|
+
ret[:REL_URI] = Regexp.new('\A\s*' + pattern[:X_REL_URI] + '\s*\z', Regexp::EXTENDED)
|
506
|
+
|
507
|
+
# for URI::extract
|
508
|
+
ret[:URI_REF] = Regexp.new(pattern[:URI_REF])
|
509
|
+
ret[:ABS_URI_REF] = Regexp.new(pattern[:X_ABS_URI], Regexp::EXTENDED)
|
510
|
+
ret[:REL_URI_REF] = Regexp.new(pattern[:X_REL_URI], Regexp::EXTENDED)
|
511
|
+
|
512
|
+
# for URI::escape/unescape
|
513
|
+
ret[:ESCAPED] = Regexp.new(pattern[:ESCAPED])
|
514
|
+
ret[:UNSAFE] = Regexp.new("[^#{pattern[:UNRESERVED]}#{pattern[:RESERVED]}]")
|
515
|
+
|
516
|
+
# for Generic#initialize
|
517
|
+
ret[:SCHEME] = Regexp.new("\\A#{pattern[:SCHEME]}\\z")
|
518
|
+
ret[:USERINFO] = Regexp.new("\\A#{pattern[:USERINFO]}\\z")
|
519
|
+
ret[:HOST] = Regexp.new("\\A#{pattern[:HOST]}\\z")
|
520
|
+
ret[:PORT] = Regexp.new("\\A#{pattern[:PORT]}\\z")
|
521
|
+
ret[:OPAQUE] = Regexp.new("\\A#{pattern[:OPAQUE_PART]}\\z")
|
522
|
+
ret[:REGISTRY] = Regexp.new("\\A#{pattern[:REG_NAME]}\\z")
|
523
|
+
ret[:ABS_PATH] = Regexp.new("\\A#{pattern[:ABS_PATH]}\\z")
|
524
|
+
ret[:REL_PATH] = Regexp.new("\\A#{pattern[:REL_PATH]}\\z")
|
525
|
+
ret[:QUERY] = Regexp.new("\\A#{pattern[:QUERY]}\\z")
|
526
|
+
ret[:FRAGMENT] = Regexp.new("\\A#{pattern[:FRAGMENT]}\\z")
|
527
|
+
|
528
|
+
ret
|
529
|
+
end
|
530
|
+
|
531
|
+
def convert_to_uri(uri)
|
532
|
+
if uri.is_a?(URI::Generic)
|
533
|
+
uri
|
534
|
+
elsif uri = String.try_convert(uri)
|
535
|
+
parse(uri)
|
536
|
+
else
|
537
|
+
raise ArgumentError,
|
538
|
+
"bad argument (expected URI object or URI string)"
|
539
|
+
end
|
540
|
+
end
|
541
|
+
|
542
|
+
end # class Parser
|
543
|
+
|
544
|
+
# URI::Parser.new
|
545
|
+
DEFAULT_PARSER = Parser.new
|
546
|
+
DEFAULT_PARSER.pattern.each_pair do |sym, str|
|
547
|
+
unless REGEXP::PATTERN.const_defined?(sym)
|
548
|
+
REGEXP::PATTERN.const_set(sym, str)
|
549
|
+
end
|
550
|
+
end
|
551
|
+
DEFAULT_PARSER.regexp.each_pair do |sym, str|
|
552
|
+
const_set(sym, str)
|
553
|
+
end
|
216
554
|
|
217
555
|
module Util # :nodoc:
|
218
556
|
def make_components_hash(klass, array_hash)
|
@@ -236,7 +574,7 @@ module URI
|
|
236
574
|
end
|
237
575
|
end
|
238
576
|
else
|
239
|
-
raise ArgumentError,
|
577
|
+
raise ArgumentError,
|
240
578
|
"expected Array of or Hash of components of #{klass.to_s} (#{klass.component[1..-1].join(', ')})"
|
241
579
|
end
|
242
580
|
tmp[:scheme] = klass.to_s.sub(/\A.*::/, '').downcase
|
@@ -246,9 +584,8 @@ module URI
|
|
246
584
|
module_function :make_components_hash
|
247
585
|
end
|
248
586
|
|
587
|
+
# module for escaping unsafe characters with codes.
|
249
588
|
module Escape
|
250
|
-
include REGEXP
|
251
|
-
|
252
589
|
#
|
253
590
|
# == Synopsis
|
254
591
|
#
|
@@ -281,18 +618,9 @@ module URI
|
|
281
618
|
# p URI.escape("@?@!", "!?")
|
282
619
|
# # => "@%3F@%21"
|
283
620
|
#
|
284
|
-
def escape(
|
285
|
-
|
286
|
-
|
287
|
-
unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false, 'N')
|
288
|
-
end
|
289
|
-
str.gsub(unsafe) do |us|
|
290
|
-
tmp = ''
|
291
|
-
us.each_byte do |uc|
|
292
|
-
tmp << sprintf('%%%02X', uc)
|
293
|
-
end
|
294
|
-
tmp
|
295
|
-
end
|
621
|
+
def escape(*arg)
|
622
|
+
warn "#{caller(1)[0]}: warning: URI.escape is obsolete" if $VERBOSE
|
623
|
+
DEFAULT_PARSER.escape(*arg)
|
296
624
|
end
|
297
625
|
alias encode escape
|
298
626
|
#
|
@@ -316,19 +644,22 @@ module URI
|
|
316
644
|
# p URI.unescape(enc_uri)
|
317
645
|
# # => "http://example.com/?a=\t\r"
|
318
646
|
#
|
319
|
-
def unescape(
|
320
|
-
|
321
|
-
|
322
|
-
end
|
647
|
+
def unescape(*arg)
|
648
|
+
warn "#{caller(1)[0]}: warning: URI.unescape is obsolete" if $VERBOSE
|
649
|
+
DEFAULT_PARSER.unescape(*arg)
|
323
650
|
end
|
324
651
|
alias decode unescape
|
325
|
-
end
|
652
|
+
end # module Escape
|
326
653
|
|
327
|
-
include REGEXP
|
328
654
|
extend Escape
|
655
|
+
include REGEXP
|
329
656
|
|
330
657
|
@@schemes = {}
|
331
|
-
|
658
|
+
# Returns a Hash of the defined schemes
|
659
|
+
def self.scheme_list
|
660
|
+
@@schemes
|
661
|
+
end
|
662
|
+
|
332
663
|
#
|
333
664
|
# Base class for all URI exceptions.
|
334
665
|
#
|
@@ -369,7 +700,7 @@ module URI
|
|
369
700
|
# * Opaque
|
370
701
|
# * Query
|
371
702
|
# * Fragment
|
372
|
-
#
|
703
|
+
#
|
373
704
|
# == Usage
|
374
705
|
#
|
375
706
|
# require 'uri'
|
@@ -378,75 +709,7 @@ module URI
|
|
378
709
|
# # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]
|
379
710
|
#
|
380
711
|
def self.split(uri)
|
381
|
-
|
382
|
-
when ''
|
383
|
-
# null uri
|
384
|
-
|
385
|
-
when ABS_URI
|
386
|
-
scheme, opaque, userinfo, host, port,
|
387
|
-
registry, path, query, fragment = $~[1..-1]
|
388
|
-
|
389
|
-
# URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
|
390
|
-
|
391
|
-
# absoluteURI = scheme ":" ( hier_part | opaque_part )
|
392
|
-
# hier_part = ( net_path | abs_path ) [ "?" query ]
|
393
|
-
# opaque_part = uric_no_slash *uric
|
394
|
-
|
395
|
-
# abs_path = "/" path_segments
|
396
|
-
# net_path = "//" authority [ abs_path ]
|
397
|
-
|
398
|
-
# authority = server | reg_name
|
399
|
-
# server = [ [ userinfo "@" ] hostport ]
|
400
|
-
|
401
|
-
if !scheme
|
402
|
-
raise InvalidURIError,
|
403
|
-
"bad URI(absolute but no scheme): #{uri}"
|
404
|
-
end
|
405
|
-
if !opaque && (!path && (!host && !registry))
|
406
|
-
raise InvalidURIError,
|
407
|
-
"bad URI(absolute but no path): #{uri}"
|
408
|
-
end
|
409
|
-
|
410
|
-
when REL_URI
|
411
|
-
scheme = nil
|
412
|
-
opaque = nil
|
413
|
-
|
414
|
-
userinfo, host, port, registry,
|
415
|
-
rel_segment, abs_path, query, fragment = $~[1..-1]
|
416
|
-
if rel_segment && abs_path
|
417
|
-
path = rel_segment + abs_path
|
418
|
-
elsif rel_segment
|
419
|
-
path = rel_segment
|
420
|
-
elsif abs_path
|
421
|
-
path = abs_path
|
422
|
-
end
|
423
|
-
|
424
|
-
# URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
|
425
|
-
|
426
|
-
# relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
|
427
|
-
|
428
|
-
# net_path = "//" authority [ abs_path ]
|
429
|
-
# abs_path = "/" path_segments
|
430
|
-
# rel_path = rel_segment [ abs_path ]
|
431
|
-
|
432
|
-
# authority = server | reg_name
|
433
|
-
# server = [ [ userinfo "@" ] hostport ]
|
434
|
-
|
435
|
-
else
|
436
|
-
raise InvalidURIError, "bad URI(is not URI?): #{uri}"
|
437
|
-
end
|
438
|
-
|
439
|
-
path = '' if !path && !opaque # (see RFC2396 Section 5.2)
|
440
|
-
ret = [
|
441
|
-
scheme,
|
442
|
-
userinfo, host, port, # X
|
443
|
-
registry, # X
|
444
|
-
path, # Y
|
445
|
-
opaque, # Y
|
446
|
-
query,
|
447
|
-
fragment
|
448
|
-
]
|
449
|
-
return ret
|
712
|
+
DEFAULT_PARSER.split(uri)
|
450
713
|
end
|
451
714
|
|
452
715
|
#
|
@@ -462,7 +725,7 @@ module URI
|
|
462
725
|
# == Description
|
463
726
|
#
|
464
727
|
# Creates one of the URI's subclasses instance from the string.
|
465
|
-
#
|
728
|
+
#
|
466
729
|
# == Raises
|
467
730
|
#
|
468
731
|
# URI::InvalidURIError
|
@@ -475,24 +738,13 @@ module URI
|
|
475
738
|
# uri = URI.parse("http://www.ruby-lang.org/")
|
476
739
|
# p uri
|
477
740
|
# # => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/>
|
478
|
-
# p uri.scheme
|
479
|
-
# # => "http"
|
480
|
-
# p uri.host
|
481
|
-
# # => "www.ruby-lang.org"
|
482
|
-
#
|
741
|
+
# p uri.scheme
|
742
|
+
# # => "http"
|
743
|
+
# p uri.host
|
744
|
+
# # => "www.ruby-lang.org"
|
745
|
+
#
|
483
746
|
def self.parse(uri)
|
484
|
-
|
485
|
-
registry, path, opaque, query, fragment = self.split(uri)
|
486
|
-
|
487
|
-
if scheme && @@schemes.include?(scheme.upcase)
|
488
|
-
@@schemes[scheme.upcase].new(scheme, userinfo, host, port,
|
489
|
-
registry, path, opaque, query,
|
490
|
-
fragment)
|
491
|
-
else
|
492
|
-
Generic.new(scheme, userinfo, host, port,
|
493
|
-
registry, path, opaque, query,
|
494
|
-
fragment)
|
495
|
-
end
|
747
|
+
DEFAULT_PARSER.parse(uri)
|
496
748
|
end
|
497
749
|
|
498
750
|
#
|
@@ -513,15 +765,24 @@ module URI
|
|
513
765
|
#
|
514
766
|
# require 'uri'
|
515
767
|
#
|
516
|
-
# p URI.join("http://
|
768
|
+
# p URI.join("http://example.com/","main.rbx")
|
517
769
|
# # => #<URI::HTTP:0x2022ac02 URL:http://example.com/main.rbx>
|
518
770
|
#
|
771
|
+
# p URI.join('http://example.com', 'foo')
|
772
|
+
# # => #<URI::HTTP:0x01ab80a0 URL:http://example.com/foo>
|
773
|
+
#
|
774
|
+
# p URI.join('http://example.com', '/foo', '/bar')
|
775
|
+
# # => #<URI::HTTP:0x01aaf0b0 URL:http://example.com/bar>
|
776
|
+
#
|
777
|
+
# p URI.join('http://example.com', '/foo', 'bar')
|
778
|
+
# # => #<URI::HTTP:0x801a92af0 URL:http://example.com/bar>
|
779
|
+
#
|
780
|
+
# p URI.join('http://example.com', '/foo/', 'bar')
|
781
|
+
# # => #<URI::HTTP:0x80135a3a0 URL:http://example.com/foo/bar>
|
782
|
+
#
|
783
|
+
#
|
519
784
|
def self.join(*str)
|
520
|
-
|
521
|
-
str[1 .. -1].each do |x|
|
522
|
-
u = u.merge(x)
|
523
|
-
end
|
524
|
-
u
|
785
|
+
DEFAULT_PARSER.join(*str)
|
525
786
|
end
|
526
787
|
|
527
788
|
#
|
@@ -531,7 +792,7 @@ module URI
|
|
531
792
|
#
|
532
793
|
# == Args
|
533
794
|
#
|
534
|
-
# +str+::
|
795
|
+
# +str+::
|
535
796
|
# String to extract URIs from.
|
536
797
|
# +schemes+::
|
537
798
|
# Limit URI matching to specific schemes.
|
@@ -549,14 +810,7 @@ module URI
|
|
549
810
|
# # => ["http://foo.example.com/bla", "mailto:test@example.com"]
|
550
811
|
#
|
551
812
|
def self.extract(str, schemes = nil, &block)
|
552
|
-
|
553
|
-
str.scan(regexp(schemes)) { yield $& }
|
554
|
-
nil
|
555
|
-
else
|
556
|
-
result = []
|
557
|
-
str.scan(regexp(schemes)) { result.push $& }
|
558
|
-
result
|
559
|
-
end
|
813
|
+
DEFAULT_PARSER.extract(str, schemes, &block)
|
560
814
|
end
|
561
815
|
|
562
816
|
#
|
@@ -566,48 +820,182 @@ module URI
|
|
566
820
|
#
|
567
821
|
# == Args
|
568
822
|
#
|
569
|
-
# +match_schemes+::
|
823
|
+
# +match_schemes+::
|
570
824
|
# Array of schemes. If given, the resulting regexp matches URIs
|
571
825
|
# whose scheme is one of the match_schemes.
|
572
|
-
#
|
826
|
+
#
|
573
827
|
# == Description
|
574
828
|
# Returns a Regexp object which matches URI-like strings.
|
575
829
|
# The Regexp object returned by this method includes an arbitrary
|
576
830
|
# number of capture groups (parentheses). Never rely on their number.
|
577
|
-
#
|
831
|
+
#
|
578
832
|
# == Usage
|
579
833
|
#
|
580
834
|
# require 'uri'
|
581
835
|
#
|
582
836
|
# # extract first URI from html_string
|
583
837
|
# html_string.slice(URI.regexp)
|
584
|
-
#
|
838
|
+
#
|
585
839
|
# # remove ftp URIs
|
586
840
|
# html_string.sub(URI.regexp(['ftp']), '')
|
587
|
-
#
|
841
|
+
#
|
588
842
|
# # You should not rely on the number of parentheses
|
589
843
|
# html_string.scan(URI.regexp) do |*matches|
|
590
844
|
# p $&
|
591
845
|
# end
|
592
846
|
#
|
593
847
|
def self.regexp(schemes = nil)
|
594
|
-
|
595
|
-
|
848
|
+
DEFAULT_PARSER.make_regexp(schemes)
|
849
|
+
end
|
850
|
+
|
851
|
+
TBLENCWWWCOMP_ = {} # :nodoc:
|
852
|
+
256.times do |i|
|
853
|
+
TBLENCWWWCOMP_[i.chr] = '%%%02X' % i
|
854
|
+
end
|
855
|
+
TBLENCWWWCOMP_[' '] = '+'
|
856
|
+
TBLENCWWWCOMP_.freeze
|
857
|
+
TBLDECWWWCOMP_ = {} # :nodoc:
|
858
|
+
256.times do |i|
|
859
|
+
h, l = i>>4, i&15
|
860
|
+
TBLDECWWWCOMP_['%%%X%X' % [h, l]] = i.chr
|
861
|
+
TBLDECWWWCOMP_['%%%x%X' % [h, l]] = i.chr
|
862
|
+
TBLDECWWWCOMP_['%%%X%x' % [h, l]] = i.chr
|
863
|
+
TBLDECWWWCOMP_['%%%x%x' % [h, l]] = i.chr
|
864
|
+
end
|
865
|
+
TBLDECWWWCOMP_['+'] = ' '
|
866
|
+
TBLDECWWWCOMP_.freeze
|
867
|
+
|
868
|
+
HTML5ASCIIINCOMPAT = [Encoding::UTF_7, Encoding::UTF_16BE, Encoding::UTF_16LE,
|
869
|
+
Encoding::UTF_32BE, Encoding::UTF_32LE] # :nodoc:
|
870
|
+
|
871
|
+
# Encode given +str+ to URL-encoded form data.
|
872
|
+
#
|
873
|
+
# This method doesn't convert *, -, ., 0-9, A-Z, _, a-z, but does convert SP
|
874
|
+
# (ASCII space) to + and converts others to %XX.
|
875
|
+
#
|
876
|
+
# This is an implementation of
|
877
|
+
# http://www.w3.org/TR/html5/association-of-controls-and-forms.html#url-encoded-form-data
|
878
|
+
#
|
879
|
+
# See URI.decode_www_form_component, URI.encode_www_form
|
880
|
+
def self.encode_www_form_component(str)
|
881
|
+
str = str.to_s
|
882
|
+
if HTML5ASCIIINCOMPAT.include?(str.encoding)
|
883
|
+
str = str.encode(Encoding::UTF_8)
|
596
884
|
else
|
597
|
-
|
885
|
+
str = str.dup
|
598
886
|
end
|
887
|
+
str.force_encoding(Encoding::ASCII_8BIT)
|
888
|
+
str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
|
889
|
+
str.force_encoding(Encoding::US_ASCII)
|
599
890
|
end
|
600
891
|
|
601
|
-
|
892
|
+
# Decode given +str+ of URL-encoded form data.
|
893
|
+
#
|
894
|
+
# This decodes + to SP.
|
895
|
+
#
|
896
|
+
# See URI.encode_www_form_component, URI.decode_www_form
|
897
|
+
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
|
898
|
+
raise ArgumentError, "invalid %-encoding (#{str})" unless /\A[^%]*(?:%\h\h[^%]*)*\z/ =~ str
|
899
|
+
str.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
|
900
|
+
end
|
602
901
|
|
603
|
-
|
604
|
-
#
|
902
|
+
# Generate URL-encoded form data from given +enum+.
|
903
|
+
#
|
904
|
+
# This generates application/x-www-form-urlencoded data defined in HTML5
|
905
|
+
# from the given Enumerable object.
|
906
|
+
#
|
907
|
+
# This internally uses URI.encode_www_form_component(str).
|
908
|
+
#
|
909
|
+
# This method doesn't convert the encoding of given items, so convert them
|
910
|
+
# before calling this method if you want to send data as other than original
|
911
|
+
# encoding or mixed encoding data. (Strings which are encoded in an HTML5
|
912
|
+
# ASCII incompatible encoding are converted to UTF-8.)
|
913
|
+
#
|
914
|
+
# This method doesn't handle files. When you send a file, use
|
915
|
+
# multipart/form-data.
|
916
|
+
#
|
917
|
+
# This is an implementation of
|
918
|
+
# http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
|
919
|
+
#
|
920
|
+
# URI.encode_www_form([["q", "ruby"], ["lang", "en"]])
|
921
|
+
# #=> "q=ruby&lang=en"
|
922
|
+
# URI.encode_www_form("q" => "ruby", "lang" => "en")
|
923
|
+
# #=> "q=ruby&lang=en"
|
924
|
+
# URI.encode_www_form("q" => ["ruby", "perl"], "lang" => "en")
|
925
|
+
# #=> "q=ruby&q=perl&lang=en"
|
926
|
+
# URI.encode_www_form([["q", "ruby"], ["q", "perl"], ["lang", "en"]])
|
927
|
+
# #=> "q=ruby&q=perl&lang=en"
|
928
|
+
#
|
929
|
+
# See URI.encode_www_form_component, URI.decode_www_form
|
930
|
+
def self.encode_www_form(enum)
|
931
|
+
enum.map do |k,v|
|
932
|
+
if v.nil?
|
933
|
+
encode_www_form_component(k)
|
934
|
+
elsif v.respond_to?(:to_ary)
|
935
|
+
v.to_ary.map do |w|
|
936
|
+
str = encode_www_form_component(k)
|
937
|
+
unless w.nil?
|
938
|
+
str << '='
|
939
|
+
str << encode_www_form_component(w)
|
940
|
+
end
|
941
|
+
end.join('&')
|
942
|
+
else
|
943
|
+
str = encode_www_form_component(k)
|
944
|
+
str << '='
|
945
|
+
str << encode_www_form_component(v)
|
946
|
+
end
|
947
|
+
end.join('&')
|
948
|
+
end
|
949
|
+
|
950
|
+
WFKV_ = '(?:[^%#=;&]*(?:%\h\h[^%#=;&]*)*)' # :nodoc:
|
951
|
+
|
952
|
+
# Decode URL-encoded form data from given +str+.
|
953
|
+
#
|
954
|
+
# This decodes application/x-www-form-urlencoded data
|
955
|
+
# and returns an array of key-value arrays.
|
956
|
+
# This internally uses URI.decode_www_form_component.
|
605
957
|
#
|
606
|
-
#
|
607
|
-
|
608
|
-
|
958
|
+
# The _charset_ hack is not supported now because the mapping from a given charset
|
959
|
+
# to Ruby's encoding is not clear yet.
|
960
|
+
# see also http://www.w3.org/TR/html5/syntax.html#character-encodings-0
|
961
|
+
#
|
962
|
+
# This refers to http://www.w3.org/TR/html5/forms.html#url-encoded-form-data
|
963
|
+
#
|
964
|
+
# ary = URI.decode_www_form("a=1&a=2&b=3")
|
965
|
+
# p ary #=> [['a', '1'], ['a', '2'], ['b', '3']]
|
966
|
+
# p ary.assoc('a').last #=> '1'
|
967
|
+
# p ary.assoc('b').last #=> '3'
|
968
|
+
# p ary.reverse.assoc('a').last #=> '2'
|
969
|
+
# p Hash[ary] # => {"a"=>"2", "b"=>"3"}
|
970
|
+
#
|
971
|
+
# See URI.decode_www_form_component, URI.encode_www_form
|
972
|
+
def self.decode_www_form(str, enc=Encoding::UTF_8)
|
973
|
+
return [] if str.empty?
|
974
|
+
unless /\A#{WFKV_}=#{WFKV_}(?:[;&]#{WFKV_}=#{WFKV_})*\z/o =~ str
|
975
|
+
raise ArgumentError, "invalid data of application/x-www-form-urlencoded (#{str})"
|
976
|
+
end
|
977
|
+
ary = []
|
978
|
+
$&.scan(/([^=;&]+)=([^;&]*)/) do
|
979
|
+
ary << [decode_www_form_component($1, enc), decode_www_form_component($2, enc)]
|
980
|
+
end
|
981
|
+
ary
|
982
|
+
end
|
983
|
+
end # module URI
|
984
|
+
|
985
|
+
module Kernel
|
609
986
|
|
610
|
-
|
987
|
+
#
|
988
|
+
# Returns +uri+ converted to a URI object.
|
989
|
+
#
|
990
|
+
def URI(uri)
|
991
|
+
if uri.is_a?(URI::Generic)
|
992
|
+
uri
|
993
|
+
elsif uri = String.try_convert(uri)
|
994
|
+
URI.parse(uri)
|
995
|
+
else
|
996
|
+
raise ArgumentError,
|
997
|
+
"bad argument (expected URI object or URI string)"
|
998
|
+
end
|
611
999
|
end
|
612
1000
|
module_function :URI
|
613
1001
|
end
|