addressable 1.0.4 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,6 @@
1
+ # coding:utf-8
1
2
  #--
2
- # Addressable, Copyright (c) 2006-2007 Bob Aman
3
+ # Addressable, Copyright (c) 2006-2008 Bob Aman
3
4
  #
4
5
  # Permission is hereby granted, free of charge, to any person obtaining
5
6
  # a copy of this software and associated documentation files (the
@@ -24,23 +25,43 @@
24
25
  $:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '/..')))
25
26
  $:.uniq!
26
27
 
27
- require 'addressable/version'
28
+ require "addressable/version"
29
+ require "addressable/idna"
28
30
 
29
31
  module Addressable
30
- # This is an implementation of a URI parser based on RFC 3986, 3987.
32
+ ##
33
+ # This is an implementation of a URI parser based on
34
+ # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>,
35
+ # <a href="http://www.ietf.org/rfc/rfc3987.txt">RFC 3987</a>.
31
36
  class URI
37
+ ##
32
38
  # Raised if something other than a uri is supplied.
33
39
  class InvalidURIError < StandardError
34
40
  end
35
-
41
+
42
+ ##
36
43
  # Raised if an invalid method option is supplied.
37
44
  class InvalidOptionError < StandardError
38
45
  end
39
-
40
- # Raised if an invalid method option is supplied.
41
- class InvalidTemplateValue < StandardError
46
+
47
+ ##
48
+ # Raised if an invalid template value is supplied.
49
+ class InvalidTemplateValueError < StandardError
50
+ end
51
+
52
+ ##
53
+ # Raised if an invalid template operator is used in a pattern.
54
+ class InvalidTemplateOperatorError < StandardError
42
55
  end
43
56
 
57
+ ##
58
+ # Raised if a template operator aborts extraction because the value does not match what the operator expects.
59
+ class TemplateOperatorAbortedError < StandardError
60
+ end
61
+
62
+ ##
63
+ # Container for the character classes specified in
64
+ # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
44
65
  module CharacterClasses
45
66
  ALPHA = "a-zA-Z"
46
67
  DIGIT = "0-9"
@@ -54,24 +75,39 @@ module Addressable
54
75
  PATH = PCHAR + "\\/"
55
76
  QUERY = PCHAR + "\\/\\?"
56
77
  FRAGMENT = PCHAR + "\\/\\?"
57
- end
58
-
78
+ end
79
+
80
+ ##
59
81
  # Returns a URI object based on the parsed string.
60
- def self.parse(uri_string)
61
- return nil if uri_string.nil?
62
-
82
+ #
83
+ # @param [String, Addressable::URI, #to_str] uri
84
+ # The URI string to parse. No parsing is performed if the object is
85
+ # already an <tt>Addressable::URI</tt>.
86
+ #
87
+ # @return [Addressable::URI] The parsed URI.
88
+ def self.parse(uri)
89
+ # If we were given nil, return nil.
90
+ return nil unless uri
63
91
  # If a URI object is passed, just return itself.
64
- return uri_string if uri_string.kind_of?(self)
65
-
92
+ return uri if uri.kind_of?(self)
93
+ if !uri.respond_to?(:to_str)
94
+ raise TypeError, "Can't convert #{uri.class} into String."
95
+ end
96
+ # Otherwise, convert to a String
97
+ uri = uri.to_str
98
+
66
99
  # If a URI object of the Ruby standard library variety is passed,
67
100
  # convert it to a string, then parse the string.
68
- if uri_string.class.name =~ /^URI::/
69
- uri_string = uri_string.to_s
101
+ # We do the check this way because we don't want to accidentally
102
+ # cause a missing constant exception to be thrown.
103
+ if uri.class.name =~ /^URI\b/
104
+ uri = uri.to_s
70
105
  end
71
-
106
+
107
+ # This Regexp is supplied as an example in RFC 3986, and it works great.
72
108
  uri_regex =
73
109
  /^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/
74
- scan = uri_string.scan(uri_regex)
110
+ scan = uri.scan(uri_regex)
75
111
  fragments = scan[0]
76
112
  return nil if fragments.nil?
77
113
  scheme = fragments[1]
@@ -85,47 +121,74 @@ module Addressable
85
121
  host = nil
86
122
  port = nil
87
123
  if authority != nil
88
- userinfo = authority.scan(/^([^\[\]]*)@/).flatten[0]
124
+ # The Regexp above doesn't split apart the authority.
125
+ userinfo = authority[/^([^\[\]]*)@/, 1]
89
126
  if userinfo != nil
90
- user = userinfo.strip.scan(/^([^:]*):?/).flatten[0]
91
- password = userinfo.strip.scan(/:(.*)$/).flatten[0]
127
+ user = userinfo.strip[/^([^:]*):?/, 1]
128
+ password = userinfo.strip[/:(.*)$/, 1]
92
129
  end
93
130
  host = authority.gsub(/^([^\[\]]*)@/, "").gsub(/:([^:@\[\]]*?)$/, "")
94
- port = authority.scan(/:([^:@\[\]]*?)$/).flatten[0]
131
+ port = authority[/:([^:@\[\]]*?)$/, 1]
95
132
  end
96
133
  if port == ""
97
134
  port = nil
98
135
  end
99
-
136
+
100
137
  return Addressable::URI.new(
101
- scheme, user, password, host, port, path, query, fragment)
138
+ :scheme => scheme,
139
+ :user => user,
140
+ :password => password,
141
+ :host => host,
142
+ :port => port,
143
+ :path => path,
144
+ :query => query,
145
+ :fragment => fragment
146
+ )
102
147
  end
103
-
148
+
149
+ ##
104
150
  # Converts an input to a URI. The input does not have to be a valid
105
- # URI -- the method will use heuristics to guess what URI was intended.
106
- # This is not standards compliant, merely user-friendly.
107
- def self.heuristic_parse(input, hints={})
108
- input = input.dup
151
+ # URI -- the method will use heuristics to guess what URI was intended.
152
+ # This is not standards-compliant, merely user-friendly.
153
+ #
154
+ # @param [String, Addressable::URI, #to_str] uri
155
+ # The URI string to parse. No parsing is performed if the object is
156
+ # already an <tt>Addressable::URI</tt>.
157
+ # @param [Hash] hints
158
+ # A <tt>Hash</tt> of hints to the heuristic parser. Defaults to
159
+ # <tt>{:scheme => "http"}</tt>.
160
+ #
161
+ # @return [Addressable::URI] The parsed URI.
162
+ def self.heuristic_parse(uri, hints={})
163
+ # If we were given nil, return nil.
164
+ return nil unless uri
165
+ # If a URI object is passed, just return itself.
166
+ return uri if uri.kind_of?(self)
167
+ if !uri.respond_to?(:to_str)
168
+ raise TypeError, "Can't convert #{uri.class} into String."
169
+ end
170
+ # Otherwise, convert to a String
171
+ uri = uri.to_str.dup
109
172
  hints = {
110
173
  :scheme => "http"
111
174
  }.merge(hints)
112
- case input
175
+ case uri
113
176
  when /^http:\/+/
114
- input.gsub!(/^http:\/+/, "http://")
177
+ uri.gsub!(/^http:\/+/, "http://")
115
178
  when /^feed:\/+http:\/+/
116
- input.gsub!(/^feed:\/+http:\/+/, "feed:http://")
179
+ uri.gsub!(/^feed:\/+http:\/+/, "feed:http://")
117
180
  when /^feed:\/+/
118
- input.gsub!(/^feed:\/+/, "feed://")
181
+ uri.gsub!(/^feed:\/+/, "feed://")
119
182
  when /^file:\/+/
120
- input.gsub!(/^file:\/+/, "file:///")
183
+ uri.gsub!(/^file:\/+/, "file:///")
121
184
  end
122
- parsed = self.parse(input)
185
+ parsed = self.parse(uri)
123
186
  if parsed.scheme =~ /^[^\/?#\.]+\.[^\/?#]+$/
124
- parsed = self.parse(hints[:scheme] + "://" + input)
187
+ parsed = self.parse(hints[:scheme] + "://" + uri)
125
188
  end
126
189
  if parsed.authority == nil
127
190
  if parsed.path =~ /^[^\/]+\./
128
- new_host = parsed.path.scan(/^([^\/]+\.[^\/]*)/).flatten[0]
191
+ new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
129
192
  if new_host
130
193
  new_path = parsed.path.gsub(
131
194
  Regexp.new("^" + Regexp.escape(new_host)), "")
@@ -137,77 +200,167 @@ module Addressable
137
200
  end
138
201
  return parsed
139
202
  end
140
-
141
- # Converts a path to a file protocol URI. If the path supplied is
203
+
204
+ ##
205
+ # Converts a path to a file scheme URI. If the path supplied is
142
206
  # relative, it will be returned as a relative URI. If the path supplied
143
- # is actually a URI, it will return the parsed URI.
207
+ # is actually a non-file URI, it will parse the URI as if it had been
208
+ # parsed with <tt>Addressable::URI.parse</tt>. Handles all of the
209
+ # various Microsoft-specific formats for specifying paths.
210
+ #
211
+ # @param [String, Addressable::URI, #to_str] path
212
+ # Typically a <tt>String</tt> path to a file or directory, but
213
+ # will return a sensible return value if an absolute URI is supplied
214
+ # instead.
215
+ #
216
+ # @return [Addressable::URI]
217
+ # The parsed file scheme URI or the original URI if some other URI
218
+ # scheme was provided.
219
+ #
220
+ # @example
221
+ # base = Addressable::URI.convert_path("/absolute/path/")
222
+ # uri = Addressable::URI.convert_path("relative/path")
223
+ # (base + uri).to_s
224
+ # #=> "file:///absolute/path/relative/path"
225
+ #
226
+ # Addressable::URI.convert_path(
227
+ # "c:\\windows\\My Documents 100%20\\foo.txt"
228
+ # ).to_s
229
+ # #=> "file:///c:/windows/My%20Documents%20100%20/foo.txt"
230
+ #
231
+ # Addressable::URI.convert_path("http://example.com/").to_s
232
+ # #=> "http://example.com/"
144
233
  def self.convert_path(path)
145
- return nil if path.nil?
146
-
147
- converted_uri = path.strip
148
- if converted_uri.length > 0 && converted_uri[0..0] == "/"
149
- converted_uri = "file://" + converted_uri
150
- end
151
- if converted_uri.length > 0 &&
152
- converted_uri.scan(/^[a-zA-Z]:[\\\/]/).size > 0
153
- converted_uri = "file:///" + converted_uri
234
+ # If we were given nil, return nil.
235
+ return nil unless path
236
+ # If a URI object is passed, just return itself.
237
+ return path if path.kind_of?(self)
238
+ if !path.respond_to?(:to_str)
239
+ raise TypeError, "Can't convert #{path.class} into String."
154
240
  end
155
- converted_uri.gsub!(/^file:\/*/i, "file:///")
156
- if converted_uri =~ /^file:/i
241
+ # Otherwise, convert to a String
242
+ path = path.to_str.strip
243
+
244
+ path.gsub!(/^file:\/?\/?/, "") if path =~ /^file:\/?\/?/
245
+ path = "/" + path if path =~ /^([a-zA-Z])(\||:)/
246
+ uri = self.parse(path)
247
+
248
+ if uri.scheme == nil
157
249
  # Adjust windows-style uris
158
- converted_uri.gsub!(/^file:\/\/\/([a-zA-Z])\|/i, 'file:///\1:')
159
- converted_uri.gsub!(/\\/, '/')
160
- converted_uri = self.parse(converted_uri).normalize
161
- if File.exists?(converted_uri.path) &&
162
- File.stat(converted_uri.path).directory?
163
- converted_uri.path.gsub!(/\/$/, "")
164
- converted_uri.path = converted_uri.path + '/'
250
+ uri.path.gsub!(/^\/?([a-zA-Z])\|(\\|\/)/, "/\\1:/")
251
+ uri.path.gsub!(/\\/, "/")
252
+ if File.exists?(uri.path) &&
253
+ File.stat(uri.path).directory?
254
+ uri.path.gsub!(/\/$/, "")
255
+ uri.path = uri.path + '/'
165
256
  end
166
- else
167
- converted_uri = self.parse(converted_uri)
257
+
258
+ # If the path is absolute, set the scheme and host.
259
+ if uri.path =~ /^\//
260
+ uri.scheme = "file"
261
+ uri.host = ""
262
+ end
263
+ uri.normalize!
168
264
  end
169
-
170
- return converted_uri
265
+
266
+ return uri
171
267
  end
172
-
268
+
269
+ ##
173
270
  # Expands a URI template into a full URI.
174
271
  #
175
- # An optional processor object may be supplied. The object should
176
- # respond to either the :validate or :transform messages or both.
177
- # Both the :validate and :transform methods should take two parameters:
178
- # :name and :value. The :validate method should return true or false;
179
- # true if the value of the variable is valid, false otherwise. The
180
- # :transform method should return the transformed variable value as a
181
- # string.
182
- #
183
- # An example:
184
- #
185
- # class ExampleProcessor
186
- # def self.validate(name, value)
187
- # return !!(value =~ /^[\w ]+$/) if name == "query"
188
- # return true
189
- # end
190
- #
191
- # def self.transform(name, value)
192
- # return value.gsub(/ /, "+") if name == "query"
193
- # return value
194
- # end
195
- # end
196
- #
197
- # Addressable::URI.expand_template(
198
- # "http://example.com/search/{query}/",
199
- # {"query" => "an example search query"},
200
- # ExampleProcessor).to_s
201
- # => "http://example.com/search/an+example+search+query/"
272
+ # @param [String, #to_str] pattern The URI template pattern.
273
+ # @param [Hash] mapping The mapping that corresponds to the pattern.
274
+ # @param [#validate, #transform] processor
275
+ # An optional processor object may be supplied. The object should
276
+ # respond to either the <tt>validate</tt> or <tt>transform</tt> messages
277
+ # or both. Both the <tt>validate</tt> and <tt>transform</tt> methods
278
+ # should take two parameters: <tt>name</tt> and <tt>value</tt>. The
279
+ # <tt>validate</tt> method should return <tt>true</tt> or
280
+ # <tt>false</tt>; <tt>true</tt> if the value of the variable is valid,
281
+ # <tt>false</tt> otherwise. An <tt>InvalidTemplateValueError</tt>
282
+ # exception will be raised if the value is invalid. The
283
+ # <tt>transform</tt> method should return the transformed variable
284
+ # value as a <tt>String</tt>.
285
+ #
286
+ # @return [Addressable::URI] The expanded URI template.
287
+ #
288
+ # @example
289
+ # class ExampleProcessor
290
+ # def self.validate(name, value)
291
+ # return !!(value =~ /^[\w ]+$/) if name == "query"
292
+ # return true
293
+ # end
294
+ #
295
+ # def self.transform(name, value)
296
+ # return value.gsub(/ /, "+") if name == "query"
297
+ # return value
298
+ # end
299
+ # end
300
+ #
301
+ # Addressable::URI.expand_template(
302
+ # "http://example.com/search/{query}/",
303
+ # {"query" => "an example search query"},
304
+ # ExampleProcessor
305
+ # ).to_s
306
+ # #=> "http://example.com/search/an+example+search+query/"
307
+ #
308
+ # Addressable::URI.expand_template(
309
+ # "http://example.com/search/{-list|+|query}/",
310
+ # {"query" => "an example search query".split(" ")}
311
+ # ).to_s
312
+ # #=> "http://example.com/search/an+example+search+query/"
313
+ #
314
+ # Addressable::URI.expand_template(
315
+ # "http://example.com/search/{query}/",
316
+ # {"query" => "bogus!"},
317
+ # ExampleProcessor
318
+ # ).to_s
319
+ # #=> Addressable::URI::InvalidTemplateValueError
202
320
  def self.expand_template(pattern, mapping, processor=nil)
321
+
322
+ # FIXME: MUST REFACTOR!!!
323
+
203
324
  result = pattern.dup
204
- for name, value in mapping
205
- transformed_value = value
325
+
326
+ reserved = Addressable::URI::CharacterClasses::RESERVED
327
+ unreserved = Addressable::URI::CharacterClasses::UNRESERVED
328
+ anything = reserved + unreserved
329
+ operator_expansion =
330
+ /\{-([a-zA-Z]+)\|([#{anything}]+)\|([#{anything}]+)\}/
331
+ variable_expansion = /\{([#{anything}]+?)(=([#{anything}]+))?\}/
332
+
333
+ transformed_mapping = mapping.inject({}) do |accu, pair|
334
+ name, value = pair
335
+ unless value.respond_to?(:to_ary) || value.respond_to?(:to_str)
336
+ raise TypeError,
337
+ "Can't convert #{value.class} into String or Array."
338
+ end
339
+ transformed_value =
340
+ value.respond_to?(:to_ary) ? value.to_ary : value.to_str
341
+
342
+ # Handle percent escaping, and unicode normalization
343
+ if transformed_value.kind_of?(Array)
344
+ transformed_value.map! do |value|
345
+ self.encode_component(
346
+ Addressable::IDNA.unicode_normalize_kc(value),
347
+ Addressable::URI::CharacterClasses::UNRESERVED
348
+ )
349
+ end
350
+ else
351
+ transformed_value = self.encode_component(
352
+ Addressable::IDNA.unicode_normalize_kc(transformed_value),
353
+ Addressable::URI::CharacterClasses::UNRESERVED
354
+ )
355
+ end
356
+
357
+ # Process, if we've got a processor
206
358
  if processor != nil
207
359
  if processor.respond_to?(:validate)
208
360
  if !processor.validate(name, value)
209
- raise InvalidTemplateValue,
210
- "(#{name}, #{value}) is an invalid template value."
361
+ display_value = value.kind_of?(Array) ? value.inspect : value
362
+ raise InvalidTemplateValueError,
363
+ "#{name}=#{display_value} is an invalid template value."
211
364
  end
212
365
  end
213
366
  if processor.respond_to?(:transform)
@@ -215,229 +368,773 @@ module Addressable
215
368
  end
216
369
  end
217
370
 
218
- # Handle percent escaping
219
- transformed_value = self.encode_segment(transformed_value,
220
- Addressable::URI::CharacterClasses::RESERVED +
221
- Addressable::URI::CharacterClasses::UNRESERVED)
222
-
223
- result.gsub!(/\{#{Regexp.escape(name)}\}/, transformed_value)
371
+ accu[name] = transformed_value
372
+ accu
224
373
  end
225
374
  result.gsub!(
226
- /\{[#{Addressable::URI::CharacterClasses::UNRESERVED}]+\}/, "")
375
+ /#{operator_expansion}|#{variable_expansion}/
376
+ ) do |capture|
377
+ if capture =~ operator_expansion
378
+ operator, argument, variables, default_mapping =
379
+ parse_template_expansion(capture, transformed_mapping)
380
+ expand_method = "expand_#{operator}_operator"
381
+ if ([expand_method, expand_method.to_sym] & private_methods).empty?
382
+ raise InvalidTemplateOperatorError,
383
+ "Invalid template operator: #{operator}"
384
+ else
385
+ send(expand_method.to_sym, argument, variables, default_mapping)
386
+ end
387
+ else
388
+ varname, _, vardefault = capture.scan(/^\{(.+?)(=(.*))?\}$/)[0]
389
+ transformed_mapping[varname] || vardefault
390
+ end
391
+ end
227
392
  return Addressable::URI.parse(result)
228
393
  end
229
-
394
+
395
+ ##
396
+ # Expands a URI Template opt operator.
397
+ #
398
+ # @param [String] argument The argument to the operator.
399
+ # @param [Array] variables The variables the operator is working on.
400
+ # @param [Hash] mapping The mapping of variables to values.
401
+ #
402
+ # @return [String] The expanded result.
403
+ def self.expand_opt_operator(argument, variables, mapping)
404
+ if (variables.any? do |variable|
405
+ mapping[variable] != [] &&
406
+ mapping[variable]
407
+ end)
408
+ argument
409
+ else
410
+ ""
411
+ end
412
+ end
413
+ class <<self; private :expand_opt_operator; end
414
+
415
+ ##
416
+ # Expands a URI Template neg operator.
417
+ #
418
+ # @param [String] argument The argument to the operator.
419
+ # @param [Array] variables The variables the operator is working on.
420
+ # @param [Hash] mapping The mapping of variables to values.
421
+ #
422
+ # @return [String] The expanded result.
423
+ def self.expand_neg_operator(argument, variables, mapping)
424
+ if (variables.any? do |variable|
425
+ mapping[variable] != [] &&
426
+ mapping[variable]
427
+ end)
428
+ ""
429
+ else
430
+ argument
431
+ end
432
+ end
433
+ class <<self; private :expand_neg_operator; end
434
+
435
+ ##
436
+ # Expands a URI Template prefix operator.
437
+ #
438
+ # @param [String] argument The argument to the operator.
439
+ # @param [Array] variables The variables the operator is working on.
440
+ # @param [Hash] mapping The mapping of variables to values.
441
+ #
442
+ # @return [String] The expanded result.
443
+ def self.expand_prefix_operator(argument, variables, mapping)
444
+ if variables.size != 1
445
+ raise InvalidTemplateOperatorError,
446
+ "Template operator 'prefix' takes exactly one variable."
447
+ end
448
+ value = mapping[variables.first]
449
+ if value.kind_of?(Array)
450
+ (value.map { |list_value| argument + list_value }).join("")
451
+ else
452
+ argument + value.to_s
453
+ end
454
+ end
455
+ class <<self; private :expand_prefix_operator; end
456
+
457
+ ##
458
+ # Expands a URI Template suffix operator.
459
+ #
460
+ # @param [String] argument The argument to the operator.
461
+ # @param [Array] variables The variables the operator is working on.
462
+ # @param [Hash] mapping The mapping of variables to values.
463
+ #
464
+ # @return [String] The expanded result.
465
+ def self.expand_suffix_operator(argument, variables, mapping)
466
+ if variables.size != 1
467
+ raise InvalidTemplateOperatorError,
468
+ "Template operator 'suffix' takes exactly one variable."
469
+ end
470
+ value = mapping[variables.first]
471
+ if value.kind_of?(Array)
472
+ (value.map { |list_value| list_value + argument }).join("")
473
+ else
474
+ value.to_s + argument
475
+ end
476
+ end
477
+ class <<self; private :expand_suffix_operator; end
478
+
479
+ ##
480
+ # Expands a URI Template join operator.
481
+ #
482
+ # @param [String] argument The argument to the operator.
483
+ # @param [Array] variables The variables the operator is working on.
484
+ # @param [Hash] mapping The mapping of variables to values.
485
+ #
486
+ # @return [String] The expanded result.
487
+ def self.expand_join_operator(argument, variables, mapping)
488
+ variable_values = variables.inject([]) do |accu, variable|
489
+ if !mapping[variable].kind_of?(Array)
490
+ if mapping[variable]
491
+ accu << variable + "=" + (mapping[variable])
492
+ end
493
+ else
494
+ raise InvalidTemplateOperatorError,
495
+ "Template operator 'join' does not accept Array values."
496
+ end
497
+ accu
498
+ end
499
+ variable_values.join(argument)
500
+ end
501
+ class <<self; private :expand_join_operator; end
502
+
503
+ ##
504
+ # Expands a URI Template list operator.
505
+ #
506
+ # @param [String] argument The argument to the operator.
507
+ # @param [Array] variables The variables the operator is working on.
508
+ # @param [Hash] mapping The mapping of variables to values.
509
+ #
510
+ # @return [String] The expanded result.
511
+ def self.expand_list_operator(argument, variables, mapping)
512
+ if variables.size != 1
513
+ raise InvalidTemplateOperatorError,
514
+ "Template operator 'list' takes exactly one variable."
515
+ end
516
+ mapping[variables.first].join(argument)
517
+ end
518
+ class <<self; private :expand_list_operator; end
519
+
520
+ ##
521
+ # Parses a URI template expansion <tt>String</tt>.
522
+ #
523
+ # @param [String] capture The expansion <tt>String</tt>, including its braces.
524
+ # @param [Hash] mapping The mapping to merge defaults into.
525
+ #
526
+ # @return [Array]
527
+ # A tuple of the operator, argument, variables, and mapping.
528
+ def self.parse_template_expansion(capture, mapping)
529
+ operator, argument, variables = capture[1...-1].split("|")
530
+ operator.gsub!(/^\-/, "")
531
+ variables = variables.split(",")
532
+ mapping = (variables.inject({}) do |accu, var|
533
+ varname, _, vardefault = var.scan(/^(.+?)(=(.*))?$/)[0]
534
+ accu[varname] = vardefault
535
+ accu
536
+ end).merge(mapping)
537
+ variables = variables.map { |var| var.gsub(/=.*$/, "") }
538
+ return operator, argument, variables, mapping
539
+ end
540
+ class <<self; private :parse_template_expansion; end
541
+
542
+ ##
230
543
  # Extracts a mapping from the URI using a URI Template pattern.
231
- # Returns nil if the pattern doesn't match the URI.
232
- #
233
- # An optional processor object may be supplied. The object should
234
- # respond to either the :restore or :match messages or both.
235
- # The :restore method should take two parameters: :name and :value.
236
- # The :restore method should reverse any transformations that have been
237
- # performed on the value to ensure a valid URI. The :match method
238
- # should take a single parameter: :name. The :match method should
239
- # return a String containing a regular expression capture group for
240
- # matching on that particular variable. The default value is ".*".
241
- #
242
- # An example:
243
- #
244
- # class ExampleProcessor
245
- # def self.restore(name, value)
246
- # return value.gsub(/\+/, " ") if name == "query"
247
- # return value
248
- # end
249
- #
250
- # def self.match(name)
251
- # return ".*?" if name == "first"
252
- # return ".*"
253
- # end
254
- # end
255
- #
256
- # uri = Addressable::URI.parse(
257
- # "http://example.com/search/an+example+search+query/")
258
- # uri.extract_mapping("http://example.com/search/{query}/",
259
- # ExampleProcessor)
260
- # => {"query" => "an example search query"}
261
- #
262
- # uri = Addressable::URI.parse(
263
- # "http://example.com/a/b/c/")
264
- # uri.extract_mapping("http://example.com/{first}/{second}/",
265
- # ExampleProcessor)
266
- # => {"first" => "a", "second" => "b/c"}
544
+ #
545
+ # @param [String] pattern
546
+ # A URI template pattern.
547
+ # @param [#restore, #match] processor
548
+ # A template processor object may optionally be supplied.
549
+ # The object should respond to either the <tt>restore</tt> or
550
+ # <tt>match</tt> messages or both. The <tt>restore</tt> method should
551
+ # take two parameters: [String] name and [String] value. The
552
+ # <tt>restore</tt> method should reverse any transformations that have
553
+ # been performed on the value to ensure a valid URI. The
554
+ # <tt>match</tt> method should take a single parameter: [String] name.
555
+ # The <tt>match</tt> method should return a <tt>String</tt> containing
556
+ # a regular expression capture group for matching on that particular
557
+ # variable. The default value is ".*?". The <tt>match</tt> method has
558
+ # no effect on multivariate operator expansions.
559
+ # @return [Hash, NilClass]
560
+ # The <tt>Hash</tt> mapping that was extracted from the URI, or
561
+ # <tt>nil</tt> if the URI didn't match the template.
562
+ #
563
+ # @example
564
+ # class ExampleProcessor
565
+ # def self.restore(name, value)
566
+ # return value.gsub(/\+/, " ") if name == "query"
567
+ # return value
568
+ # end
569
+ #
570
+ # def self.match(name)
571
+ # return ".*?" if name == "first"
572
+ # return ".*"
573
+ # end
574
+ # end
575
+ #
576
+ # uri = Addressable::URI.parse(
577
+ # "http://example.com/search/an+example+search+query/"
578
+ # )
579
+ # uri.extract_mapping(
580
+ # "http://example.com/search/{query}/",
581
+ # ExampleProcessor
582
+ # )
583
+ # #=> {"query" => "an example search query"}
584
+ #
585
+ # uri = Addressable::URI.parse("http://example.com/a/b/c/")
586
+ # uri.extract_mapping(
587
+ # "http://example.com/{first}/{second}/",
588
+ # ExampleProcessor
589
+ # )
590
+ # #=> {"first" => "a", "second" => "b/c"}
591
+ #
592
+ # uri = Addressable::URI.parse("http://example.com/a/b/c/")
593
+ # uri.extract_mapping(
594
+ # "http://example.com/{first}/{-list|/|second}/"
595
+ # )
596
+ # #=> {"first" => "a", "second" => ["b", "c"]}
267
597
  def extract_mapping(pattern, processor=nil)
598
+ reserved = Addressable::URI::CharacterClasses::RESERVED
599
+ unreserved = Addressable::URI::CharacterClasses::UNRESERVED
600
+ anything = reserved + unreserved
601
+ operator_expansion =
602
+ /\{-([a-zA-Z]+)\|([#{anything}]+)\|([#{anything}]+)\}/
603
+ variable_expansion = /\{([#{anything}]+?)(=([#{anything}]+))?\}/
604
+
605
+ # First, we need to process the pattern, and extract the values.
606
+ expansions, expansion_regexp =
607
+ parse_template_pattern(pattern, processor)
608
+ unparsed_values = self.to_s.scan(expansion_regexp).flatten
609
+
268
610
  mapping = {}
269
- variable_regexp =
270
- /\{([#{Addressable::URI::CharacterClasses::UNRESERVED}]+)\}/
271
-
272
- # Get all the variables in the pattern
273
- variables = pattern.scan(variable_regexp).flatten
274
-
275
- # Initialize all result values to the empty string
276
- variables.each { |v| mapping[v] = "" }
277
-
278
- # Escape the pattern
279
- escaped_pattern =
280
- Regexp.escape(pattern).gsub(/\\\{/, "{").gsub(/\\\}/, "}")
281
-
611
+
612
+ if self.to_s == pattern
613
+ return mapping
614
+ elsif expansions.size > 0 && expansions.size == unparsed_values.size
615
+ expansions.each_with_index do |expansion, index|
616
+ unparsed_value = unparsed_values[index]
617
+ if expansion =~ operator_expansion
618
+ operator, argument, variables =
619
+ parse_template_expansion(expansion)
620
+ extract_method = "extract_#{operator}_operator"
621
+ if ([extract_method, extract_method.to_sym] &
622
+ private_methods).empty?
623
+ raise InvalidTemplateOperatorError,
624
+ "Invalid template operator: #{operator}"
625
+ else
626
+ begin
627
+ send(
628
+ extract_method.to_sym, unparsed_value, processor,
629
+ argument, variables, mapping
630
+ )
631
+ rescue TemplateOperatorAbortedError
632
+ return nil
633
+ end
634
+ end
635
+ else
636
+ name = expansion[variable_expansion, 1]
637
+ value = unparsed_value
638
+ if processor != nil && processor.respond_to?(:restore)
639
+ value = processor.restore(name, value)
640
+ end
641
+ mapping[name] = value
642
+ end
643
+ end
644
+ return mapping
645
+ else
646
+ return nil
647
+ end
648
+ end
649
+
650
+ ##
651
+ # Generates the <tt>Regexp</tt> that parses a template pattern.
652
+ #
653
+ # @param [String] pattern The URI template pattern.
654
+ # @param [#match] processor The template processor to use.
655
+ #
656
+ # @return [Array]
657
+ # A tuple of the expansions found in the pattern and a regular expression which may be used to parse a template pattern.
658
+ def parse_template_pattern(pattern, processor)
659
+ reserved = Addressable::URI::CharacterClasses::RESERVED
660
+ unreserved = Addressable::URI::CharacterClasses::UNRESERVED
661
+ anything = reserved + unreserved
662
+ operator_expansion =
663
+ /\{-[a-zA-Z]+\|[#{anything}]+\|[#{anything}]+\}/
664
+ variable_expansion = /\{([#{anything}]+?)(=([#{anything}]+))?\}/
665
+
666
+ # Escape the pattern. The two gsubs restore the escaped curly braces
667
+ # back to their original form. Basically, escape everything that isn't
668
+ # within an expansion.
669
+ escaped_pattern = Regexp.escape(
670
+ pattern
671
+ ).gsub(/\\\{(.*?)\\\}/) do |escaped|
672
+ escaped.gsub(/\\(.)/, "\\1")
673
+ end
674
+
675
+ expansions = []
676
+
282
677
  # Create a regular expression that captures the values of the
283
678
  # variables in the URI.
284
- regexp_string = escaped_pattern.gsub(variable_regexp) do |v|
285
- capture_group = "(.*)"
286
-
287
- if processor != nil
288
- if processor.respond_to?(:match)
289
- name = v.scan(variable_regexp).flatten[0]
679
+ regexp_string = escaped_pattern.gsub(
680
+ /#{operator_expansion}|#{variable_expansion}/
681
+ ) do |expansion|
682
+ expansions << expansion
683
+ if expansion =~ operator_expansion
684
+ capture_group = "(.*)"
685
+ if processor != nil && processor.respond_to?(:match)
686
+ # We can only lookup the match values for single variable
687
+ # operator expansions. Besides, ".*" is usually the only
688
+ # reasonable value for multivariate operators anyways.
689
+ operator, _, names, _ =
690
+ parse_template_expansion(expansion)
691
+ if ["prefix", "suffix", "list"].include?(operator)
692
+ capture_group = "(#{processor.match(names.first)})"
693
+ end
694
+ end
695
+ capture_group
696
+ else
697
+ capture_group = "(.*?)"
698
+ if processor != nil && processor.respond_to?(:match)
699
+ name = expansion[/\{([^\}=]+)(=[^\}]+)?\}/, 1]
290
700
  capture_group = "(#{processor.match(name)})"
291
701
  end
702
+ capture_group
292
703
  end
293
-
294
- capture_group
295
704
  end
296
-
705
+
297
706
  # Ensure that the regular expression matches the whole URI.
298
707
  regexp_string = "^#{regexp_string}$"
299
-
300
- regexp = Regexp.new(regexp_string)
301
- values = self.to_s.scan(regexp).flatten
302
-
303
- if variables.size == values.size && variables.size > 0
304
- # We have a match.
305
- for i in 0...variables.size
306
- name = variables[i]
307
- value = values[i]
308
-
309
- if processor != nil
310
- if processor.respond_to?(:restore)
311
- value = processor.restore(name, value)
312
- end
313
- end
314
-
315
- mapping[name] = value
708
+
709
+ return expansions, Regexp.new(regexp_string)
710
+ end
711
+ private :parse_template_pattern
712
+
713
+ ##
714
+ # Parses a URI template expansion <tt>String</tt>.
715
+ #
716
+ # @param [String] capture The expansion <tt>String</tt>, including its braces.
717
+ #
718
+ # @return [Array]
719
+ # A tuple of the operator, argument, variables.
720
+ def parse_template_expansion(capture)
721
+ operator, argument, variables = capture[1...-1].split("|")
722
+ operator.gsub!(/^\-/, "")
723
+ variables = variables.split(",").map { |var| var.gsub(/=.*$/, "") }
724
+ return operator, argument, variables
725
+ end
726
+ private :parse_template_expansion
727
+
728
+
729
+ ##
730
+ # Extracts a URI Template opt operator.
731
+ #
732
+ # @param [String] value The unparsed value to extract from.
733
+ # @param [#restore] processor The processor object.
734
+ # @param [String] argument The argument to the operator.
735
+ # @param [Array] variables The variables the operator is working on.
736
+ # @param [Hash] mapping The mapping of variables to values.
737
+ #
738
+ # @return [String] The extracted result.
739
+ def extract_opt_operator(
740
+ value, processor, argument, variables, mapping)
741
+ if value != "" && value != argument
742
+ raise TemplateOperatorAbortedError,
743
+ "Value for template operator 'neg' was unexpected."
744
+ end
745
+ end
746
+ private :extract_opt_operator
747
+
748
+ ##
749
+ # Extracts a URI Template neg operator.
750
+ #
751
+ # @param [String] value The unparsed value to extract from.
752
+ # @param [#restore] processor The processor object.
753
+ # @param [String] argument The argument to the operator.
754
+ # @param [Array] variables The variables the operator is working on.
755
+ # @param [Hash] mapping The mapping of variables to values.
756
+ #
757
+ # @return [nil] Nothing is extracted; the value is only validated.
758
+ def extract_neg_operator(
759
+ value, processor, argument, variables, mapping)
760
+ if value != "" && value != argument
761
+ raise TemplateOperatorAbortedError,
762
+ "Value for template operator 'neg' was unexpected."
763
+ end
764
+ end
765
+ private :extract_neg_operator
766
+
767
+ ##
768
+ # Extracts a URI Template prefix operator.
769
+ #
770
+ # @param [String] value The unparsed value to extract from.
771
+ # @param [#restore] processor The processor object.
772
+ # @param [String] argument The argument to the operator.
773
+ # @param [Array] variables The variables the operator is working on.
774
+ # @param [Hash] mapping The mapping of variables to values.
775
+ #
776
+ # @return [Array] The extracted values, as stored in the mapping.
777
+ def extract_prefix_operator(
778
+ value, processor, argument, variables, mapping)
779
+ if variables.size != 1
780
+ raise InvalidTemplateOperatorError,
781
+ "Template operator 'suffix' takes exactly one variable."
782
+ end
783
+ if value[0...argument.size] != argument
784
+ raise TemplateOperatorAbortedError,
785
+ "Value for template operator 'prefix' missing expected prefix."
786
+ end
787
+ values = value.split(argument)
788
+ # Compensate for the crappy result from split.
789
+ if value[-argument.size..-1] == argument
790
+ values << ""
791
+ end
792
+ if values[0] == ""
793
+ values.shift
794
+ end
795
+ if processor && processor.respond_to?(:restore)
796
+ values.map! { |value| processor.restore(variables.first, value) }
797
+ end
798
+ mapping[variables.first] = values
799
+ end
800
+ private :extract_prefix_operator
801
+
802
+ ##
803
+ # Extracts a URI Template suffix operator.
804
+ #
805
+ # @param [String] value The unparsed value to extract from.
806
+ # @param [#restore] processor The processor object.
807
+ # @param [String] argument The argument to the operator.
808
+ # @param [Array] variables The variables the operator is working on.
809
+ # @param [Hash] mapping The mapping of variables to values.
810
+ #
811
+ # @return [Array] The extracted values, as stored in the mapping.
812
+ def extract_suffix_operator(
813
+ value, processor, argument, variables, mapping)
814
+ if variables.size != 1
815
+ raise InvalidTemplateOperatorError,
816
+ "Template operator 'suffix' takes exactly one variable."
817
+ end
818
+ if value[-argument.size..-1] != argument
819
+ raise TemplateOperatorAbortedError,
820
+ "Value for template operator 'suffix' missing expected suffix."
821
+ end
822
+ values = value.split(argument)
823
+ # Compensate for the crappy result from split.
824
+ if value[-argument.size..-1] == argument
825
+ values << ""
826
+ end
827
+ if values[-1] == ""
828
+ values.pop
829
+ end
830
+ if processor && processor.respond_to?(:restore)
831
+ values.map! { |value| processor.restore(variables.first, value) }
832
+ end
833
+ mapping[variables.first] = values
834
+ end
835
+ private :extract_suffix_operator
836
+
837
+ ##
838
+ # Extracts a URI Template join operator.
839
+ #
840
+ # @param [String] value The unparsed value to extract from.
841
+ # @param [#restore] processor The processor object.
842
+ # @param [String] argument The argument to the operator.
843
+ # @param [Array] variables The variables the operator is working on.
844
+ # @param [Hash] mapping The mapping of variables to values.
845
+ #
846
+ # @return [nil] The extracted values are stored in the mapping.
847
+ def extract_join_operator(value, processor, argument, variables, mapping)
848
+ unparsed_values = value.split(argument)
849
+ parsed_variables = []
850
+ for unparsed_value in unparsed_values
851
+ name = unparsed_value[/^(.+?)=(.+)$/, 1]
852
+ parsed_variables << name
853
+ parsed_value = unparsed_value[/^(.+?)=(.+)$/, 2]
854
+ if processor && processor.respond_to?(:restore)
855
+ parsed_value = processor.restore(name, parsed_value)
316
856
  end
317
- return mapping
318
- elsif self.to_s == pattern
319
- # The pattern contained no variables but still matched.
320
- return mapping
321
- else
322
- # Pattern failed to match URI.
323
- return nil
857
+ mapping[name] = parsed_value
858
+ end
859
+ if (parsed_variables & variables) != parsed_variables
860
+ raise TemplateOperatorAbortedError,
861
+ "Template operator 'join' variable mismatch: " +
862
+ "#{parsed_variables.inspect}, #{variables.inspect}"
324
863
  end
325
864
  end
326
-
327
- # Joins several uris together.
865
+ private :extract_join_operator
866
+
867
+ ##
868
+ # Extracts a URI Template list operator.
869
+ #
870
+ # @param [String] value The unparsed value to extract from.
871
+ # @param [#restore] processor The processor object.
872
+ # @param [String] argument The argument to the operator.
873
+ # @param [Array] variables The variables the operator is working on.
874
+ # @param [Hash] mapping The mapping of variables to values.
875
+ #
876
+ # @return [Array] The extracted values, as stored in the mapping.
877
+ def extract_list_operator(value, processor, argument, variables, mapping)
878
+ if variables.size != 1
879
+ raise InvalidTemplateOperatorError,
880
+ "Template operator 'list' takes exactly one variable."
881
+ end
882
+ values = value.split(argument)
883
+ if processor && processor.respond_to?(:restore)
884
+ values.map! { |value| processor.restore(variables.first, value) }
885
+ end
886
+ mapping[variables.first] = values
887
+ end
888
+ private :extract_list_operator
889
+
890
+ ##
891
+ # Joins several URIs together.
892
+ #
893
+ # @param [String, Addressable::URI, #to_str] *uris
894
+ # The URIs to join.
895
+ #
896
+ # @return [Addressable::URI] The joined URI.
897
+ #
898
+ # @example
899
+ # base = "http://example.com/"
900
+ # uri = Addressable::URI.parse("relative/path")
901
+ # Addressable::URI.join(base, uri)
902
+ # #=> #<Addressable::URI:0xcab390 URI:http://example.com/relative/path>
328
903
  def self.join(*uris)
329
904
  uri_objects = uris.collect do |uri|
330
- uri.kind_of?(self) ? uri : self.parse(uri.to_s)
905
+ if !uri.respond_to?(:to_str)
906
+ raise TypeError, "Can't convert #{uri.class} into String."
907
+ end
908
+ uri.kind_of?(self) ? uri : self.parse(uri.to_str)
331
909
  end
332
910
  result = uri_objects.shift.dup
333
911
  for uri in uri_objects
334
- result.merge!(uri)
912
+ result.join!(uri)
335
913
  end
336
914
  return result
337
915
  end
338
-
339
- # Percent encodes a URI segment. Returns a string. Takes an optional
340
- # character class parameter, which should be specified as a string
341
- # containing a regular expression character class (not including the
342
- # surrounding square brackets). The character class parameter defaults
343
- # to the reserved plus unreserved character classes specified in
344
- # RFC 3986. Usage of the constants within the CharacterClasses module is
345
- # highly recommended when using this method.
346
- #
347
- # An example:
348
- #
349
- # Addressable::URI.escape_segment("simple-example", "b-zB-Z0-9")
350
- # => "simple%2Dex%61mple"
351
- def self.encode_segment(segment, character_class=
352
- Addressable::URI::CharacterClasses::RESERVED +
353
- Addressable::URI::CharacterClasses::UNRESERVED)
354
- return nil if segment.nil?
355
- return segment.gsub(
356
- /[^#{character_class}]/
357
- ) do |sequence|
358
- ("%" + sequence.unpack('C')[0].to_s(16).upcase)
359
- end
360
- end
361
-
362
- # Unencodes any percent encoded characters within a URI segment.
363
- # Returns a string.
364
- def self.unencode_segment(segment)
365
- return nil if segment.nil?
366
- return segment.to_s.gsub(/%[0-9a-f]{2}/i) do |sequence|
916
+
917
+ ##
918
+ # Percent encodes a URI component.
919
+ #
920
+ # @param [String, #to_str] component The URI component to encode.
921
+ #
922
+ # @param [String, Regexp] character_class
923
+ # The characters which are not percent encoded. If a <tt>String</tt>
924
+ # is passed, the <tt>String</tt> must be formatted as a regular
925
+ # expression character class. (Do not include the surrounding square
926
+ # brackets.) For example, <tt>"b-zB-Z0-9"</tt> would cause everything
927
+ # but the letters 'b' through 'z' and the numbers '0' through '9' to be
928
+ # percent encoded. If a <tt>Regexp</tt> is passed, the value
929
+ # <tt>/[^b-zB-Z0-9]/</tt> would have the same effect.
930
+ # A set of useful <tt>String</tt> values may be found in the
931
+ # <tt>Addressable::URI::CharacterClasses</tt> module. The default value
932
+ # is the reserved plus unreserved character classes specified in
933
+ # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
934
+ #
935
+ # @return [String] The encoded component.
936
+ #
937
+ # @example
938
+ # Addressable::URI.encode_component("simple/example", "b-zB-Z0-9")
939
+ # => "simple%2Fex%61mple"
940
+ # Addressable::URI.encode_component("simple/example", /[^b-zB-Z0-9]/)
941
+ # => "simple%2Fex%61mple"
942
+ # Addressable::URI.encode_component(
943
+ # "simple/example", Addressable::URI::CharacterClasses::UNRESERVED
944
+ # )
945
+ # => "simple%2Fexample"
946
+ def self.encode_component(component, character_class=
947
+ CharacterClasses::RESERVED + CharacterClasses::UNRESERVED)
948
+ return nil if component.nil?
949
+ if !component.respond_to?(:to_str)
950
+ raise TypeError, "Can't convert #{component.class} into String."
951
+ end
952
+ component = component.to_str
953
+ if ![String, Regexp].include?(character_class.class)
954
+ raise TypeError,
955
+ "Expected String or Regexp, got #{character_class.inspect}"
956
+ end
957
+ if character_class.kind_of?(String)
958
+ character_class = /[^#{character_class}]/
959
+ end
960
+ return component.gsub(character_class) do |sequence|
961
+ (sequence.unpack('C*').map { |c| "%#{c.to_s(16).upcase}" }).join("")
962
+ end
963
+ end
964
+
965
+ class << self
966
+ alias_method :encode_component, :encode_component
967
+ end
968
+
969
+ ##
970
+ # Unencodes any percent encoded characters within a URI component.
971
+ # This method may be used for unencoding either components or full URIs;
972
+ # however, it is recommended to use the <tt>unencode_component</tt> alias
973
+ # when unencoding components.
974
+ #
975
+ # @param [String, Addressable::URI, #to_str] uri
976
+ # The URI or component to unencode.
977
+ #
978
+ # @param [Class] returning
979
+ # The type of object to return. This value may only be set to
980
+ # <tt>String</tt> or <tt>Addressable::URI</tt>. All other values
981
+ # are invalid. Defaults to <tt>String</tt>.
982
+ #
983
+ # @return [String, Addressable::URI]
984
+ # The unencoded component or URI. The return type is determined by
985
+ # the <tt>returning</tt> parameter.
986
+ def self.unencode(uri, returning=String)
987
+ return nil if uri.nil?
988
+ if !uri.respond_to?(:to_str)
989
+ raise TypeError, "Can't convert #{uri.class} into String."
990
+ end
991
+ if ![String, ::Addressable::URI].include?(returning)
992
+ raise TypeError,
993
+ "Expected String or Addressable::URI, got #{returning.inspect}"
994
+ end
995
+ result = uri.to_str.gsub(/%[0-9a-f]{2}/i) do |sequence|
367
996
  sequence[1..3].to_i(16).chr
368
997
  end
998
+ result.force_encoding("utf-8") if result.respond_to?(:force_encoding)
999
+ if returning == String
1000
+ return result
1001
+ elsif returning == ::Addressable::URI
1002
+ return ::Addressable::URI.parse(result)
1003
+ end
369
1004
  end
370
-
371
- # Percent encodes any special characters in the URI. This method does
372
- # not take IRIs or IDNs into account.
373
- def self.encode(uri)
374
- uri_object = uri.kind_of?(self) ? uri : self.parse(uri.to_s)
375
- return Addressable::URI.new(
376
- self.encode_segment(uri_object.scheme,
1005
+
1006
+ class << self
1007
+ alias_method :unescape, :unencode
1008
+ alias_method :unencode_component, :unencode
1009
+ alias_method :unescape_component, :unencode
1010
+ end
1011
+
1012
+ ##
1013
+ # Percent encodes any special characters in the URI.
1014
+ #
1015
+ # @param [String, Addressable::URI, #to_str] uri
1016
+ # The URI to encode.
1017
+ #
1018
+ # @param [Class] returning
1019
+ # The type of object to return. This value may only be set to
1020
+ # <tt>String</tt> or <tt>Addressable::URI</tt>. All other values
1021
+ # are invalid. Defaults to <tt>String</tt>.
1022
+ #
1023
+ # @return [String, Addressable::URI]
1024
+ # The encoded URI. The return type is determined by
1025
+ # the <tt>returning</tt> parameter.
1026
+ def self.encode(uri, returning=String)
1027
+ return nil if uri.nil?
1028
+ if !uri.respond_to?(:to_str)
1029
+ raise TypeError, "Can't convert #{uri.class} into String."
1030
+ end
1031
+ if ![String, ::Addressable::URI].include?(returning)
1032
+ raise TypeError,
1033
+ "Expected String or Addressable::URI, got #{returning.inspect}"
1034
+ end
1035
+ uri_object = uri.kind_of?(self) ? uri : self.parse(uri.to_str)
1036
+ encoded_uri = Addressable::URI.new(
1037
+ :scheme => self.encode_component(uri_object.scheme,
377
1038
  Addressable::URI::CharacterClasses::SCHEME),
378
- self.encode_segment(uri_object.user,
379
- Addressable::URI::CharacterClasses::AUTHORITY),
380
- self.encode_segment(uri_object.password,
381
- Addressable::URI::CharacterClasses::AUTHORITY),
382
- self.encode_segment(uri_object.host,
383
- Addressable::URI::CharacterClasses::AUTHORITY),
384
- self.encode_segment(uri_object.specified_port,
1039
+ :authority => self.encode_component(uri_object.authority,
385
1040
  Addressable::URI::CharacterClasses::AUTHORITY),
386
- self.encode_segment(uri_object.path,
1041
+ :path => self.encode_component(uri_object.path,
387
1042
  Addressable::URI::CharacterClasses::PATH),
388
- self.encode_segment(uri_object.query,
1043
+ :query => self.encode_component(uri_object.query,
389
1044
  Addressable::URI::CharacterClasses::QUERY),
390
- self.encode_segment(uri_object.fragment,
1045
+ :fragment => self.encode_component(uri_object.fragment,
391
1046
  Addressable::URI::CharacterClasses::FRAGMENT)
392
- ).to_s
1047
+ )
1048
+ if returning == String
1049
+ return encoded_uri.to_s
1050
+ elsif returning == ::Addressable::URI
1051
+ return encoded_uri
1052
+ end
393
1053
  end
394
-
1054
+
395
1055
  class << self
396
1056
  alias_method :escape, :encode
397
1057
  end
398
-
1058
+
1059
+ ##
399
1060
  # Normalizes the encoding of a URI. Characters within a hostname are
400
1061
  # not percent encoded to allow for internationalized domain names.
401
- def self.normalized_encode(uri)
402
- uri_object = uri.kind_of?(self) ? uri : self.parse(uri.to_s)
403
- segments = {
404
- :scheme => self.unencode_segment(uri_object.scheme),
405
- :user => self.unencode_segment(uri_object.user),
406
- :password => self.unencode_segment(uri_object.password),
407
- :host => self.unencode_segment(uri_object.host),
408
- :port => self.unencode_segment(uri_object.specified_port),
409
- :path => self.unencode_segment(uri_object.path),
410
- :query => self.unencode_segment(uri_object.query),
411
- :fragment => self.unencode_segment(uri_object.fragment)
1062
+ #
1063
+ # @param [String, Addressable::URI, #to_str] uri
1064
+ # The URI to encode.
1065
+ #
1066
+ # @param [Class] returning
1067
+ # The type of object to return. This value may only be set to
1068
+ # <tt>String</tt> or <tt>Addressable::URI</tt>. All other values
1069
+ # are invalid. Defaults to <tt>String</tt>.
1070
+ #
1071
+ # @return [String, Addressable::URI]
1072
+ # The encoded URI. The return type is determined by
1073
+ # the <tt>returning</tt> parameter.
1074
+ def self.normalized_encode(uri, returning=String)
1075
+ if !uri.respond_to?(:to_str)
1076
+ raise TypeError, "Can't convert #{uri.class} into String."
1077
+ end
1078
+ if ![String, ::Addressable::URI].include?(returning)
1079
+ raise TypeError,
1080
+ "Expected String or Addressable::URI, got #{returning.inspect}"
1081
+ end
1082
+ uri_object = uri.kind_of?(self) ? uri : self.parse(uri.to_str)
1083
+ components = {
1084
+ :scheme => self.unencode_component(uri_object.scheme),
1085
+ :user => self.unencode_component(uri_object.user),
1086
+ :password => self.unencode_component(uri_object.password),
1087
+ :host => self.unencode_component(uri_object.host),
1088
+ :port => uri_object.port,
1089
+ :path => self.unencode_component(uri_object.path),
1090
+ :query => self.unencode_component(uri_object.query),
1091
+ :fragment => self.unencode_component(uri_object.fragment)
412
1092
  }
413
- if URI::IDNA.send(:use_libidn?)
414
- segments.each do |key, value|
415
- if value != nil
416
- segments[key] = IDN::Stringprep.nfkc_normalize(value.to_s)
417
- end
1093
+ components.each do |key, value|
1094
+ if value != nil
1095
+ components[key] = Addressable::IDNA.unicode_normalize_kc(value.to_s)
418
1096
  end
419
1097
  end
420
- return Addressable::URI.new(
421
- self.encode_segment(segments[:scheme],
1098
+ encoded_uri = Addressable::URI.new(
1099
+ :scheme => self.encode_component(components[:scheme],
422
1100
  Addressable::URI::CharacterClasses::SCHEME),
423
- self.encode_segment(segments[:user],
1101
+ :user => self.encode_component(components[:user],
424
1102
  Addressable::URI::CharacterClasses::AUTHORITY),
425
- self.encode_segment(segments[:password],
1103
+ :password => self.encode_component(components[:password],
426
1104
  Addressable::URI::CharacterClasses::AUTHORITY),
427
- segments[:host],
428
- segments[:port],
429
- self.encode_segment(segments[:path],
1105
+ :host => components[:host],
1106
+ :port => components[:port],
1107
+ :path => self.encode_component(components[:path],
430
1108
  Addressable::URI::CharacterClasses::PATH),
431
- self.encode_segment(segments[:query],
1109
+ :query => self.encode_component(components[:query],
432
1110
  Addressable::URI::CharacterClasses::QUERY),
433
- self.encode_segment(segments[:fragment],
1111
+ :fragment => self.encode_component(components[:fragment],
434
1112
  Addressable::URI::CharacterClasses::FRAGMENT)
435
- ).to_s
1113
+ )
1114
+ if returning == String
1115
+ return encoded_uri.to_s
1116
+ elsif returning == ::Addressable::URI
1117
+ return encoded_uri
1118
+ end
436
1119
  end
437
1120
 
1121
+ ##
438
1122
  # Extracts uris from an arbitrary body of text.
1123
+ #
1124
+ # @param [String, #to_str] text
1125
+ # The body of text to extract URIs from.
1126
+ #
1127
+ # @option [String, Addressable::URI, #to_str] base
1128
+ # Causes any relative URIs to be resolved against the base URI.
1129
+ #
1130
+ # @option [TrueClass, FalseClass] parse
1131
+ # If parse is true, all extracted URIs will be parsed. If parse is
1132
+ # false, the return value will be an <tt>Array</tt> of <tt>Strings</tt>.
1133
+ # Defaults to false.
1134
+ #
1135
+ # @return [Array] The extracted URIs.
439
1136
  def self.extract(text, options={})
440
- defaults = {:base => nil, :parse => false}
1137
+ defaults = {:base => nil, :parse => false}
441
1138
  options = defaults.merge(options)
442
1139
  raise InvalidOptionError unless (options.keys - defaults.keys).empty?
443
1140
  # This regular expression needs to be less forgiving or else it would
@@ -470,16 +1167,10 @@ module Addressable
470
1167
  nil
471
1168
  end
472
1169
  end
473
- parsed_uris.reject! do |uri|
474
- (uri.scheme =~ /T\d+/ ||
475
- uri.scheme == "xmlns" ||
476
- uri.scheme == "xml" ||
477
- uri.scheme == "thr" ||
478
- uri.scheme == "this" ||
479
- uri.scheme == "float" ||
480
- uri.scheme == "user" ||
481
- uri.scheme == "username" ||
482
- uri.scheme == "out")
1170
+ parsed_uris = parsed_uris.select do |uri|
1171
+ (self.ip_based_schemes | [
1172
+ "file", "git", "svn", "mailto", "tel"
1173
+ ]).include?(uri.normalized_scheme)
483
1174
  end
484
1175
  if options[:parse]
485
1176
  return parsed_uris
@@ -487,51 +1178,126 @@ module Addressable
487
1178
  return parsed_uris.collect { |uri| uri.to_s }
488
1179
  end
489
1180
  end
490
-
491
- # Creates a new uri object from component parts. Passing nil for
492
- # any of these parameters is acceptable.
493
- def initialize(scheme, user, password, host, port, path, query, fragment)
494
- @scheme = scheme
495
- @scheme = nil if @scheme.to_s.strip == ""
496
- @user = user
497
- @password = password
498
- @host = host
499
- @specified_port = port.to_s
500
- @port = port.kind_of?(Fixnum) ? port.to_s : port
501
- if @port != nil && !(@port =~ /^\d+$/)
502
- raise InvalidURIError,
503
- "Invalid port number: #{@port.inspect}"
1181
+
1182
+ ##
1183
+ # Creates a new uri object from component parts.
1184
+ #
1185
+ # @option [String, #to_str] scheme The scheme component.
1186
+ # @option [String, #to_str] user The user component.
1187
+ # @option [String, #to_str] password The password component.
1188
+ # @option [String, #to_str] userinfo
1189
+ # The userinfo component. If this is supplied, the user and password
1190
+ # components must be omitted.
1191
+ # @option [String, #to_str] host The host component.
1192
+ # @option [String, #to_str] port The port component.
1193
+ # @option [String, #to_str] authority
1194
+ # The authority component. If this is supplied, the user, password,
1195
+ # userinfo, host, and port components must be omitted.
1196
+ # @option [String, #to_str] path The path component.
1197
+ # @option [String, #to_str] query The query component.
1198
+ # @option [String, #to_str] fragment The fragment component.
1199
+ #
1200
+ # @return [Addressable::URI] The constructed URI object.
1201
+ def initialize(options={})
1202
+ if options.has_key?(:authority)
1203
+ if (options.keys & [:userinfo, :user, :password, :host, :port]).any?
1204
+ raise ArgumentError,
1205
+ "Cannot specify both an authority and any of the components " +
1206
+ "within the authority."
1207
+ end
504
1208
  end
505
- @port = @port.to_i
506
- @port = nil if @port == 0
507
- @path = (path || "")
508
- if @path != "" && @path[0..0] != "/" && @host != nil
509
- @path = "/#{@path}"
1209
+ if options.has_key?(:userinfo)
1210
+ if (options.keys & [:user, :password]).any?
1211
+ raise ArgumentError,
1212
+ "Cannot specify both a userinfo and either the user or password."
1213
+ end
510
1214
  end
511
- @query = query
512
- @fragment = fragment
513
1215
 
514
- validate()
1216
+ self.validation_deferred = true
1217
+ self.scheme = options[:scheme] if options[:scheme]
1218
+ self.user = options[:user] if options[:user]
1219
+ self.password = options[:password] if options[:password]
1220
+ self.userinfo = options[:userinfo] if options[:userinfo]
1221
+ self.host = options[:host] if options[:host]
1222
+ self.port = options[:port] if options[:port]
1223
+ self.authority = options[:authority] if options[:authority]
1224
+ self.path = options[:path] if options[:path]
1225
+ self.query = options[:query] if options[:query]
1226
+ self.fragment = options[:fragment] if options[:fragment]
1227
+ self.validation_deferred = false
515
1228
  end
516
-
517
- # Returns the scheme (protocol) for this URI.
1229
+
1230
+ ##
1231
+ # The scheme component for this URI.
1232
+ #
1233
+ # @return [String] The scheme component.
518
1234
  def scheme
519
1235
  return @scheme
520
1236
  end
521
-
522
- # Sets the scheme (protocol for this URI.)
1237
+
1238
+ ##
1239
+ # The scheme component for this URI, normalized.
1240
+ #
1241
+ # @return [String] The scheme component, normalized.
1242
+ def normalized_scheme
1243
+ @normalized_scheme ||= (begin
1244
+ if self.scheme != nil
1245
+ if self.scheme =~ /^\s*ssh\+svn\s*$/i
1246
+ "svn+ssh"
1247
+ else
1248
+ self.scheme.strip.downcase
1249
+ end
1250
+ else
1251
+ nil
1252
+ end
1253
+ end)
1254
+ end
1255
+
1256
+ ##
1257
+ # Sets the scheme component for this URI.
1258
+ #
1259
+ # @param [String, #to_str] new_scheme The new scheme component.
523
1260
  def scheme=(new_scheme)
524
- @scheme = new_scheme
1261
+ @scheme = new_scheme ? new_scheme.to_str : nil
1262
+ @scheme = nil if @scheme.to_s.strip == ""
1263
+
1264
+ # Reset dependant values
1265
+ @normalized_scheme = nil
525
1266
  end
526
-
527
- # Returns the user for this URI.
1267
+
1268
+ ##
1269
+ # The user component for this URI.
1270
+ #
1271
+ # @return [String] The user component.
528
1272
  def user
529
1273
  return @user
530
1274
  end
531
-
532
- # Sets the user for this URI.
1275
+
1276
+ ##
1277
+ # The user component for this URI, normalized.
1278
+ #
1279
+ # @return [String] The user component, normalized.
1280
+ def normalized_user
1281
+ @normalized_user ||= (begin
1282
+ if self.user
1283
+ if normalized_scheme =~ /https?/ && self.user.strip == "" &&
1284
+ (!self.password || self.password.strip == "")
1285
+ nil
1286
+ else
1287
+ self.user.strip
1288
+ end
1289
+ else
1290
+ nil
1291
+ end
1292
+ end)
1293
+ end
1294
+
1295
+ ##
1296
+ # Sets the user component for this URI.
1297
+ #
1298
+ # @param [String, #to_str] new_user The new user component.
533
1299
  def user=(new_user)
534
- @user = new_user
1300
+ @user = new_user ? new_user.to_str : nil
535
1301
 
536
1302
  # You can't have a nil user with a non-nil password
537
1303
  if @password != nil
@@ -540,20 +1306,47 @@ module Addressable
540
1306
 
541
1307
  # Reset dependent values
542
1308
  @userinfo = nil
1309
+ @normalized_userinfo = nil
543
1310
  @authority = nil
1311
+ @normalized_user = nil
544
1312
 
545
1313
  # Ensure we haven't created an invalid URI
546
1314
  validate()
547
1315
  end
548
-
549
- # Returns the password for this URI.
1316
+
1317
+ ##
1318
+ # The password component for this URI.
1319
+ #
1320
+ # @return [String] The password component.
550
1321
  def password
551
1322
  return @password
552
1323
  end
553
1324
 
554
- # Sets the password for this URI.
1325
+ ##
1326
+ # The password component for this URI, normalized.
1327
+ #
1328
+ # @return [String] The password component, normalized.
1329
+ def normalized_password
1330
+ @normalized_password ||= (begin
1331
+ if self.password
1332
+ if normalized_scheme =~ /https?/ && self.password.strip == "" &&
1333
+ (!self.user || self.user.strip == "")
1334
+ nil
1335
+ else
1336
+ self.password.strip
1337
+ end
1338
+ else
1339
+ nil
1340
+ end
1341
+ end)
1342
+ end
1343
+
1344
+ ##
1345
+ # Sets the password component for this URI.
1346
+ #
1347
+ # @param [String, #to_str] new_password The new password component.
555
1348
  def password=(new_password)
556
- @password = new_password
1349
+ @password = new_password ? new_password.to_str : nil
557
1350
 
558
1351
  # You can't have a nil user with a non-nil password
559
1352
  if @password != nil
@@ -562,33 +1355,65 @@ module Addressable
562
1355
 
563
1356
  # Reset dependent values
564
1357
  @userinfo = nil
1358
+ @normalized_userinfo = nil
565
1359
  @authority = nil
1360
+ @normalized_password = nil
566
1361
 
567
1362
  # Ensure we haven't created an invalid URI
568
1363
  validate()
569
1364
  end
570
-
571
- # Returns the username and password segment of this URI.
1365
+
1366
+ ##
1367
+ # The userinfo component for this URI.
1368
+ # Combines the user and password components.
1369
+ #
1370
+ # @return [String] The userinfo component.
572
1371
  def userinfo
573
- if !defined?(@userinfo) || @userinfo == nil
1372
+ @userinfo ||= (begin
574
1373
  current_user = self.user
575
1374
  current_password = self.password
576
1375
  if !current_user && !current_password
577
- @userinfo = nil
1376
+ nil
1377
+ elsif current_user && current_password
1378
+ "#{current_user}:#{current_password}"
1379
+ elsif current_user && !current_password
1380
+ "#{current_user}"
1381
+ end
1382
+ end)
1383
+ end
1384
+
1385
+ ##
1386
+ # The userinfo component for this URI, normalized.
1387
+ #
1388
+ # @return [String] The userinfo component, normalized.
1389
+ def normalized_userinfo
1390
+ @normalized_userinfo ||= (begin
1391
+ current_user = self.normalized_user
1392
+ current_password = self.normalized_password
1393
+ if !current_user && !current_password
1394
+ nil
578
1395
  elsif current_user && current_password
579
- @userinfo = "#{current_user}:#{current_password}"
1396
+ "#{current_user}:#{current_password}"
580
1397
  elsif current_user && !current_password
581
- @userinfo = "#{current_user}"
1398
+ "#{current_user}"
582
1399
  end
583
- end
584
- return @userinfo
1400
+ end)
585
1401
  end
586
-
587
- # Sets the username and password segment of this URI.
1402
+
1403
+ ##
1404
+ # Sets the userinfo component for this URI.
1405
+ #
1406
+ # @param [String, #to_str] new_userinfo The new userinfo component.
588
1407
  def userinfo=(new_userinfo)
589
- new_user = new_userinfo.to_s.strip.scan(/^(.*):/).flatten[0]
590
- new_password = new_userinfo.to_s.strip.scan(/:(.*)$/).flatten[0]
591
-
1408
+ new_user, new_password = if new_userinfo
1409
+ [
1410
+ new_userinfo.to_str.strip[/^(.*):/, 1],
1411
+ new_userinfo.to_str.strip[/:(.*)$/, 1]
1412
+ ]
1413
+ else
1414
+ [nil, nil]
1415
+ end
1416
+
592
1417
  # Password assigned first to ensure validity in case of nil
593
1418
  self.password = new_password
594
1419
  self.user = new_user
@@ -599,62 +1424,129 @@ module Addressable
599
1424
  # Ensure we haven't created an invalid URI
600
1425
  validate()
601
1426
  end
602
-
603
- # Returns the host for this URI.
1427
+
1428
+ ##
1429
+ # The host component for this URI.
1430
+ #
1431
+ # @return [String] The host component.
604
1432
  def host
605
1433
  return @host
606
1434
  end
607
-
608
- # Sets the host for this URI.
1435
+
1436
+ ##
1437
+ # The host component for this URI, normalized.
1438
+ #
1439
+ # @return [String] The host component, normalized.
1440
+ def normalized_host
1441
+ @normalized_host ||= (begin
1442
+ if self.host != nil
1443
+ if self.host.strip != ""
1444
+ result = ::Addressable::IDNA.to_ascii(
1445
+ self.class.unencode_component(self.host.strip.downcase)
1446
+ )
1447
+ if result[-1..-1] == "."
1448
+ # Trailing dots are unnecessary
1449
+ result = result[0...-1]
1450
+ end
1451
+ result
1452
+ else
1453
+ ""
1454
+ end
1455
+ else
1456
+ nil
1457
+ end
1458
+ end)
1459
+ end
1460
+
1461
+ ##
1462
+ # Sets the host component for this URI.
1463
+ #
1464
+ # @param [String, #to_str] new_host The new host component.
609
1465
  def host=(new_host)
610
- @host = new_host
1466
+ @host = new_host ? new_host.to_str : nil
611
1467
 
612
1468
  # Reset dependent values
613
1469
  @authority = nil
1470
+ @normalized_host = nil
614
1471
 
615
1472
  # Ensure we haven't created an invalid URI
616
1473
  validate()
617
1474
  end
618
-
619
- # Returns the authority segment of this URI.
1475
+
1476
+ ##
1477
+ # The authority component for this URI.
1478
+ # Combines the user, password, host, and port components.
1479
+ #
1480
+ # @return [String] The authority component.
620
1481
  def authority
621
- if !defined?(@authority) || @authority.nil?
622
- return nil if self.host.nil?
623
- @authority = ""
624
- if self.userinfo != nil
625
- @authority << "#{self.userinfo}@"
1482
+ @authority ||= (begin
1483
+ if self.host.nil?
1484
+ nil
1485
+ else
1486
+ authority = ""
1487
+ if self.userinfo != nil
1488
+ authority << "#{self.userinfo}@"
1489
+ end
1490
+ authority << self.host
1491
+ if self.port != nil
1492
+ authority << ":#{self.port}"
1493
+ end
1494
+ authority
626
1495
  end
627
- @authority << self.host
628
- if self.specified_port != nil
629
- @authority << ":#{self.specified_port}"
1496
+ end)
1497
+ end
1498
+
1499
+ ##
1500
+ # The authority component for this URI, normalized.
1501
+ #
1502
+ # @return [String] The authority component, normalized.
1503
+ def normalized_authority
1504
+ @normalized_authority ||= (begin
1505
+ if self.normalized_host.nil?
1506
+ nil
1507
+ else
1508
+ authority = ""
1509
+ if self.normalized_userinfo != nil
1510
+ authority << "#{self.normalized_userinfo}@"
1511
+ end
1512
+ authority << self.normalized_host
1513
+ if self.normalized_port != nil
1514
+ authority << ":#{self.normalized_port}"
1515
+ end
1516
+ authority
630
1517
  end
631
- end
632
- return @authority
1518
+ end)
633
1519
  end
634
-
635
- # Sets the authority segment of this URI.
1520
+
1521
+ ##
1522
+ # Sets the authority component for this URI.
1523
+ #
1524
+ # @param [String, #to_str] new_authority The new authority component.
636
1525
  def authority=(new_authority)
637
1526
  if new_authority
638
- new_userinfo = new_authority.scan(/^([^\[\]]*)@/).flatten[0]
1527
+ new_authority = new_authority.to_str
1528
+ new_userinfo = new_authority[/^([^\[\]]*)@/, 1]
639
1529
  if new_userinfo
640
- new_user = new_userinfo.strip.scan(/^([^:]*):?/).flatten[0]
641
- new_password = new_userinfo.strip.scan(/:(.*)$/).flatten[0]
1530
+ new_user = new_userinfo.strip[/^([^:]*):?/, 1]
1531
+ new_password = new_userinfo.strip[/:(.*)$/, 1]
642
1532
  end
643
1533
  new_host =
644
1534
  new_authority.gsub(/^([^\[\]]*)@/, "").gsub(/:([^:@\[\]]*?)$/, "")
645
1535
  new_port =
646
- new_authority.scan(/:([^:@\[\]]*?)$/).flatten[0]
1536
+ new_authority[/:([^:@\[\]]*?)$/, 1]
647
1537
  end
648
-
1538
+
649
1539
  # Password assigned first to ensure validity in case of nil
650
1540
  self.password = new_password
651
1541
  self.user = new_user
652
1542
  self.host = new_host
653
-
654
- # Port reset to allow port normalization
655
- @port = nil
656
- @specified_port = new_port
657
-
1543
+ self.port = new_port
1544
+
1545
+ # Reset dependent values
1546
+ @inferred_port = nil
1547
+ @userinfo = nil
1548
+ @normalized_userinfo = nil
1549
+
658
1550
  # Ensure we haven't created an invalid URI
659
1551
  validate()
660
1552
  end
@@ -663,112 +1555,311 @@ module Addressable
663
1555
  # use a similar URI form:
664
1556
  # //<user>:<password>@<host>:<port>/<url-path>
665
1557
  def self.ip_based_schemes
666
- return self.scheme_mapping.keys
1558
+ return self.port_mapping.keys
667
1559
  end
668
1560
 
669
1561
  # Returns a hash of common IP-based schemes and their default port
670
1562
  # numbers. Adding new schemes to this hash, as necessary, will allow
671
1563
  # for better URI normalization.
672
- def self.scheme_mapping
673
- if !defined?(@protocol_mapping) || @protocol_mapping.nil?
674
- @protocol_mapping = {
675
- "http" => 80,
676
- "https" => 443,
677
- "ftp" => 21,
678
- "tftp" => 69,
679
- "ssh" => 22,
680
- "svn+ssh" => 22,
681
- "telnet" => 23,
682
- "nntp" => 119,
683
- "gopher" => 70,
684
- "wais" => 210,
685
- "ldap" => 389,
686
- "prospero" => 1525
687
- }
688
- end
689
- return @protocol_mapping
690
- end
691
-
692
- # Returns the port number for this URI. This method will normalize to the
693
- # default port for the URI's scheme if the port isn't explicitly specified
694
- # in the URI.
1564
+ def self.port_mapping
1565
+ @port_mapping ||= {
1566
+ "http" => 80,
1567
+ "https" => 443,
1568
+ "ftp" => 21,
1569
+ "tftp" => 69,
1570
+ "sftp" => 22,
1571
+ "ssh" => 22,
1572
+ "svn+ssh" => 22,
1573
+ "telnet" => 23,
1574
+ "nntp" => 119,
1575
+ "gopher" => 70,
1576
+ "wais" => 210,
1577
+ "ldap" => 389,
1578
+ "prospero" => 1525
1579
+ }
1580
+ end
1581
+
1582
+ ##
1583
+ # The port component for this URI.
1584
+ # This is the port number actually given in the URI. This does not
1585
+ # infer port numbers from default values.
1586
+ #
1587
+ # @return [Integer] The port component.
695
1588
  def port
696
- if @port.to_i == 0
697
- if self.scheme
698
- @port = self.class.scheme_mapping[self.scheme.strip.downcase]
1589
+ return @port
1590
+ end
1591
+
1592
+ ##
1593
+ # The port component for this URI, normalized.
1594
+ #
1595
+ # @return [Integer] The port component, normalized.
1596
+ def normalized_port
1597
+ @normalized_port ||= (begin
1598
+ if self.class.port_mapping[normalized_scheme] == self.port
1599
+ nil
699
1600
  else
700
- @port = nil
1601
+ self.port
701
1602
  end
702
- return @port
703
- else
704
- @port = @port.to_i
705
- return @port
706
- end
1603
+ end)
707
1604
  end
708
-
709
- # Sets the port for this URI.
1605
+
1606
+ ##
1607
+ # Sets the port component for this URI.
1608
+ #
1609
+ # @param [String, Integer, #to_s] new_port The new port component.
710
1610
  def port=(new_port)
1611
+ if new_port != nil && !(new_port.to_s =~ /^\d+$/)
1612
+ raise InvalidURIError,
1613
+ "Invalid port number: #{new_port.inspect}"
1614
+ end
1615
+
711
1616
  @port = new_port.to_s.to_i
712
- @specified_port = @port
1617
+ @port = nil if @port == 0
1618
+
1619
+ # Reset dependent values
713
1620
  @authority = nil
1621
+ @inferred_port = nil
1622
+ @normalized_port = nil
1623
+
1624
+ # Ensure we haven't created an invalid URI
1625
+ validate()
714
1626
  end
715
-
716
- # Returns the port number that was actually specified in the URI string.
717
- def specified_port
718
- port = @specified_port.to_s.to_i
719
- if port == 0
720
- return nil
721
- else
722
- return port
723
- end
1627
+
1628
+ ##
1629
+ # The inferred port component for this URI.
1630
+ # This method will normalize to the default port for the URI's scheme if
1631
+ # the port isn't explicitly specified in the URI.
1632
+ #
1633
+ # @return [Integer] The inferred port component.
1634
+ def inferred_port
1635
+ @inferred_port ||= (begin
1636
+ if port.to_i == 0
1637
+ if scheme
1638
+ self.class.port_mapping[scheme.strip.downcase]
1639
+ else
1640
+ nil
1641
+ end
1642
+ else
1643
+ port.to_i
1644
+ end
1645
+ end)
724
1646
  end
725
-
726
- # Returns the path for this URI.
1647
+
1648
+ ##
1649
+ # The path component for this URI.
1650
+ #
1651
+ # @return [String] The path component.
727
1652
  def path
728
- return @path
1653
+ return (@path || "")
1654
+ end
1655
+
1656
+ ##
1657
+ # The path component for this URI, normalized.
1658
+ #
1659
+ # @return [String] The path component, normalized.
1660
+ def normalized_path
1661
+ @normalized_path ||= (begin
1662
+ result = self.class.normalize_path(self.path.strip)
1663
+ if result == "" &&
1664
+ ["http", "https", "ftp", "tftp"].include?(self.normalized_scheme)
1665
+ result = "/"
1666
+ end
1667
+ result
1668
+ end)
729
1669
  end
730
-
731
- # Sets the path for this URI.
1670
+
1671
+ ##
1672
+ # Sets the path component for this URI.
1673
+ #
1674
+ # @param [String, #to_str] new_path The new path component.
732
1675
  def path=(new_path)
733
- @path = (new_path || "")
1676
+ @path = (new_path || "").to_str
1677
+ if @path != "" && @path[0..0] != "/" && host != nil
1678
+ @path = "/#{@path}"
1679
+ end
1680
+
1681
+ # Reset dependent values
1682
+ @normalized_path = nil
734
1683
  end
735
1684
 
736
- # Returns the basename, if any, of the file at the path being referenced.
737
- # Returns nil if there is no path component.
1685
+ ##
1686
+ # The basename, if any, of the file in the path component.
1687
+ #
1688
+ # @return [String] The path's basename.
738
1689
  def basename
739
1690
  # Path cannot be nil
740
1691
  return File.basename(self.path).gsub(/;[^\/]*$/, "")
741
1692
  end
742
-
743
- # Returns the extension, if any, of the file at the path being referenced.
744
- # Returns "" if there is no extension or nil if there is no path
745
- # component.
1693
+
1694
+ ##
1695
+ # The extname, if any, of the file in the path component.
1696
+ # Empty string if there is no extension.
1697
+ #
1698
+ # @return [String] The path's extname.
746
1699
  def extname
747
1700
  return nil unless self.path
748
1701
  return File.extname(self.basename)
749
1702
  end
750
-
751
- # Returns the query string for this URI.
1703
+
1704
+ ##
1705
+ # The query component for this URI.
1706
+ #
1707
+ # @return [String] The query component.
752
1708
  def query
753
1709
  return @query
754
1710
  end
755
-
756
- # Sets the query string for this URI.
1711
+
1712
+ ##
1713
+ # The query component for this URI, normalized.
1714
+ #
1715
+ # @return [String] The query component, normalized.
1716
+ def normalized_query
1717
+ @normalized_query ||= (self.query ? self.query.strip : nil)
1718
+ end
1719
+
1720
+ ##
1721
+ # Sets the query component for this URI.
1722
+ #
1723
+ # @param [String, #to_str] new_query The new query component.
757
1724
  def query=(new_query)
758
- @query = new_query
1725
+ @query = new_query.to_str
1726
+
1727
+ # Reset dependent values
1728
+ @normalized_query = nil
1729
+ end
1730
+
1731
+ ##
1732
+ # Converts the query component to a Hash value.
1733
+ #
1734
+ # @option options [Symbol] :notation
1735
+ # May be one of <tt>:flat</tt>, <tt>:dot</tt>, or <tt>:subscript</tt>.
1736
+ # The <tt>:dot</tt> notation is not supported for assignment.
1737
+ # Default value is <tt>:subscript</tt>.
1738
+ #
1739
+ # @return [Hash] The query string parsed as a Hash object.
1740
+ #
1741
+ # @example
1742
+ # Addressable::URI.parse("?one=1&two=2&three=3").query_values
1743
+ # #=> {"one" => "1", "two" => "2", "three" => "3"}
1744
+ # Addressable::URI.parse("?one[two][three]=four").query_values
1745
+ # #=> {"one" => {"two" => {"three" => "four"}}}
1746
+ # Addressable::URI.parse("?one.two.three=four").query_values(
1747
+ # :notation => :dot
1748
+ # )
1749
+ # #=> {"one" => {"two" => {"three" => "four"}}}
1750
+ # Addressable::URI.parse("?one[two][three]=four").query_values(
1751
+ # :notation => :flat
1752
+ # )
1753
+ # #=> {"one[two][three]" => "four"}
1754
+ # Addressable::URI.parse("?one.two.three=four").query_values(
1755
+ # :notation => :flat
1756
+ # )
1757
+ # #=> {"one.two.three" => "four"}
1758
+ # Addressable::URI.parse(
1759
+ # "?one[two][three][]=four&one[two][three][]=five"
1760
+ # ).query_values
1761
+ # #=> {"one" => {"two" => {"three" => ["four", "five"]}}}
1762
+ def query_values(options={})
1763
+ defaults = {:notation => :subscript}
1764
+ options = defaults.merge(options)
1765
+ if ![:flat, :dot, :subscript].include?(options[:notation])
1766
+ raise ArgumentError,
1767
+ "Invalid notation. Must be one of: [:flat, :dot, :subscript]."
1768
+ end
1769
+ return nil if self.query == nil
1770
+ return (self.query.split("&").map do |pair|
1771
+ pair.split("=")
1772
+ end).inject({}) do |accumulator, pair|
1773
+ key, value = pair
1774
+ value = true if value.nil?
1775
+ key = self.class.unencode_component(key)
1776
+ if value != true
1777
+ value = self.class.unencode_component(value).gsub(/\+/, " ")
1778
+ end
1779
+ if options[:notation] == :flat
1780
+ if accumulator[key]
1781
+ raise ArgumentError, "Key was repeated: #{key.inspect}"
1782
+ end
1783
+ accumulator[key] = value
1784
+ else
1785
+ if options[:notation] == :dot
1786
+ array_value = false
1787
+ subkeys = key.split(".")
1788
+ elsif options[:notation] == :subscript
1789
+ array_value = !!(key =~ /\[\]$/)
1790
+ subkeys = key.split(/[\[\]]+/)
1791
+ end
1792
+ current_hash = accumulator
1793
+ for i in 0...(subkeys.size - 1)
1794
+ subkey = subkeys[i]
1795
+ current_hash[subkey] = {} unless current_hash[subkey]
1796
+ current_hash = current_hash[subkey]
1797
+ end
1798
+ if array_value
1799
+ current_hash[subkeys.last] = [] unless current_hash[subkeys.last]
1800
+ current_hash[subkeys.last] << value
1801
+ else
1802
+ current_hash[subkeys.last] = value
1803
+ end
1804
+ end
1805
+ accumulator
1806
+ end
1807
+ end
1808
+
1809
+ ##
1810
+ # Sets the query component for this URI from a Hash object.
1811
+ #
1812
+ # @param [Hash, #to_hash] new_query_values The new query values.
1813
+ def query_values=(new_query_values)
1814
+ @query = (new_query_values.to_hash.inject([]) do |accumulator, pair|
1815
+ key, value = pair
1816
+ key = self.class.encode_component(key, CharacterClasses::UNRESERVED)
1817
+ if value == true
1818
+ accumulator << "#{key}"
1819
+ else
1820
+ value = self.class.encode_component(
1821
+ value, CharacterClasses::UNRESERVED)
1822
+ accumulator << "#{key}=#{value}"
1823
+ end
1824
+ end).join("&")
1825
+
1826
+ # Reset dependent values
1827
+ @normalized_query = nil
759
1828
  end
760
-
761
- # Returns the fragment for this URI.
1829
+
1830
+ ##
1831
+ # The fragment component for this URI.
1832
+ #
1833
+ # @return [String] The fragment component.
762
1834
  def fragment
763
1835
  return @fragment
764
1836
  end
765
-
766
- # Sets the fragment for this URI.
1837
+
1838
+ ##
1839
+ # The fragment component for this URI, normalized.
1840
+ #
1841
+ # @return [String] The fragment component, normalized.
1842
+ def normalized_fragment
1843
+ @normalized_fragment ||= (self.fragment ? self.fragment.strip : nil)
1844
+ end
1845
+
1846
+ ##
1847
+ # Sets the fragment component for this URI.
1848
+ #
1849
+ # @param [String, #to_str] new_fragment The new fragment component.
767
1850
  def fragment=(new_fragment)
768
- @fragment = new_fragment
1851
+ @fragment = new_fragment ? new_fragment.to_str : nil
1852
+
1853
+ # Reset dependent values
1854
+ @normalized_fragment = nil
769
1855
  end
770
-
771
- # Returns true if the URI uses an IP-based protocol.
1856
+
1857
+ ##
1858
+ # Determines if the scheme indicates an IP-based protocol.
1859
+ #
1860
+ # @return [TrueClass, FalseClass]
1861
+ # <tt>true</tt> if the scheme indicates an IP-based protocol.
1862
+ # <tt>false</tt> otherwise.
772
1863
  def ip_based?
773
1864
  if self.scheme
774
1865
  return self.class.ip_based_schemes.include?(
@@ -776,26 +1867,45 @@ module Addressable
776
1867
  end
777
1868
  return false
778
1869
  end
779
-
780
- # Returns true if this URI is known to be relative.
1870
+
1871
+ ##
1872
+ # Determines if the URI is relative.
1873
+ #
1874
+ # @return [TrueClass, FalseClass]
1875
+ # <tt>true</tt> if the URI is relative.
1876
+ # <tt>false</tt> otherwise.
781
1877
  def relative?
782
1878
  return self.scheme.nil?
783
1879
  end
784
-
785
- # Returns true if this URI is known to be absolute.
1880
+
1881
+ ##
1882
+ # Determines if the URI is absolute.
1883
+ #
1884
+ # @return [TrueClass, FalseClass]
1885
+ # <tt>true</tt> if the URI is absolute.
1886
+ # <tt>false</tt> otherwise.
786
1887
  def absolute?
787
1888
  return !relative?
788
1889
  end
789
-
1890
+
1891
+ ##
790
1892
  # Joins two URIs together.
791
- def +(uri)
1893
+ #
1894
+ # @param [String, Addressable::URI, #to_str] uri The URI to join with.
1895
+ #
1896
+ # @return [Addressable::URI] The joined URI.
1897
+ def join(uri)
1898
+ if !uri.respond_to?(:to_str)
1899
+ raise TypeError, "Can't convert #{uri.class} into String."
1900
+ end
792
1901
  if !uri.kind_of?(self.class)
793
- uri = URI.parse(uri.to_s)
1902
+ # Otherwise, convert to a String, then parse.
1903
+ uri = self.class.parse(uri.to_str)
794
1904
  end
795
1905
  if uri.to_s == ""
796
1906
  return self.dup
797
1907
  end
798
-
1908
+
799
1909
  joined_scheme = nil
800
1910
  joined_user = nil
801
1911
  joined_password = nil
@@ -804,14 +1914,14 @@ module Addressable
804
1914
  joined_path = nil
805
1915
  joined_query = nil
806
1916
  joined_fragment = nil
807
-
1917
+
808
1918
  # Section 5.2.2 of RFC 3986
809
1919
  if uri.scheme != nil
810
1920
  joined_scheme = uri.scheme
811
1921
  joined_user = uri.user
812
1922
  joined_password = uri.password
813
1923
  joined_host = uri.host
814
- joined_port = uri.specified_port
1924
+ joined_port = uri.port
815
1925
  joined_path = self.class.normalize_path(uri.path)
816
1926
  joined_query = uri.query
817
1927
  else
@@ -819,7 +1929,7 @@ module Addressable
819
1929
  joined_user = uri.user
820
1930
  joined_password = uri.password
821
1931
  joined_host = uri.host
822
- joined_port = uri.specified_port
1932
+ joined_port = uri.port
823
1933
  joined_path = self.class.normalize_path(uri.path)
824
1934
  joined_query = uri.query
825
1935
  else
@@ -846,13 +1956,13 @@ module Addressable
846
1956
  else
847
1957
  base_path = ""
848
1958
  end
849
-
1959
+
850
1960
  # If the base path is empty and an authority segment has been
851
1961
  # defined, use a base path of "/"
852
1962
  if base_path == "" && self.authority != nil
853
1963
  base_path = "/"
854
1964
  end
855
-
1965
+
856
1966
  joined_path = self.class.normalize_path(base_path + uri.path)
857
1967
  end
858
1968
  joined_query = uri.query
@@ -860,39 +1970,124 @@ module Addressable
860
1970
  joined_user = self.user
861
1971
  joined_password = self.password
862
1972
  joined_host = self.host
863
- joined_port = self.specified_port
1973
+ joined_port = self.port
864
1974
  end
865
1975
  joined_scheme = self.scheme
866
1976
  end
867
1977
  joined_fragment = uri.fragment
868
-
1978
+
869
1979
  return Addressable::URI.new(
870
- joined_scheme,
871
- joined_user,
872
- joined_password,
873
- joined_host,
874
- joined_port,
875
- joined_path,
876
- joined_query,
877
- joined_fragment
1980
+ :scheme => joined_scheme,
1981
+ :user => joined_user,
1982
+ :password => joined_password,
1983
+ :host => joined_host,
1984
+ :port => joined_port,
1985
+ :path => joined_path,
1986
+ :query => joined_query,
1987
+ :fragment => joined_fragment
878
1988
  )
879
1989
  end
880
-
881
- # Merges two URIs together.
882
- def merge(uri)
883
- return self + uri
1990
+ alias_method :+, :join
1991
+
1992
+ ##
1993
+ # Destructive form of <tt>join</tt>.
1994
+ #
1995
+ # @param [String, Addressable::URI, #to_str] uri The URI to join with.
1996
+ #
1997
+ # @return [Addressable::URI] The joined URI.
1998
+ #
1999
+ # @see Addressable::URI#join
2000
+ def join!(uri)
2001
+ replace_self(self.join(uri))
2002
+ end
2003
+
2004
+ ##
2005
+ # Merges a URI with a <tt>Hash</tt> of components.
2006
+ # This method has different behavior from <tt>join</tt>. Any components
2007
+ # present in the <tt>hash</tt> parameter will override the original
2008
+ # components. The path component is not treated specially.
2009
+ #
2010
+ # @param [Hash, Addressable::URI, #to_hash] hash The components to merge with.
2011
+ #
2012
+ # @return [Addressable::URI] The merged URI.
2013
+ #
2014
+ # @see Hash#merge
2015
+ def merge(hash)
2016
+ if !hash.respond_to?(:to_hash)
2017
+ raise TypeError, "Can't convert #{hash.class} into Hash."
2018
+ end
2019
+ hash = hash.to_hash
2020
+
2021
+ if hash.has_key?(:authority)
2022
+ if (hash.keys & [:userinfo, :user, :password, :host, :port]).any?
2023
+ raise ArgumentError,
2024
+ "Cannot specify both an authority and any of the components " +
2025
+ "within the authority."
2026
+ end
2027
+ end
2028
+ if hash.has_key?(:userinfo)
2029
+ if (hash.keys & [:user, :password]).any?
2030
+ raise ArgumentError,
2031
+ "Cannot specify both a userinfo and either the user or password."
2032
+ end
2033
+ end
2034
+
2035
+ uri = Addressable::URI.new
2036
+ uri.validation_deferred = true
2037
+ uri.scheme =
2038
+ hash.has_key?(:scheme) ? hash[:scheme] : self.scheme
2039
+ if hash.has_key?(:authority)
2040
+ uri.authority =
2041
+ hash.has_key?(:authority) ? hash[:authority] : self.authority
2042
+ end
2043
+ if hash.has_key?(:userinfo)
2044
+ uri.userinfo =
2045
+ hash.has_key?(:userinfo) ? hash[:userinfo] : self.userinfo
2046
+ end
2047
+ if !hash.has_key?(:userinfo) && !hash.has_key?(:authority)
2048
+ uri.user =
2049
+ hash.has_key?(:user) ? hash[:user] : self.user
2050
+ uri.password =
2051
+ hash.has_key?(:password) ? hash[:password] : self.password
2052
+ end
2053
+ if !hash.has_key?(:authority)
2054
+ uri.host =
2055
+ hash.has_key?(:host) ? hash[:host] : self.host
2056
+ uri.port =
2057
+ hash.has_key?(:port) ? hash[:port] : self.port
2058
+ end
2059
+ uri.path =
2060
+ hash.has_key?(:path) ? hash[:path] : self.path
2061
+ uri.query =
2062
+ hash.has_key?(:query) ? hash[:query] : self.query
2063
+ uri.fragment =
2064
+ hash.has_key?(:fragment) ? hash[:fragment] : self.fragment
2065
+ uri.validation_deferred = false
2066
+
2067
+ return uri
884
2068
  end
885
- alias_method :join, :merge
886
-
887
- # Destructive form of merge.
2069
+
2070
+ ##
2071
+ # Destructive form of <tt>merge</tt>.
2072
+ #
2073
+ # @param [Hash, Addressable::URI, #to_hash] uri The components to merge with.
2074
+ #
2075
+ # @return [Addressable::URI] The merged URI.
2076
+ #
2077
+ # @see Addressable::URI#merge
888
2078
  def merge!(uri)
889
2079
  replace_self(self.merge(uri))
890
2080
  end
891
- alias_method :join!, :merge!
892
-
2081
+
2082
+ ##
893
2083
  # Returns the shortest normalized relative form of this URI that uses the
894
2084
  # supplied URI as a base for resolution. Returns an absolute URI if
895
- # necessary.
2085
+ # necessary. This is effectively the opposite of <tt>route_to</tt>.
2086
+ #
2087
+ # @param [String, Addressable::URI, #to_str] uri The URI to route from.
2088
+ #
2089
+ # @return [Addressable::URI]
2090
+ # The normalized relative URI that is equivalent to the original URI.
896
2091
  def route_from(uri)
897
2092
  uri = self.class.parse(uri).normalize
898
2093
  normalized_self = self.normalize
@@ -905,50 +2100,57 @@ module Addressable
905
2100
  if normalized_self == uri
906
2101
  return Addressable::URI.parse("##{normalized_self.fragment}")
907
2102
  end
908
- segments = normalized_self.to_hash
2103
+ components = normalized_self.to_hash
909
2104
  if normalized_self.scheme == uri.scheme
910
- segments[:scheme] = nil
2105
+ components[:scheme] = nil
911
2106
  if normalized_self.authority == uri.authority
912
- segments[:user] = nil
913
- segments[:password] = nil
914
- segments[:host] = nil
915
- segments[:port] = nil
2107
+ components[:user] = nil
2108
+ components[:password] = nil
2109
+ components[:host] = nil
2110
+ components[:port] = nil
916
2111
  if normalized_self.path == uri.path
917
- segments[:path] = nil
2112
+ components[:path] = nil
918
2113
  if normalized_self.query == uri.query
919
- segments[:query] = nil
2114
+ components[:query] = nil
920
2115
  end
921
2116
  else
922
2117
  if uri.path != "/"
923
- segments[:path].gsub!(
2118
+ components[:path].gsub!(
924
2119
  Regexp.new("^" + Regexp.escape(uri.path)), "")
925
2120
  end
926
2121
  end
927
2122
  end
928
2123
  end
929
2124
  # Avoid network-path references.
930
- if segments[:host] != nil
931
- segments[:scheme] = normalized_self.scheme
2125
+ if components[:host] != nil
2126
+ components[:scheme] = normalized_self.scheme
932
2127
  end
933
2128
  return Addressable::URI.new(
934
- segments[:scheme],
935
- segments[:user],
936
- segments[:password],
937
- segments[:host],
938
- segments[:port],
939
- segments[:path],
940
- segments[:query],
941
- segments[:fragment]
2129
+ :scheme => components[:scheme],
2130
+ :user => components[:user],
2131
+ :password => components[:password],
2132
+ :host => components[:host],
2133
+ :port => components[:port],
2134
+ :path => components[:path],
2135
+ :query => components[:query],
2136
+ :fragment => components[:fragment]
942
2137
  )
943
2138
  end
944
-
2139
+
2140
+ ##
945
2141
  # Returns the shortest normalized relative form of the supplied URI that
946
2142
  # uses this URI as a base for resolution. Returns an absolute URI if
947
- # necessary.
2143
+ # necessary. This is effectively the opposite of <tt>route_from</tt>.
2144
+ #
2145
+ # @param [String, Addressable::URI, #to_str] uri The URI to route to.
2146
+ #
2147
+ # @return [Addressable::URI]
2148
+ # The normalized relative URI that is equivalent to the supplied URI.
948
2149
  def route_to(uri)
949
2150
  return self.class.parse(uri).route_from(self)
950
2151
  end
951
-
2152
+
2153
+ ##
952
2154
  # Returns a normalized URI object.
953
2155
  #
954
2156
  # NOTE: This method does not attempt to fully conform to specifications.
@@ -956,154 +2158,176 @@ module Addressable
956
2158
  # specifications, and also to deal with caching issues since several
957
2159
  # different URIs may represent the same resource and should not be
958
2160
  # cached multiple times.
2161
+ #
2162
+ # @return [Addressable::URI] The normalized URI.
959
2163
  def normalize
960
- normalized_scheme = nil
961
- normalized_scheme = self.scheme.strip.downcase if self.scheme != nil
962
- normalized_scheme = "svn+ssh" if normalized_scheme == "ssh+svn"
2164
+ # This is a special exception for the frequently misused feed
2165
+ # URI scheme.
963
2166
  if normalized_scheme == "feed"
964
2167
  if self.to_s =~ /^feed:\/*http:\/*/
965
2168
  return self.class.parse(
966
- self.to_s.scan(/^feed:\/*(http:\/*.*)/).flatten[0]).normalize
967
- end
968
- end
969
- normalized_user = nil
970
- normalized_user = self.user.strip if self.user != nil
971
- normalized_password = nil
972
- normalized_password = self.password.strip if self.password != nil
973
-
974
- # If we are using http or https and user/password are blank,
975
- # then we remove them
976
- if normalized_scheme =~ /https?/ && normalized_user == "" &&
977
- (!normalized_password || normalized_password == "")
978
- normalized_user = nil
979
- normalized_password = nil
980
- end
981
-
982
- normalized_host = nil
983
- normalized_host = self.host.strip.downcase if self.host != nil
984
- if normalized_host != nil
985
- begin
986
- normalized_host = URI::IDNA.to_ascii(normalized_host)
987
- rescue Exception
988
- nil
989
- end
990
- if normalized_host[-1..-1] == "."
991
- normalized_host = normalized_host[0...-1]
992
- end
993
- end
994
-
995
- normalized_port = self.port
996
- if self.class.scheme_mapping[normalized_scheme] == normalized_port
997
- normalized_port = nil
998
- end
999
- normalized_path = nil
1000
- normalized_path = self.path.strip if self.path != nil
1001
- if normalized_path != nil
1002
- normalized_path = self.class.normalize_path(normalized_path)
1003
- end
1004
- if normalized_path == ""
1005
- if ["http", "https", "ftp", "tftp"].include?(normalized_scheme)
1006
- normalized_path = "/"
2169
+ self.to_s[/^feed:\/*(http:\/*.*)/, 1]
2170
+ ).normalize
1007
2171
  end
1008
2172
  end
1009
2173
 
1010
- normalized_query = nil
1011
- normalized_query = self.query.strip if self.query != nil
1012
-
1013
- normalized_fragment = nil
1014
- normalized_fragment = self.fragment.strip if self.fragment != nil
1015
- return Addressable::URI.parse(
1016
- Addressable::URI.normalized_encode(Addressable::URI.new(
1017
- normalized_scheme,
1018
- normalized_user,
1019
- normalized_password,
1020
- normalized_host,
1021
- normalized_port,
1022
- normalized_path,
1023
- normalized_query,
1024
- normalized_fragment
1025
- )))
2174
+ return Addressable::URI.normalized_encode(
2175
+ Addressable::URI.new(
2176
+ :scheme => normalized_scheme,
2177
+ :authority => normalized_authority,
2178
+ :path => normalized_path,
2179
+ :query => normalized_query,
2180
+ :fragment => normalized_fragment
2181
+ ),
2182
+ ::Addressable::URI
2183
+ )
1026
2184
  end
1027
2185
 
2186
+ ##
1028
2187
  # Destructively normalizes this URI object.
2188
+ #
2189
+ # @return [Addressable::URI] The normalized URI.
2190
+ #
2191
+ # @see Addressable::URI#normalize
1029
2192
  def normalize!
1030
2193
  replace_self(self.normalize)
1031
2194
  end
1032
-
2195
+
2196
+ ##
1033
2197
  # Creates a URI suitable for display to users. If semantic attacks are
1034
2198
  # likely, the application should try to detect these and warn the user.
1035
- # See RFC 3986 section 7.6 for more information.
2199
+ # See <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>,
2200
+ # section 7.6 for more information.
2201
+ #
2202
+ # @return [Addressable::URI] A URI suitable for display purposes.
1036
2203
  def display_uri
1037
2204
  display_uri = self.normalize
1038
- begin
1039
- display_uri.instance_variable_set("@host",
1040
- URI::IDNA.to_unicode(display_uri.host))
1041
- rescue Exception
1042
- nil
1043
- end
2205
+ display_uri.instance_variable_set("@host",
2206
+ ::Addressable::IDNA.to_unicode(display_uri.host))
1044
2207
  return display_uri
1045
2208
  end
1046
-
1047
- # Returns true if the URI objects are equal. This method normalizes
1048
- # both URIs before doing the comparison, and allows comparison against
1049
- # strings.
2209
+
2210
+ ##
2211
+ # Returns <tt>true</tt> if the URI objects are equal. This method
2212
+ # normalizes both URIs before doing the comparison, and allows comparison
2213
+ # against <tt>Strings</tt>.
2214
+ #
2215
+ # @param [Object] uri The URI to compare.
2216
+ #
2217
+ # @return [TrueClass, FalseClass]
2218
+ # <tt>true</tt> if the URIs are equivalent, <tt>false</tt> otherwise.
1050
2219
  def ===(uri)
1051
2220
  if uri.respond_to?(:normalize)
1052
2221
  uri_string = uri.normalize.to_s
1053
2222
  else
1054
2223
  begin
1055
- uri_string = URI.parse(uri.to_s).normalize.to_s
1056
- rescue InvalidURIError
2224
+ uri_string = ::Addressable::URI.parse(uri).normalize.to_s
2225
+ rescue InvalidURIError, TypeError
1057
2226
  return false
1058
2227
  end
1059
2228
  end
1060
2229
  return self.normalize.to_s == uri_string
1061
2230
  end
1062
-
1063
- # Returns true if the URI objects are equal. This method normalizes
1064
- # both URIs before doing the comparison.
2231
+
2232
+ ##
2233
+ # Returns <tt>true</tt> if the URI objects are equal. This method
2234
+ # normalizes both URIs before doing the comparison.
2235
+ #
2236
+ # @param [Object] uri The URI to compare.
2237
+ #
2238
+ # @return [TrueClass, FalseClass]
2239
+ # <tt>true</tt> if the URIs are equivalent, <tt>false</tt> otherwise.
1065
2240
  def ==(uri)
1066
- return false unless uri.kind_of?(self.class)
2241
+ return false unless uri.kind_of?(self.class)
1067
2242
  return self.normalize.to_s == uri.normalize.to_s
1068
2243
  end
1069
2244
 
1070
- # Returns true if the URI objects are equal. This method does NOT
1071
- # normalize either URI before doing the comparison.
2245
+ ##
2246
+ # Returns <tt>true</tt> if the URI objects are equal. This method
2247
+ # does NOT normalize either URI before doing the comparison.
2248
+ #
2249
+ # @param [Object] uri The URI to compare.
2250
+ #
2251
+ # @return [TrueClass, FalseClass]
2252
+ # <tt>true</tt> if the URIs are equivalent, <tt>false</tt> otherwise.
1072
2253
  def eql?(uri)
1073
- return false unless uri.kind_of?(self.class)
2254
+ return false unless uri.kind_of?(self.class)
1074
2255
  return self.to_s == uri.to_s
1075
2256
  end
1076
2257
 
1077
- # Returns a hash value that will make a URI equivalent to its normalized
2258
+ ##
2259
+ # A hash value that will make a URI equivalent to its normalized
1078
2260
  # form.
2261
+ #
2262
+ # @return [Integer] A hash of the URI.
1079
2263
  def hash
1080
2264
  return (self.normalize.to_s.hash * -1)
1081
2265
  end
1082
-
2266
+
2267
+ ##
1083
2268
  # Clones the URI object.
2269
+ #
2270
+ # @return [Addressable::URI] The cloned URI.
1084
2271
  def dup
1085
- duplicated_scheme = self.scheme ? self.scheme.dup : nil
1086
- duplicated_user = self.user ? self.user.dup : nil
1087
- duplicated_password = self.password ? self.password.dup : nil
1088
- duplicated_host = self.host ? self.host.dup : nil
1089
- duplicated_port = self.specified_port
1090
- duplicated_path = self.path ? self.path.dup : nil
1091
- duplicated_query = self.query ? self.query.dup : nil
1092
- duplicated_fragment = self.fragment ? self.fragment.dup : nil
1093
2272
  duplicated_uri = Addressable::URI.new(
1094
- duplicated_scheme,
1095
- duplicated_user,
1096
- duplicated_password,
1097
- duplicated_host,
1098
- duplicated_port,
1099
- duplicated_path,
1100
- duplicated_query,
1101
- duplicated_fragment
2273
+ :scheme => self.scheme ? self.scheme.dup : nil,
2274
+ :user => self.user ? self.user.dup : nil,
2275
+ :password => self.password ? self.password.dup : nil,
2276
+ :host => self.host ? self.host.dup : nil,
2277
+ :port => self.port,
2278
+ :path => self.path ? self.path.dup : nil,
2279
+ :query => self.query ? self.query.dup : nil,
2280
+ :fragment => self.fragment ? self.fragment.dup : nil
1102
2281
  )
1103
2282
  return duplicated_uri
1104
2283
  end
1105
-
1106
- # Returns the assembled URI as a string.
2284
+
2285
+ ##
2286
+ # Omits components from a URI.
2287
+ #
2288
+ # @param [Symbol] *components The components to be omitted.
2289
+ #
2290
+ # @return [Addressable::URI] The URI with components omitted.
2291
+ #
2292
+ # @example
2293
+ # uri = Addressable::URI.parse("http://example.com/path?query")
2294
+ # #=> #<Addressable::URI:0xcc5e7a URI:http://example.com/path?query>
2295
+ # uri.omit(:scheme, :authority)
2296
+ # #=> #<Addressable::URI:0xcc4d86 URI:/path?query>
2297
+ def omit(*components)
2298
+ invalid_components = components - [
2299
+ :scheme, :user, :password, :userinfo, :host, :port, :authority,
2300
+ :path, :query, :fragment
2301
+ ]
2302
+ unless invalid_components.empty?
2303
+ raise ArgumentError,
2304
+ "Invalid component names: #{invalid_components.inspect}."
2305
+ end
2306
+ duplicated_uri = self.dup
2307
+ duplicated_uri.validation_deferred = true
2308
+ components.each do |component|
2309
+ duplicated_uri.send((component.to_s + "=").to_sym, nil)
2310
+ end
2311
+ duplicated_uri.validation_deferred = false
2312
+ duplicated_uri
2313
+ end
2314
+
2315
+ ##
2316
+ # Destructive form of omit.
2317
+ #
2318
+ # @param [Symbol] *components The components to be omitted.
2319
+ #
2320
+ # @return [Addressable::URI] The URI with components omitted.
2321
+ #
2322
+ # @see Addressable::URI#omit
2323
+ def omit!(*components)
2324
+ replace_self(self.omit(*components))
2325
+ end
2326
+
2327
+ ##
2328
+ # Converts the URI to a <tt>String</tt>.
2329
+ #
2330
+ # @return [String] The URI's <tt>String</tt> representation.
1107
2331
  def to_s
1108
2332
  uri_string = ""
1109
2333
  uri_string << "#{self.scheme}:" if self.scheme != nil
@@ -1111,82 +2335,72 @@ module Addressable
1111
2335
  uri_string << self.path.to_s
1112
2336
  uri_string << "?#{self.query}" if self.query != nil
1113
2337
  uri_string << "##{self.fragment}" if self.fragment != nil
2338
+ if uri_string.respond_to?(:force_encoding)
2339
+ uri_string.force_encoding(Encoding::UTF_8)
2340
+ end
1114
2341
  return uri_string
1115
2342
  end
1116
-
1117
- # Returns a Hash of the URI segments.
2343
+
2344
+ ##
2345
+ # URIs are glorified <tt>Strings</tt>. Allow implicit conversion.
2346
+ alias_method :to_str, :to_s
2347
+
2348
+ ##
2349
+ # Returns a Hash of the URI components.
2350
+ #
2351
+ # @return [Hash] The URI as a <tt>Hash</tt> of components.
1118
2352
  def to_hash
1119
2353
  return {
1120
2354
  :scheme => self.scheme,
1121
2355
  :user => self.user,
1122
2356
  :password => self.password,
1123
2357
  :host => self.host,
1124
- :port => self.specified_port,
2358
+ :port => self.port,
1125
2359
  :path => self.path,
1126
2360
  :query => self.query,
1127
2361
  :fragment => self.fragment
1128
2362
  }
1129
2363
  end
1130
-
1131
- # Returns a string representation of the URI object's state.
2364
+
2365
+ ##
2366
+ # Returns a <tt>String</tt> representation of the URI object's state.
2367
+ #
2368
+ # @return [String] The URI object's state, as a <tt>String</tt>.
1132
2369
  def inspect
1133
2370
  sprintf("#<%s:%#0x URI:%s>", self.class.to_s, self.object_id, self.to_s)
1134
2371
  end
1135
-
1136
- # This module handles internationalized domain names. When Ruby has an
1137
- # implementation of nameprep, stringprep, punycode, etc, this
1138
- # module should contain an actual implementation of IDNA instead of
1139
- # returning nil if libidn can't be used.
1140
- module IDNA
1141
- # Returns the ascii representation of the label.
1142
- def self.to_ascii(label)
1143
- return nil if label.nil?
1144
- if self.use_libidn?
1145
- return IDN::Idna.toASCII(label)
1146
- else
1147
- raise NotImplementedError,
1148
- "There is no available pure-ruby implementation. " +
1149
- "Install libidn bindings."
1150
- end
1151
- end
1152
-
1153
- # Returns the unicode representation of the label.
1154
- def self.to_unicode(label)
1155
- return nil if label.nil?
1156
- if self.use_libidn?
1157
- return IDN::Idna.toUnicode(label)
1158
- else
1159
- raise NotImplementedError,
1160
- "There is no available pure-ruby implementation. " +
1161
- "Install libidn bindings."
1162
- end
1163
- end
1164
-
1165
- private
1166
- # Determines if the libidn bindings are available and able to be used.
1167
- def self.use_libidn?
1168
- if !defined?(@use_libidn) || @use_libidn.nil?
1169
- begin
1170
- require 'rubygems'
1171
- rescue LoadError
1172
- nil
1173
- end
1174
- begin
1175
- require 'idn'
1176
- rescue LoadError
1177
- nil
1178
- end
1179
- @use_libidn = !!(defined?(IDN::Idna))
1180
- end
1181
- return @use_libidn
1182
- end
2372
+
2373
+ ##
2374
+ # Indicates whether URI validation is currently deferred (disabled).
2375
+ #
2376
+ # @return [TrueClass, FalseClass]
2377
+ # <tt>true</tt> if validation has been deferred,
2378
+ # <tt>false</tt> otherwise.
2379
+ def validation_deferred
2380
+ @validation_deferred ||= false
2381
+ end
2382
+
2383
+ ##
2384
+ # If URI validation needs to be disabled, this can be set to true.
2385
+ #
2386
+ # @param [TrueClass, FalseClass] new_validation_deferred
2387
+ # <tt>true</tt> if validation will be deferred,
2388
+ # <tt>false</tt> otherwise.
2389
+ def validation_deferred=(new_validation_deferred)
2390
+ @validation_deferred = new_validation_deferred
2391
+ validate unless @validation_deferred
1183
2392
  end
1184
-
2393
+
1185
2394
  private
2395
+ ##
1186
2396
  # Resolves paths to their simplest form.
2397
+ #
2398
+ # @param [String] path The path to normalize.
2399
+ #
2400
+ # @return [String] The normalized path.
1187
2401
  def self.normalize_path(path)
1188
2402
  # Section 5.2.4 of RFC 3986
1189
-
2403
+
1190
2404
  return nil if path.nil?
1191
2405
  normalized_path = path.dup
1192
2406
  previous_state = normalized_path.dup
@@ -1194,11 +2408,11 @@ module Addressable
1194
2408
  previous_state = normalized_path.dup
1195
2409
  normalized_path.gsub!(/\/\.\//, "/")
1196
2410
  normalized_path.gsub!(/\/\.$/, "/")
1197
- parent = normalized_path.scan(/\/([^\/]+)\/\.\.\//).flatten[0]
2411
+ parent = normalized_path[/\/([^\/]+)\/\.\.\//, 1]
1198
2412
  if parent != "." && parent != ".."
1199
2413
  normalized_path.gsub!(/\/#{parent}\/\.\.\//, "/")
1200
2414
  end
1201
- parent = normalized_path.scan(/\/([^\/]+)\/\.\.$/).flatten[0]
2415
+ parent = normalized_path[/\/([^\/]+)\/\.\.$/, 1]
1202
2416
  if parent != "." && parent != ".."
1203
2417
  normalized_path.gsub!(/\/#{parent}\/\.\.$/, "/")
1204
2418
  end
@@ -1208,36 +2422,44 @@ module Addressable
1208
2422
  return normalized_path
1209
2423
  end
1210
2424
 
2425
+ ##
1211
2426
  # Ensures that the URI is valid.
1212
2427
  def validate
2428
+ return if self.validation_deferred
1213
2429
  if self.scheme != nil &&
1214
2430
  (self.host == nil || self.host == "") &&
1215
2431
  (self.path == nil || self.path == "")
1216
2432
  raise InvalidURIError,
1217
- "Absolute URI missing hierarchical segment."
2433
+ "Absolute URI missing hierarchical segment: '#{self.to_s}'"
1218
2434
  end
1219
2435
  if self.host == nil
1220
- if self.specified_port != nil ||
2436
+ if self.port != nil ||
1221
2437
  self.user != nil ||
1222
2438
  self.password != nil
1223
- raise InvalidURIError, "Hostname not supplied."
2439
+ raise InvalidURIError, "Hostname not supplied: '#{self.to_s}'"
1224
2440
  end
1225
2441
  end
2442
+ return nil
1226
2443
  end
1227
-
2444
+
2445
+ ##
1228
2446
  # Replaces the internal state of self with the specified URI's state.
1229
2447
  # Used in destructive operations to avoid massive code repetition.
2448
+ #
2449
+ # @param [Addressable::URI] uri The URI to replace <tt>self</tt> with.
2450
+ #
2451
+ # @return [Addressable::URI] <tt>self</tt>.
1230
2452
  def replace_self(uri)
1231
2453
  # Reset dependent values
1232
- @userinfo = nil
1233
- @authority = nil
1234
-
2454
+ instance_variables.each do |var|
2455
+ instance_variable_set(var, nil)
2456
+ end
2457
+
1235
2458
  @scheme = uri.scheme
1236
2459
  @user = uri.user
1237
2460
  @password = uri.password
1238
2461
  @host = uri.host
1239
- @specified_port = uri.instance_variable_get("@specified_port")
1240
- @port = @specified_port.to_s.to_i
2462
+ @port = uri.port
1241
2463
  @path = uri.path
1242
2464
  @query = uri.query
1243
2465
  @fragment = uri.fragment