addressable 1.0.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
+ # coding:utf-8
1
2
  #--
2
- # Addressable, Copyright (c) 2006-2007 Bob Aman
3
+ # Addressable, Copyright (c) 2006-2008 Bob Aman
3
4
  #
4
5
  # Permission is hereby granted, free of charge, to any person obtaining
5
6
  # a copy of this software and associated documentation files (the
@@ -24,23 +25,43 @@
24
25
  $:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '/..')))
25
26
  $:.uniq!
26
27
 
27
- require 'addressable/version'
28
+ require "addressable/version"
29
+ require "addressable/idna"
28
30
 
29
31
  module Addressable
30
- # This is an implementation of a URI parser based on RFC 3986, 3987.
32
+ ##
33
+ # This is an implementation of a URI parser based on
34
+ # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>,
35
+ # <a href="http://www.ietf.org/rfc/rfc3987.txt">RFC 3987</a>.
31
36
  class URI
37
+ ##
32
38
  # Raised if something other than a uri is supplied.
33
39
  class InvalidURIError < StandardError
34
40
  end
35
-
41
+
42
+ ##
36
43
  # Raised if an invalid method option is supplied.
37
44
  class InvalidOptionError < StandardError
38
45
  end
39
-
40
- # Raised if an invalid method option is supplied.
41
- class InvalidTemplateValue < StandardError
46
+
47
+ ##
48
+ # Raised if an invalid template value is supplied.
49
+ class InvalidTemplateValueError < StandardError
50
+ end
51
+
52
+ ##
53
+ # Raised if an invalid template operator is used in a pattern.
54
+ class InvalidTemplateOperatorError < StandardError
42
55
  end
43
56
 
57
+ ##
58
+ # Raised to abort template extraction when an operator's value is unexpected.
59
+ class TemplateOperatorAbortedError < StandardError
60
+ end
61
+
62
+ ##
63
+ # Container for the character classes specified in
64
+ # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
44
65
  module CharacterClasses
45
66
  ALPHA = "a-zA-Z"
46
67
  DIGIT = "0-9"
@@ -54,24 +75,39 @@ module Addressable
54
75
  PATH = PCHAR + "\\/"
55
76
  QUERY = PCHAR + "\\/\\?"
56
77
  FRAGMENT = PCHAR + "\\/\\?"
57
- end
58
-
78
+ end
79
+
80
+ ##
59
81
  # Returns a URI object based on the parsed string.
60
- def self.parse(uri_string)
61
- return nil if uri_string.nil?
62
-
82
+ #
83
+ # @param [String, Addressable::URI, #to_str] uri
84
+ # The URI string to parse. No parsing is performed if the object is
85
+ # already an <tt>Addressable::URI</tt>.
86
+ #
87
+ # @return [Addressable::URI] The parsed URI.
88
+ def self.parse(uri)
89
+ # If we were given nil, return nil.
90
+ return nil unless uri
63
91
  # If a URI object is passed, just return itself.
64
- return uri_string if uri_string.kind_of?(self)
65
-
92
+ return uri if uri.kind_of?(self)
93
+ if !uri.respond_to?(:to_str)
94
+ raise TypeError, "Can't convert #{uri.class} into String."
95
+ end
96
+ # Otherwise, convert to a String
97
+ uri = uri.to_str
98
+
66
99
  # If a URI object of the Ruby standard library variety is passed,
67
100
  # convert it to a string, then parse the string.
68
- if uri_string.class.name =~ /^URI::/
69
- uri_string = uri_string.to_s
101
+ # We do the check this way because we don't want to accidentally
102
+ # cause a missing constant exception to be thrown.
103
+ if uri.class.name =~ /^URI\b/
104
+ uri = uri.to_s
70
105
  end
71
-
106
+
107
+ # This Regexp supplied as an example in RFC 3986, and it works great.
72
108
  uri_regex =
73
109
  /^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/
74
- scan = uri_string.scan(uri_regex)
110
+ scan = uri.scan(uri_regex)
75
111
  fragments = scan[0]
76
112
  return nil if fragments.nil?
77
113
  scheme = fragments[1]
@@ -85,47 +121,74 @@ module Addressable
85
121
  host = nil
86
122
  port = nil
87
123
  if authority != nil
88
- userinfo = authority.scan(/^([^\[\]]*)@/).flatten[0]
124
+ # The Regexp above doesn't split apart the authority.
125
+ userinfo = authority[/^([^\[\]]*)@/, 1]
89
126
  if userinfo != nil
90
- user = userinfo.strip.scan(/^([^:]*):?/).flatten[0]
91
- password = userinfo.strip.scan(/:(.*)$/).flatten[0]
127
+ user = userinfo.strip[/^([^:]*):?/, 1]
128
+ password = userinfo.strip[/:(.*)$/, 1]
92
129
  end
93
130
  host = authority.gsub(/^([^\[\]]*)@/, "").gsub(/:([^:@\[\]]*?)$/, "")
94
- port = authority.scan(/:([^:@\[\]]*?)$/).flatten[0]
131
+ port = authority[/:([^:@\[\]]*?)$/, 1]
95
132
  end
96
133
  if port == ""
97
134
  port = nil
98
135
  end
99
-
136
+
100
137
  return Addressable::URI.new(
101
- scheme, user, password, host, port, path, query, fragment)
138
+ :scheme => scheme,
139
+ :user => user,
140
+ :password => password,
141
+ :host => host,
142
+ :port => port,
143
+ :path => path,
144
+ :query => query,
145
+ :fragment => fragment
146
+ )
102
147
  end
103
-
148
+
149
+ ##
104
150
  # Converts an input to a URI. The input does not have to be a valid
105
- # URI -- the method will use heuristics to guess what URI was intended.
106
- # This is not standards compliant, merely user-friendly.
107
- def self.heuristic_parse(input, hints={})
108
- input = input.dup
151
+ # URI -- the method will use heuristics to guess what URI was intended.
152
+ # This is not standards-compliant, merely user-friendly.
153
+ #
154
+ # @param [String, Addressable::URI, #to_str] uri
155
+ # The URI string to parse. No parsing is performed if the object is
156
+ # already an <tt>Addressable::URI</tt>.
157
+ # @param [Hash] hints
158
+ # A <tt>Hash</tt> of hints to the heuristic parser. Defaults to
159
+ # <tt>{:scheme => "http"}</tt>.
160
+ #
161
+ # @return [Addressable::URI] The parsed URI.
162
+ def self.heuristic_parse(uri, hints={})
163
+ # If we were given nil, return nil.
164
+ return nil unless uri
165
+ # If a URI object is passed, just return itself.
166
+ return uri if uri.kind_of?(self)
167
+ if !uri.respond_to?(:to_str)
168
+ raise TypeError, "Can't convert #{uri.class} into String."
169
+ end
170
+ # Otherwise, convert to a String
171
+ uri = uri.to_str.dup
109
172
  hints = {
110
173
  :scheme => "http"
111
174
  }.merge(hints)
112
- case input
175
+ case uri
113
176
  when /^http:\/+/
114
- input.gsub!(/^http:\/+/, "http://")
177
+ uri.gsub!(/^http:\/+/, "http://")
115
178
  when /^feed:\/+http:\/+/
116
- input.gsub!(/^feed:\/+http:\/+/, "feed:http://")
179
+ uri.gsub!(/^feed:\/+http:\/+/, "feed:http://")
117
180
  when /^feed:\/+/
118
- input.gsub!(/^feed:\/+/, "feed://")
181
+ uri.gsub!(/^feed:\/+/, "feed://")
119
182
  when /^file:\/+/
120
- input.gsub!(/^file:\/+/, "file:///")
183
+ uri.gsub!(/^file:\/+/, "file:///")
121
184
  end
122
- parsed = self.parse(input)
185
+ parsed = self.parse(uri)
123
186
  if parsed.scheme =~ /^[^\/?#\.]+\.[^\/?#]+$/
124
- parsed = self.parse(hints[:scheme] + "://" + input)
187
+ parsed = self.parse(hints[:scheme] + "://" + uri)
125
188
  end
126
189
  if parsed.authority == nil
127
190
  if parsed.path =~ /^[^\/]+\./
128
- new_host = parsed.path.scan(/^([^\/]+\.[^\/]*)/).flatten[0]
191
+ new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
129
192
  if new_host
130
193
  new_path = parsed.path.gsub(
131
194
  Regexp.new("^" + Regexp.escape(new_host)), "")
@@ -137,77 +200,167 @@ module Addressable
137
200
  end
138
201
  return parsed
139
202
  end
140
-
141
- # Converts a path to a file protocol URI. If the path supplied is
203
+
204
+ ##
205
+ # Converts a path to a file scheme URI. If the path supplied is
142
206
  # relative, it will be returned as a relative URI. If the path supplied
143
- # is actually a URI, it will return the parsed URI.
207
+ # is actually a non-file URI, it will parse the URI as if it had been
208
+ # parsed with <tt>Addressable::URI.parse</tt>. Handles all of the
209
+ # various Microsoft-specific formats for specifying paths.
210
+ #
211
+ # @param [String, Addressable::URI, #to_str] path
212
+ # Typically a <tt>String</tt> path to a file or directory, but
213
+ # will return a sensible return value if an absolute URI is supplied
214
+ # instead.
215
+ #
216
+ # @return [Addressable::URI]
217
+ # The parsed file scheme URI or the original URI if some other URI
218
+ # scheme was provided.
219
+ #
220
+ # @example
221
+ # base = Addressable::URI.convert_path("/absolute/path/")
222
+ # uri = Addressable::URI.convert_path("relative/path")
223
+ # (base + uri).to_s
224
+ # #=> "file:///absolute/path/relative/path"
225
+ #
226
+ # Addressable::URI.convert_path(
227
+ # "c:\\windows\\My Documents 100%20\\foo.txt"
228
+ # ).to_s
229
+ # #=> "file:///c:/windows/My%20Documents%20100%20/foo.txt"
230
+ #
231
+ # Addressable::URI.convert_path("http://example.com/").to_s
232
+ # #=> "http://example.com/"
144
233
  def self.convert_path(path)
145
- return nil if path.nil?
146
-
147
- converted_uri = path.strip
148
- if converted_uri.length > 0 && converted_uri[0..0] == "/"
149
- converted_uri = "file://" + converted_uri
150
- end
151
- if converted_uri.length > 0 &&
152
- converted_uri.scan(/^[a-zA-Z]:[\\\/]/).size > 0
153
- converted_uri = "file:///" + converted_uri
234
+ # If we were given nil, return nil.
235
+ return nil unless path
236
+ # If a URI object is passed, just return itself.
237
+ return path if path.kind_of?(self)
238
+ if !path.respond_to?(:to_str)
239
+ raise TypeError, "Can't convert #{path.class} into String."
154
240
  end
155
- converted_uri.gsub!(/^file:\/*/i, "file:///")
156
- if converted_uri =~ /^file:/i
241
+ # Otherwise, convert to a String
242
+ path = path.to_str.strip
243
+
244
+ path.gsub!(/^file:\/?\/?/, "") if path =~ /^file:\/?\/?/
245
+ path = "/" + path if path =~ /^([a-zA-Z])(\||:)/
246
+ uri = self.parse(path)
247
+
248
+ if uri.scheme == nil
157
249
  # Adjust windows-style uris
158
- converted_uri.gsub!(/^file:\/\/\/([a-zA-Z])\|/i, 'file:///\1:')
159
- converted_uri.gsub!(/\\/, '/')
160
- converted_uri = self.parse(converted_uri).normalize
161
- if File.exists?(converted_uri.path) &&
162
- File.stat(converted_uri.path).directory?
163
- converted_uri.path.gsub!(/\/$/, "")
164
- converted_uri.path = converted_uri.path + '/'
250
+ uri.path.gsub!(/^\/?([a-zA-Z])\|(\\|\/)/, "/\\1:/")
251
+ uri.path.gsub!(/\\/, "/")
252
+ if File.exists?(uri.path) &&
253
+ File.stat(uri.path).directory?
254
+ uri.path.gsub!(/\/$/, "")
255
+ uri.path = uri.path + '/'
165
256
  end
166
- else
167
- converted_uri = self.parse(converted_uri)
257
+
258
+ # If the path is absolute, set the scheme and host.
259
+ if uri.path =~ /^\//
260
+ uri.scheme = "file"
261
+ uri.host = ""
262
+ end
263
+ uri.normalize!
168
264
  end
169
-
170
- return converted_uri
265
+
266
+ return uri
171
267
  end
172
-
268
+
269
+ ##
173
270
  # Expands a URI template into a full URI.
174
271
  #
175
- # An optional processor object may be supplied. The object should
176
- # respond to either the :validate or :transform messages or both.
177
- # Both the :validate and :transform methods should take two parameters:
178
- # :name and :value. The :validate method should return true or false;
179
- # true if the value of the variable is valid, false otherwise. The
180
- # :transform method should return the transformed variable value as a
181
- # string.
182
- #
183
- # An example:
184
- #
185
- # class ExampleProcessor
186
- # def self.validate(name, value)
187
- # return !!(value =~ /^[\w ]+$/) if name == "query"
188
- # return true
189
- # end
190
- #
191
- # def self.transform(name, value)
192
- # return value.gsub(/ /, "+") if name == "query"
193
- # return value
194
- # end
195
- # end
196
- #
197
- # Addressable::URI.expand_template(
198
- # "http://example.com/search/{query}/",
199
- # {"query" => "an example search query"},
200
- # ExampleProcessor).to_s
201
- # => "http://example.com/search/an+example+search+query/"
272
+ # @param [String, #to_str] pattern The URI template pattern.
273
+ # @param [Hash] mapping The mapping that corresponds to the pattern.
274
+ # @param [#validate, #transform] processor
275
+ # An optional processor object may be supplied. The object should
276
+ # respond to either the <tt>validate</tt> or <tt>transform</tt> messages
277
+ # or both. Both the <tt>validate</tt> and <tt>transform</tt> methods
278
+ # should take two parameters: <tt>name</tt> and <tt>value</tt>. The
279
+ # <tt>validate</tt> method should return <tt>true</tt> or
280
+ # <tt>false</tt>; <tt>true</tt> if the value of the variable is valid,
281
+ # <tt>false</tt> otherwise. An <tt>InvalidTemplateValueError</tt>
282
+ # exception will be raised if the value is invalid. The
283
+ # <tt>transform</tt> method should return the transformed variable
284
+ # value as a <tt>String</tt>.
285
+ #
286
+ # @return [Addressable::URI] The expanded URI template.
287
+ #
288
+ # @example
289
+ # class ExampleProcessor
290
+ # def self.validate(name, value)
291
+ # return !!(value =~ /^[\w ]+$/) if name == "query"
292
+ # return true
293
+ # end
294
+ #
295
+ # def self.transform(name, value)
296
+ # return value.gsub(/ /, "+") if name == "query"
297
+ # return value
298
+ # end
299
+ # end
300
+ #
301
+ # Addressable::URI.expand_template(
302
+ # "http://example.com/search/{query}/",
303
+ # {"query" => "an example search query"},
304
+ # ExampleProcessor
305
+ # ).to_s
306
+ # #=> "http://example.com/search/an+example+search+query/"
307
+ #
308
+ # Addressable::URI.expand_template(
309
+ # "http://example.com/search/{-list|+|query}/",
310
+ # {"query" => "an example search query".split(" ")}
311
+ # ).to_s
312
+ # #=> "http://example.com/search/an+example+search+query/"
313
+ #
314
+ # Addressable::URI.expand_template(
315
+ # "http://example.com/search/{query}/",
316
+ # {"query" => "bogus!"},
317
+ # ExampleProcessor
318
+ # ).to_s
319
+ # #=> Addressable::URI::InvalidTemplateValueError
202
320
  def self.expand_template(pattern, mapping, processor=nil)
321
+
322
+ # FIXME: MUST REFACTOR!!!
323
+
203
324
  result = pattern.dup
204
- for name, value in mapping
205
- transformed_value = value
325
+
326
+ reserved = Addressable::URI::CharacterClasses::RESERVED
327
+ unreserved = Addressable::URI::CharacterClasses::UNRESERVED
328
+ anything = reserved + unreserved
329
+ operator_expansion =
330
+ /\{-([a-zA-Z]+)\|([#{anything}]+)\|([#{anything}]+)\}/
331
+ variable_expansion = /\{([#{anything}]+?)(=([#{anything}]+))?\}/
332
+
333
+ transformed_mapping = mapping.inject({}) do |accu, pair|
334
+ name, value = pair
335
+ unless value.respond_to?(:to_ary) || value.respond_to?(:to_str)
336
+ raise TypeError,
337
+ "Can't convert #{value.class} into String or Array."
338
+ end
339
+ transformed_value =
340
+ value.respond_to?(:to_ary) ? value.to_ary : value.to_str
341
+
342
+ # Handle percent escaping, and unicode normalization
343
+ if transformed_value.kind_of?(Array)
344
+ transformed_value.map! do |value|
345
+ self.encode_component(
346
+ Addressable::IDNA.unicode_normalize_kc(value),
347
+ Addressable::URI::CharacterClasses::UNRESERVED
348
+ )
349
+ end
350
+ else
351
+ transformed_value = self.encode_component(
352
+ Addressable::IDNA.unicode_normalize_kc(transformed_value),
353
+ Addressable::URI::CharacterClasses::UNRESERVED
354
+ )
355
+ end
356
+
357
+ # Process, if we've got a processor
206
358
  if processor != nil
207
359
  if processor.respond_to?(:validate)
208
360
  if !processor.validate(name, value)
209
- raise InvalidTemplateValue,
210
- "(#{name}, #{value}) is an invalid template value."
361
+ display_value = value.kind_of?(Array) ? value.inspect : value
362
+ raise InvalidTemplateValueError,
363
+ "#{name}=#{display_value} is an invalid template value."
211
364
  end
212
365
  end
213
366
  if processor.respond_to?(:transform)
@@ -215,229 +368,773 @@ module Addressable
215
368
  end
216
369
  end
217
370
 
218
- # Handle percent escaping
219
- transformed_value = self.encode_segment(transformed_value,
220
- Addressable::URI::CharacterClasses::RESERVED +
221
- Addressable::URI::CharacterClasses::UNRESERVED)
222
-
223
- result.gsub!(/\{#{Regexp.escape(name)}\}/, transformed_value)
371
+ accu[name] = transformed_value
372
+ accu
224
373
  end
225
374
  result.gsub!(
226
- /\{[#{Addressable::URI::CharacterClasses::UNRESERVED}]+\}/, "")
375
+ /#{operator_expansion}|#{variable_expansion}/
376
+ ) do |capture|
377
+ if capture =~ operator_expansion
378
+ operator, argument, variables, default_mapping =
379
+ parse_template_expansion(capture, transformed_mapping)
380
+ expand_method = "expand_#{operator}_operator"
381
+ if ([expand_method, expand_method.to_sym] & private_methods).empty?
382
+ raise InvalidTemplateOperatorError,
383
+ "Invalid template operator: #{operator}"
384
+ else
385
+ send(expand_method.to_sym, argument, variables, default_mapping)
386
+ end
387
+ else
388
+ varname, _, vardefault = capture.scan(/^\{(.+?)(=(.*))?\}$/)[0]
389
+ transformed_mapping[varname] || vardefault
390
+ end
391
+ end
227
392
  return Addressable::URI.parse(result)
228
393
  end
229
-
394
+
395
+ ##
396
+ # Expands a URI Template opt operator.
397
+ #
398
+ # @param [String] argument The argument to the operator.
399
+ # @param [Array] variables The variables the operator is working on.
400
+ # @param [Hash] mapping The mapping of variables to values.
401
+ #
402
+ # @return [String] The expanded result.
403
+ def self.expand_opt_operator(argument, variables, mapping)
404
+ if (variables.any? do |variable|
405
+ mapping[variable] != [] &&
406
+ mapping[variable]
407
+ end)
408
+ argument
409
+ else
410
+ ""
411
+ end
412
+ end
413
+ class <<self; private :expand_opt_operator; end
414
+
415
+ ##
416
+ # Expands a URI Template neg operator.
417
+ #
418
+ # @param [String] argument The argument to the operator.
419
+ # @param [Array] variables The variables the operator is working on.
420
+ # @param [Hash] mapping The mapping of variables to values.
421
+ #
422
+ # @return [String] The expanded result.
423
+ def self.expand_neg_operator(argument, variables, mapping)
424
+ if (variables.any? do |variable|
425
+ mapping[variable] != [] &&
426
+ mapping[variable]
427
+ end)
428
+ ""
429
+ else
430
+ argument
431
+ end
432
+ end
433
+ class <<self; private :expand_neg_operator; end
434
+
435
+ ##
436
+ # Expands a URI Template prefix operator.
437
+ #
438
+ # @param [String] argument The argument to the operator.
439
+ # @param [Array] variables The variables the operator is working on.
440
+ # @param [Hash] mapping The mapping of variables to values.
441
+ #
442
+ # @return [String] The expanded result.
443
+ def self.expand_prefix_operator(argument, variables, mapping)
444
+ if variables.size != 1
445
+ raise InvalidTemplateOperatorError,
446
+ "Template operator 'prefix' takes exactly one variable."
447
+ end
448
+ value = mapping[variables.first]
449
+ if value.kind_of?(Array)
450
+ (value.map { |list_value| argument + list_value }).join("")
451
+ else
452
+ argument + value.to_s
453
+ end
454
+ end
455
+ class <<self; private :expand_prefix_operator; end
456
+
457
+ ##
458
+ # Expands a URI Template suffix operator.
459
+ #
460
+ # @param [String] argument The argument to the operator.
461
+ # @param [Array] variables The variables the operator is working on.
462
+ # @param [Hash] mapping The mapping of variables to values.
463
+ #
464
+ # @return [String] The expanded result.
465
+ def self.expand_suffix_operator(argument, variables, mapping)
466
+ if variables.size != 1
467
+ raise InvalidTemplateOperatorError,
468
+ "Template operator 'suffix' takes exactly one variable."
469
+ end
470
+ value = mapping[variables.first]
471
+ if value.kind_of?(Array)
472
+ (value.map { |list_value| list_value + argument }).join("")
473
+ else
474
+ value.to_s + argument
475
+ end
476
+ end
477
+ class <<self; private :expand_suffix_operator; end
478
+
479
+ ##
480
+ # Expands a URI Template join operator.
481
+ #
482
+ # @param [String] argument The argument to the operator.
483
+ # @param [Array] variables The variables the operator is working on.
484
+ # @param [Hash] mapping The mapping of variables to values.
485
+ #
486
+ # @return [String] The expanded result.
487
+ def self.expand_join_operator(argument, variables, mapping)
488
+ variable_values = variables.inject([]) do |accu, variable|
489
+ if !mapping[variable].kind_of?(Array)
490
+ if mapping[variable]
491
+ accu << variable + "=" + (mapping[variable])
492
+ end
493
+ else
494
+ raise InvalidTemplateOperatorError,
495
+ "Template operator 'join' does not accept Array values."
496
+ end
497
+ accu
498
+ end
499
+ variable_values.join(argument)
500
+ end
501
+ class <<self; private :expand_join_operator; end
502
+
503
+ ##
504
+ # Expands a URI Template list operator.
505
+ #
506
+ # @param [String] argument The argument to the operator.
507
+ # @param [Array] variables The variables the operator is working on.
508
+ # @param [Hash] mapping The mapping of variables to values.
509
+ #
510
+ # @return [String] The expanded result.
511
+ def self.expand_list_operator(argument, variables, mapping)
512
+ if variables.size != 1
513
+ raise InvalidTemplateOperatorError,
514
+ "Template operator 'list' takes exactly one variable."
515
+ end
516
+ mapping[variables.first].join(argument)
517
+ end
518
+ class <<self; private :expand_list_operator; end
519
+
520
+ ##
521
+ # Parses a URI template expansion <tt>String</tt>.
522
+ #
523
+ # @param [String] capture The expansion <tt>String</tt>.
524
+ # @param [Hash] mapping The mapping to merge defaults into.
525
+ #
526
+ # @return [Array]
527
+ # A tuple of the operator, argument, variables, and mapping.
528
+ def self.parse_template_expansion(capture, mapping)
529
+ operator, argument, variables = capture[1...-1].split("|")
530
+ operator.gsub!(/^\-/, "")
531
+ variables = variables.split(",")
532
+ mapping = (variables.inject({}) do |accu, var|
533
+ varname, _, vardefault = var.scan(/^(.+?)(=(.*))?$/)[0]
534
+ accu[varname] = vardefault
535
+ accu
536
+ end).merge(mapping)
537
+ variables = variables.map { |var| var.gsub(/=.*$/, "") }
538
+ return operator, argument, variables, mapping
539
+ end
540
+ class <<self; private :parse_template_expansion; end
541
+
542
+ ##
230
543
  # Extracts a mapping from the URI using a URI Template pattern.
231
- # Returns nil if the pattern doesn't match the URI.
232
- #
233
- # An optional processor object may be supplied. The object should
234
- # respond to either the :restore or :match messages or both.
235
- # The :restore method should take two parameters: :name and :value.
236
- # The :restore method should reverse any transformations that have been
237
- # performed on the value to ensure a valid URI. The :match method
238
- # should take a single parameter: :name. The :match method should
239
- # return a String containing a regular expression capture group for
240
- # matching on that particular variable. The default value is ".*".
241
- #
242
- # An example:
243
- #
244
- # class ExampleProcessor
245
- # def self.restore(name, value)
246
- # return value.gsub(/\+/, " ") if name == "query"
247
- # return value
248
- # end
249
- #
250
- # def self.match(name)
251
- # return ".*?" if name == "first"
252
- # return ".*"
253
- # end
254
- # end
255
- #
256
- # uri = Addressable::URI.parse(
257
- # "http://example.com/search/an+example+search+query/")
258
- # uri.extract_mapping("http://example.com/search/{query}/",
259
- # ExampleProcessor)
260
- # => {"query" => "an example search query"}
261
- #
262
- # uri = Addressable::URI.parse(
263
- # "http://example.com/a/b/c/")
264
- # uri.extract_mapping("http://example.com/{first}/{second}/",
265
- # ExampleProcessor)
266
- # => {"first" => "a", "second" => "b/c"}
544
+ #
545
+ # @param [String] pattern
546
+ # A URI template pattern.
547
+ # @param [#restore, #match] processor
548
+ # A template processor object may optionally be supplied.
549
+ # The object should respond to either the <tt>restore</tt> or
550
+ # <tt>match</tt> messages or both. The <tt>restore</tt> method should
551
+ # take two parameters: [String] name and [String] value. The
552
+ # <tt>restore</tt> method should reverse any transformations that have
553
+ # been performed on the value to ensure a valid URI. The
554
+ # <tt>match</tt> method should take a single parameter: [String] name.
555
+ # The <tt>match</tt> method should return a <tt>String</tt> containing
556
+ # a regular expression capture group for matching on that particular
557
+ # variable. The default value is ".*?". The <tt>match</tt> method has
558
+ # no effect on multivariate operator expansions.
559
+ # @return [Hash, NilClass]
560
+ # The <tt>Hash</tt> mapping that was extracted from the URI, or
561
+ # <tt>nil</tt> if the URI didn't match the template.
562
+ #
563
+ # @example
564
+ # class ExampleProcessor
565
+ # def self.restore(name, value)
566
+ # return value.gsub(/\+/, " ") if name == "query"
567
+ # return value
568
+ # end
569
+ #
570
+ # def self.match(name)
571
+ # return ".*?" if name == "first"
572
+ # return ".*"
573
+ # end
574
+ # end
575
+ #
576
+ # uri = Addressable::URI.parse(
577
+ # "http://example.com/search/an+example+search+query/"
578
+ # )
579
+ # uri.extract_mapping(
580
+ # "http://example.com/search/{query}/",
581
+ # ExampleProcessor
582
+ # )
583
+ # #=> {"query" => "an example search query"}
584
+ #
585
+ # uri = Addressable::URI.parse("http://example.com/a/b/c/")
586
+ # uri.extract_mapping(
587
+ # "http://example.com/{first}/{second}/",
588
+ # ExampleProcessor
589
+ # )
590
+ # #=> {"first" => "a", "second" => "b/c"}
591
+ #
592
+ # uri = Addressable::URI.parse("http://example.com/a/b/c/")
593
+ # uri.extract_mapping(
594
+ # "http://example.com/{first}/{-list|/|second}/"
595
+ # )
596
+ # #=> {"first" => "a", "second" => ["b", "c"]}
267
597
  def extract_mapping(pattern, processor=nil)
598
+ reserved = Addressable::URI::CharacterClasses::RESERVED
599
+ unreserved = Addressable::URI::CharacterClasses::UNRESERVED
600
+ anything = reserved + unreserved
601
+ operator_expansion =
602
+ /\{-([a-zA-Z]+)\|([#{anything}]+)\|([#{anything}]+)\}/
603
+ variable_expansion = /\{([#{anything}]+?)(=([#{anything}]+))?\}/
604
+
605
+ # First, we need to process the pattern, and extract the values.
606
+ expansions, expansion_regexp =
607
+ parse_template_pattern(pattern, processor)
608
+ unparsed_values = self.to_s.scan(expansion_regexp).flatten
609
+
268
610
  mapping = {}
269
- variable_regexp =
270
- /\{([#{Addressable::URI::CharacterClasses::UNRESERVED}]+)\}/
271
-
272
- # Get all the variables in the pattern
273
- variables = pattern.scan(variable_regexp).flatten
274
-
275
- # Initialize all result values to the empty string
276
- variables.each { |v| mapping[v] = "" }
277
-
278
- # Escape the pattern
279
- escaped_pattern =
280
- Regexp.escape(pattern).gsub(/\\\{/, "{").gsub(/\\\}/, "}")
281
-
611
+
612
+ if self.to_s == pattern
613
+ return mapping
614
+ elsif expansions.size > 0 && expansions.size == unparsed_values.size
615
+ expansions.each_with_index do |expansion, index|
616
+ unparsed_value = unparsed_values[index]
617
+ if expansion =~ operator_expansion
618
+ operator, argument, variables =
619
+ parse_template_expansion(expansion)
620
+ extract_method = "extract_#{operator}_operator"
621
+ if ([extract_method, extract_method.to_sym] &
622
+ private_methods).empty?
623
+ raise InvalidTemplateOperatorError,
624
+ "Invalid template operator: #{operator}"
625
+ else
626
+ begin
627
+ send(
628
+ extract_method.to_sym, unparsed_value, processor,
629
+ argument, variables, mapping
630
+ )
631
+ rescue TemplateOperatorAbortedError
632
+ return nil
633
+ end
634
+ end
635
+ else
636
+ name = expansion[variable_expansion, 1]
637
+ value = unparsed_value
638
+ if processor != nil && processor.respond_to?(:restore)
639
+ value = processor.restore(name, value)
640
+ end
641
+ mapping[name] = value
642
+ end
643
+ end
644
+ return mapping
645
+ else
646
+ return nil
647
+ end
648
+ end
649
+
650
+ ##
651
+ # Generates the <tt>Regexp</tt> that parses a template pattern.
652
+ #
653
+ # @param [String] pattern The URI template pattern.
654
+ # @param [#match] processor The template processor to use.
655
+ #
656
+ # @return [Array]
657
+ # A tuple of the expansions found in the pattern and a regular expression which may be used to extract their values from a URI.
658
+ def parse_template_pattern(pattern, processor)
659
+ reserved = Addressable::URI::CharacterClasses::RESERVED
660
+ unreserved = Addressable::URI::CharacterClasses::UNRESERVED
661
+ anything = reserved + unreserved
662
+ operator_expansion =
663
+ /\{-[a-zA-Z]+\|[#{anything}]+\|[#{anything}]+\}/
664
+ variable_expansion = /\{([#{anything}]+?)(=([#{anything}]+))?\}/
665
+
666
+ # Escape the pattern. The two gsubs restore the escaped curly braces
667
+ # back to their original form. Basically, escape everything that isn't
668
+ # within an expansion.
669
+ escaped_pattern = Regexp.escape(
670
+ pattern
671
+ ).gsub(/\\\{(.*?)\\\}/) do |escaped|
672
+ escaped.gsub(/\\(.)/, "\\1")
673
+ end
674
+
675
+ expansions = []
676
+
282
677
  # Create a regular expression that captures the values of the
283
678
  # variables in the URI.
284
- regexp_string = escaped_pattern.gsub(variable_regexp) do |v|
285
- capture_group = "(.*)"
286
-
287
- if processor != nil
288
- if processor.respond_to?(:match)
289
- name = v.scan(variable_regexp).flatten[0]
679
+ regexp_string = escaped_pattern.gsub(
680
+ /#{operator_expansion}|#{variable_expansion}/
681
+ ) do |expansion|
682
+ expansions << expansion
683
+ if expansion =~ operator_expansion
684
+ capture_group = "(.*)"
685
+ if processor != nil && processor.respond_to?(:match)
686
+ # We can only lookup the match values for single variable
687
+ # operator expansions. Besides, ".*" is usually the only
688
+ # reasonable value for multivariate operators anyways.
689
+ operator, _, names, _ =
690
+ parse_template_expansion(expansion)
691
+ if ["prefix", "suffix", "list"].include?(operator)
692
+ capture_group = "(#{processor.match(names.first)})"
693
+ end
694
+ end
695
+ capture_group
696
+ else
697
+ capture_group = "(.*?)"
698
+ if processor != nil && processor.respond_to?(:match)
699
+ name = expansion[/\{([^\}=]+)(=[^\}]+)?\}/, 1]
290
700
  capture_group = "(#{processor.match(name)})"
291
701
  end
702
+ capture_group
292
703
  end
293
-
294
- capture_group
295
704
  end
296
-
705
+
297
706
  # Ensure that the regular expression matches the whole URI.
298
707
  regexp_string = "^#{regexp_string}$"
299
-
300
- regexp = Regexp.new(regexp_string)
301
- values = self.to_s.scan(regexp).flatten
302
-
303
- if variables.size == values.size && variables.size > 0
304
- # We have a match.
305
- for i in 0...variables.size
306
- name = variables[i]
307
- value = values[i]
308
-
309
- if processor != nil
310
- if processor.respond_to?(:restore)
311
- value = processor.restore(name, value)
312
- end
313
- end
314
-
315
- mapping[name] = value
708
+
709
+ return expansions, Regexp.new(regexp_string)
710
+ end
711
+ private :parse_template_pattern
712
+
713
+ ##
714
+ # Parses a URI template expansion <tt>String</tt>.
715
+ #
716
+ # @param [String] capture The expansion <tt>String</tt>.
717
+ #
718
+ # @return [Array]
719
+ # A tuple of the operator, argument, variables.
720
+ def parse_template_expansion(capture)
721
+ operator, argument, variables = capture[1...-1].split("|")
722
+ operator.gsub!(/^\-/, "")
723
+ variables = variables.split(",").map { |var| var.gsub(/=.*$/, "") }
724
+ return operator, argument, variables
725
+ end
726
+ private :parse_template_expansion
727
+
728
+
729
+ ##
730
+ # Extracts a URI Template opt operator.
731
+ #
732
+ # @param [String] value The unparsed value to extract from.
733
+ # @param [#restore] processor The processor object.
734
+ # @param [String] argument The argument to the operator.
735
+ # @param [Array] variables The variables the operator is working on.
736
+ # @param [Hash] mapping The mapping of variables to values.
737
+ #
738
+ # @return [String] The extracted result.
739
+ def extract_opt_operator(
740
+ value, processor, argument, variables, mapping)
741
+ if value != "" && value != argument
742
+ raise TemplateOperatorAbortedError,
743
+ "Value for template operator 'neg' was unexpected."
744
+ end
745
+ end
746
+ private :extract_opt_operator
747
+
748
+ ##
749
+ # Extracts a URI Template neg operator.
750
+ #
751
+ # @param [String] value The unparsed value to extract from.
752
+ # @param [#restore] processor The processor object.
753
+ # @param [String] argument The argument to the operator.
754
+ # @param [Array] variables The variables the operator is working on.
755
+ # @param [Hash] mapping The mapping of variables to values.
756
+ #
757
+ # @return [String] The extracted result.
758
+ def extract_neg_operator(
759
+ value, processor, argument, variables, mapping)
760
+ if value != "" && value != argument
761
+ raise TemplateOperatorAbortedError,
762
+ "Value for template operator 'neg' was unexpected."
763
+ end
764
+ end
765
+ private :extract_neg_operator
766
+
767
+ ##
768
+ # Extracts a URI Template prefix operator.
769
+ #
770
+ # @param [String] value The unparsed value to extract from.
771
+ # @param [#restore] processor The processor object.
772
+ # @param [String] argument The argument to the operator.
773
+ # @param [Array] variables The variables the operator is working on.
774
+ # @param [Hash] mapping The mapping of variables to values.
775
+ #
776
+ # @return [String] The extracted result.
777
+ def extract_prefix_operator(
778
+ value, processor, argument, variables, mapping)
779
+ if variables.size != 1
780
+ raise InvalidTemplateOperatorError,
781
+ "Template operator 'suffix' takes exactly one variable."
782
+ end
783
+ if value[0...argument.size] != argument
784
+ raise TemplateOperatorAbortedError,
785
+ "Value for template operator 'prefix' missing expected prefix."
786
+ end
787
+ values = value.split(argument)
788
+ # Compensate for the crappy result from split.
789
+ if value[-argument.size..-1] == argument
790
+ values << ""
791
+ end
792
+ if values[0] == ""
793
+ values.shift
794
+ end
795
+ if processor && processor.respond_to?(:restore)
796
+ values.map! { |value| processor.restore(variables.first, value) }
797
+ end
798
+ mapping[variables.first] = values
799
+ end
800
+ private :extract_prefix_operator
801
+
802
+ ##
803
+ # Extracts a URI Template suffix operator.
804
+ #
805
+ # @param [String] value The unparsed value to extract from.
806
+ # @param [#restore] processor The processor object.
807
+ # @param [String] argument The argument to the operator.
808
+ # @param [Array] variables The variables the operator is working on.
809
+ # @param [Hash] mapping The mapping of variables to values.
810
+ #
811
+ # @return [String] The extracted result.
812
+ def extract_suffix_operator(
813
+ value, processor, argument, variables, mapping)
814
+ if variables.size != 1
815
+ raise InvalidTemplateOperatorError,
816
+ "Template operator 'suffix' takes exactly one variable."
817
+ end
818
+ if value[-argument.size..-1] != argument
819
+ raise TemplateOperatorAbortedError,
820
+ "Value for template operator 'suffix' missing expected suffix."
821
+ end
822
+ values = value.split(argument)
823
+ # Compensate for the crappy result from split.
824
+ if value[-argument.size..-1] == argument
825
+ values << ""
826
+ end
827
+ if values[-1] == ""
828
+ values.pop
829
+ end
830
+ if processor && processor.respond_to?(:restore)
831
+ values.map! { |value| processor.restore(variables.first, value) }
832
+ end
833
+ mapping[variables.first] = values
834
+ end
835
+ private :extract_suffix_operator
836
+
837
+ ##
838
+ # Extracts a URI Template join operator.
839
+ #
840
+ # @param [String] value The unparsed value to extract from.
841
+ # @param [#restore] processor The processor object.
842
+ # @param [String] argument The argument to the operator.
843
+ # @param [Array] variables The variables the operator is working on.
844
+ # @param [Hash] mapping The mapping of variables to values.
845
+ #
846
+ # @return [String] The extracted result.
847
+ def extract_join_operator(value, processor, argument, variables, mapping)
848
+ unparsed_values = value.split(argument)
849
+ parsed_variables = []
850
+ for unparsed_value in unparsed_values
851
+ name = unparsed_value[/^(.+?)=(.+)$/, 1]
852
+ parsed_variables << name
853
+ parsed_value = unparsed_value[/^(.+?)=(.+)$/, 2]
854
+ if processor && processor.respond_to?(:restore)
855
+ parsed_value = processor.restore(name, parsed_value)
316
856
  end
317
- return mapping
318
- elsif self.to_s == pattern
319
- # The pattern contained no variables but still matched.
320
- return mapping
321
- else
322
- # Pattern failed to match URI.
323
- return nil
857
+ mapping[name] = parsed_value
858
+ end
859
+ if (parsed_variables & variables) != parsed_variables
860
+ raise TemplateOperatorAbortedError,
861
+ "Template operator 'join' variable mismatch: " +
862
+ "#{parsed_variables.inspect}, #{variables.inspect}"
324
863
  end
325
864
  end
326
-
327
- # Joins several uris together.
865
+ private :extract_join_operator
866
+
867
+ ##
868
+ # Extracts a URI Template list operator.
869
+ #
870
+ # @param [String] value The unparsed value to extract from.
871
+ # @param [#restore] processor The processor object.
872
+ # @param [String] argument The argument to the operator.
873
+ # @param [Array] variables The variables the operator is working on.
874
+ # @param [Hash] mapping The mapping of variables to values.
875
+ #
876
+ # @return [String] The extracted result.
877
+ def extract_list_operator(value, processor, argument, variables, mapping)
878
+ if variables.size != 1
879
+ raise InvalidTemplateOperatorError,
880
+ "Template operator 'list' takes exactly one variable."
881
+ end
882
+ values = value.split(argument)
883
+ if processor && processor.respond_to?(:restore)
884
+ values.map! { |value| processor.restore(variables.first, value) }
885
+ end
886
+ mapping[variables.first] = values
887
+ end
888
+ private :extract_list_operator
889
+
890
+ ##
891
+ # Joins several URIs together.
892
+ #
893
+ # @param [String, Addressable::URI, #to_str] *uris
894
+ # The URIs to join.
895
+ #
896
+ # @return [Addressable::URI] The joined URI.
897
+ #
898
+ # @example
899
+ # base = "http://example.com/"
900
+ # uri = Addressable::URI.parse("relative/path")
901
+ # Addressable::URI.join(base, uri)
902
+ # #=> #<Addressable::URI:0xcab390 URI:http://example.com/relative/path>
328
903
  def self.join(*uris)
329
904
  uri_objects = uris.collect do |uri|
330
- uri.kind_of?(self) ? uri : self.parse(uri.to_s)
905
+ if !uri.respond_to?(:to_str)
906
+ raise TypeError, "Can't convert #{uri.class} into String."
907
+ end
908
+ uri.kind_of?(self) ? uri : self.parse(uri.to_str)
331
909
  end
332
910
  result = uri_objects.shift.dup
333
911
  for uri in uri_objects
334
- result.merge!(uri)
912
+ result.join!(uri)
335
913
  end
336
914
  return result
337
915
  end
338
-
339
- # Percent encodes a URI segment. Returns a string. Takes an optional
340
- # character class parameter, which should be specified as a string
341
- # containing a regular expression character class (not including the
342
- # surrounding square brackets). The character class parameter defaults
343
- # to the reserved plus unreserved character classes specified in
344
- # RFC 3986. Usage of the constants within the CharacterClasses module is
345
- # highly recommended when using this method.
346
- #
347
- # An example:
348
- #
349
- # Addressable::URI.escape_segment("simple-example", "b-zB-Z0-9")
350
- # => "simple%2Dex%61mple"
351
- def self.encode_segment(segment, character_class=
352
- Addressable::URI::CharacterClasses::RESERVED +
353
- Addressable::URI::CharacterClasses::UNRESERVED)
354
- return nil if segment.nil?
355
- return segment.gsub(
356
- /[^#{character_class}]/
357
- ) do |sequence|
358
- ("%" + sequence.unpack('C')[0].to_s(16).upcase)
359
- end
360
- end
361
-
362
- # Unencodes any percent encoded characters within a URI segment.
363
- # Returns a string.
364
- def self.unencode_segment(segment)
365
- return nil if segment.nil?
366
- return segment.to_s.gsub(/%[0-9a-f]{2}/i) do |sequence|
916
+
917
+ ##
918
+ # Percent encodes a URI component.
919
+ #
920
+ # @param [String, #to_str] component The URI component to encode.
921
+ #
922
+ # @param [String, Regexp] character_class
923
+ # The characters which are not percent encoded. If a <tt>String</tt>
924
+ # is passed, the <tt>String</tt> must be formatted as a regular
925
+ # expression character class. (Do not include the surrounding square
926
+ # brackets.) For example, <tt>"b-zB-Z0-9"</tt> would cause everything
927
+ # but the letters 'b' through 'z' and the numbers '0' through '9' to be
928
+ # percent encoded. If a <tt>Regexp</tt> is passed, the value
929
+ # <tt>/[^b-zB-Z0-9]/</tt> would have the same effect.
930
+ # A set of useful <tt>String</tt> values may be found in the
931
+ # <tt>Addressable::URI::CharacterClasses</tt> module. The default value
932
+ # is the reserved plus unreserved character classes specified in
933
+ # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
934
+ #
935
+ # @return [String] The encoded component.
936
+ #
937
+ # @example
938
+ # Addressable::URI.encode_component("simple/example", "b-zB-Z0-9")
939
+ # => "simple%2Fex%61mple"
940
+ # Addressable::URI.encode_component("simple/example", /[^b-zB-Z0-9]/)
941
+ # => "simple%2Fex%61mple"
942
+ # Addressable::URI.encode_component(
943
+ # "simple/example", Addressable::URI::CharacterClasses::UNRESERVED
944
+ # )
945
+ # => "simple%2Fexample"
946
+ def self.encode_component(component, character_class=
947
+ CharacterClasses::RESERVED + CharacterClasses::UNRESERVED)
948
+ return nil if component.nil?
949
+ if !component.respond_to?(:to_str)
950
+ raise TypeError, "Can't convert #{component.class} into String."
951
+ end
952
+ component = component.to_str
953
+ if ![String, Regexp].include?(character_class.class)
954
+ raise TypeError,
955
+ "Expected String or Regexp, got #{character_class.inspect}"
956
+ end
957
+ if character_class.kind_of?(String)
958
+ character_class = /[^#{character_class}]/
959
+ end
960
+ return component.gsub(character_class) do |sequence|
961
+ (sequence.unpack('C*').map { |c| "%#{c.to_s(16).upcase}" }).join("")
962
+ end
963
+ end
964
+
965
+ class << self
966
+ alias_method :encode_component, :encode_component
967
+ end
968
+
969
+ ##
970
+ # Unencodes any percent encoded characters within a URI component.
971
+ # This method may be used for unencoding either components or full URIs,
972
+ # however, it is recommended to use the <tt>unencode_component</tt> alias
973
+ # when unencoding components.
974
+ #
975
+ # @param [String, Addressable::URI, #to_str] uri
976
+ # The URI or component to unencode.
977
+ #
978
+ # @param [Class] returning
979
+ # The type of object to return. This value may only be set to
980
+ # <tt>String</tt> or <tt>Addressable::URI</tt>. All other values
981
+ # are invalid. Defaults to <tt>String</tt>.
982
+ #
983
+ # @return [String, Addressable::URI]
984
+ # The unencoded component or URI. The return type is determined by
985
+ # the <tt>returning</tt> parameter.
986
+ def self.unencode(uri, returning=String)
987
+ return nil if uri.nil?
988
+ if !uri.respond_to?(:to_str)
989
+ raise TypeError, "Can't convert #{uri.class} into String."
990
+ end
991
+ if ![String, ::Addressable::URI].include?(returning)
992
+ raise TypeError,
993
+ "Expected String or Addressable::URI, got #{returning.inspect}"
994
+ end
995
+ result = uri.to_str.gsub(/%[0-9a-f]{2}/i) do |sequence|
367
996
  sequence[1..3].to_i(16).chr
368
997
  end
998
+ result.force_encoding("utf-8") if result.respond_to?(:force_encoding)
999
+ if returning == String
1000
+ return result
1001
+ elsif returning == ::Addressable::URI
1002
+ return ::Addressable::URI.parse(result)
1003
+ end
369
1004
  end
370
-
371
- # Percent encodes any special characters in the URI. This method does
372
- # not take IRIs or IDNs into account.
373
- def self.encode(uri)
374
- uri_object = uri.kind_of?(self) ? uri : self.parse(uri.to_s)
375
- return Addressable::URI.new(
376
- self.encode_segment(uri_object.scheme,
1005
+
1006
+ class << self
1007
+ alias_method :unescape, :unencode
1008
+ alias_method :unencode_component, :unencode
1009
+ alias_method :unescape_component, :unencode
1010
+ end
1011
+
1012
+ ##
1013
+ # Percent encodes any special characters in the URI.
1014
+ #
1015
+ # @param [String, Addressable::URI, #to_str] uri
1016
+ # The URI to encode.
1017
+ #
1018
+ # @param [Class] returning
1019
+ # The type of object to return. This value may only be set to
1020
+ # <tt>String</tt> or <tt>Addressable::URI</tt>. All other values
1021
+ # are invalid. Defaults to <tt>String</tt>.
1022
+ #
1023
+ # @return [String, Addressable::URI]
1024
+ # The encoded URI. The return type is determined by
1025
+ # the <tt>returning</tt> parameter.
1026
+ def self.encode(uri, returning=String)
1027
+ return nil if uri.nil?
1028
+ if !uri.respond_to?(:to_str)
1029
+ raise TypeError, "Can't convert #{uri.class} into String."
1030
+ end
1031
+ if ![String, ::Addressable::URI].include?(returning)
1032
+ raise TypeError,
1033
+ "Expected String or Addressable::URI, got #{returning.inspect}"
1034
+ end
1035
+ uri_object = uri.kind_of?(self) ? uri : self.parse(uri.to_str)
1036
+ encoded_uri = Addressable::URI.new(
1037
+ :scheme => self.encode_component(uri_object.scheme,
377
1038
  Addressable::URI::CharacterClasses::SCHEME),
378
- self.encode_segment(uri_object.user,
379
- Addressable::URI::CharacterClasses::AUTHORITY),
380
- self.encode_segment(uri_object.password,
381
- Addressable::URI::CharacterClasses::AUTHORITY),
382
- self.encode_segment(uri_object.host,
383
- Addressable::URI::CharacterClasses::AUTHORITY),
384
- self.encode_segment(uri_object.specified_port,
1039
+ :authority => self.encode_component(uri_object.authority,
385
1040
  Addressable::URI::CharacterClasses::AUTHORITY),
386
- self.encode_segment(uri_object.path,
1041
+ :path => self.encode_component(uri_object.path,
387
1042
  Addressable::URI::CharacterClasses::PATH),
388
- self.encode_segment(uri_object.query,
1043
+ :query => self.encode_component(uri_object.query,
389
1044
  Addressable::URI::CharacterClasses::QUERY),
390
- self.encode_segment(uri_object.fragment,
1045
+ :fragment => self.encode_component(uri_object.fragment,
391
1046
  Addressable::URI::CharacterClasses::FRAGMENT)
392
- ).to_s
1047
+ )
1048
+ if returning == String
1049
+ return encoded_uri.to_s
1050
+ elsif returning == ::Addressable::URI
1051
+ return encoded_uri
1052
+ end
393
1053
  end
394
-
1054
+
395
1055
  class << self
396
1056
  alias_method :escape, :encode
397
1057
  end
398
-
1058
+
1059
+ ##
399
1060
  # Normalizes the encoding of a URI. Characters within a hostname are
400
1061
  # not percent encoded to allow for internationalized domain names.
401
- def self.normalized_encode(uri)
402
- uri_object = uri.kind_of?(self) ? uri : self.parse(uri.to_s)
403
- segments = {
404
- :scheme => self.unencode_segment(uri_object.scheme),
405
- :user => self.unencode_segment(uri_object.user),
406
- :password => self.unencode_segment(uri_object.password),
407
- :host => self.unencode_segment(uri_object.host),
408
- :port => self.unencode_segment(uri_object.specified_port),
409
- :path => self.unencode_segment(uri_object.path),
410
- :query => self.unencode_segment(uri_object.query),
411
- :fragment => self.unencode_segment(uri_object.fragment)
1062
+ #
1063
+ # @param [String, Addressable::URI, #to_str] uri
1064
+ # The URI to encode.
1065
+ #
1066
+ # @param [Class] returning
1067
+ # The type of object to return. This value may only be set to
1068
+ # <tt>String</tt> or <tt>Addressable::URI</tt>. All other values
1069
+ # are invalid. Defaults to <tt>String</tt>.
1070
+ #
1071
+ # @return [String, Addressable::URI]
1072
+ # The encoded URI. The return type is determined by
1073
+ # the <tt>returning</tt> parameter.
1074
+ def self.normalized_encode(uri, returning=String)
1075
+ if !uri.respond_to?(:to_str)
1076
+ raise TypeError, "Can't convert #{uri.class} into String."
1077
+ end
1078
+ if ![String, ::Addressable::URI].include?(returning)
1079
+ raise TypeError,
1080
+ "Expected String or Addressable::URI, got #{returning.inspect}"
1081
+ end
1082
+ uri_object = uri.kind_of?(self) ? uri : self.parse(uri.to_str)
1083
+ components = {
1084
+ :scheme => self.unencode_component(uri_object.scheme),
1085
+ :user => self.unencode_component(uri_object.user),
1086
+ :password => self.unencode_component(uri_object.password),
1087
+ :host => self.unencode_component(uri_object.host),
1088
+ :port => uri_object.port,
1089
+ :path => self.unencode_component(uri_object.path),
1090
+ :query => self.unencode_component(uri_object.query),
1091
+ :fragment => self.unencode_component(uri_object.fragment)
412
1092
  }
413
- if URI::IDNA.send(:use_libidn?)
414
- segments.each do |key, value|
415
- if value != nil
416
- segments[key] = IDN::Stringprep.nfkc_normalize(value.to_s)
417
- end
1093
+ components.each do |key, value|
1094
+ if value != nil
1095
+ components[key] = Addressable::IDNA.unicode_normalize_kc(value.to_s)
418
1096
  end
419
1097
  end
420
- return Addressable::URI.new(
421
- self.encode_segment(segments[:scheme],
1098
+ encoded_uri = Addressable::URI.new(
1099
+ :scheme => self.encode_component(components[:scheme],
422
1100
  Addressable::URI::CharacterClasses::SCHEME),
423
- self.encode_segment(segments[:user],
1101
+ :user => self.encode_component(components[:user],
424
1102
  Addressable::URI::CharacterClasses::AUTHORITY),
425
- self.encode_segment(segments[:password],
1103
+ :password => self.encode_component(components[:password],
426
1104
  Addressable::URI::CharacterClasses::AUTHORITY),
427
- segments[:host],
428
- segments[:port],
429
- self.encode_segment(segments[:path],
1105
+ :host => components[:host],
1106
+ :port => components[:port],
1107
+ :path => self.encode_component(components[:path],
430
1108
  Addressable::URI::CharacterClasses::PATH),
431
- self.encode_segment(segments[:query],
1109
+ :query => self.encode_component(components[:query],
432
1110
  Addressable::URI::CharacterClasses::QUERY),
433
- self.encode_segment(segments[:fragment],
1111
+ :fragment => self.encode_component(components[:fragment],
434
1112
  Addressable::URI::CharacterClasses::FRAGMENT)
435
- ).to_s
1113
+ )
1114
+ if returning == String
1115
+ return encoded_uri.to_s
1116
+ elsif returning == ::Addressable::URI
1117
+ return encoded_uri
1118
+ end
436
1119
  end
437
1120
 
1121
+ ##
438
1122
  # Extracts uris from an arbitrary body of text.
1123
+ #
1124
+ # @param [String, #to_str] text
1125
+ # The body of text to extract URIs from.
1126
+ #
1127
+ # @option [String, Addressable::URI, #to_str] base
1128
+ # Causes any relative URIs to be resolved against the base URI.
1129
+ #
1130
+ # @option [TrueClass, FalseClass] parse
1131
+ # If parse is true, all extracted URIs will be parsed. If parse is
1132
+ # false, the return value will be an <tt>Array</tt> of <tt>Strings</tt>.
1133
+ # Defaults to false.
1134
+ #
1135
+ # @return [Array] The extracted URIs.
439
1136
  def self.extract(text, options={})
440
- defaults = {:base => nil, :parse => false}
1137
+ defaults = {:base => nil, :parse => false}
441
1138
  options = defaults.merge(options)
442
1139
  raise InvalidOptionError unless (options.keys - defaults.keys).empty?
443
1140
  # This regular expression needs to be less forgiving or else it would
@@ -470,16 +1167,10 @@ module Addressable
470
1167
  nil
471
1168
  end
472
1169
  end
473
- parsed_uris.reject! do |uri|
474
- (uri.scheme =~ /T\d+/ ||
475
- uri.scheme == "xmlns" ||
476
- uri.scheme == "xml" ||
477
- uri.scheme == "thr" ||
478
- uri.scheme == "this" ||
479
- uri.scheme == "float" ||
480
- uri.scheme == "user" ||
481
- uri.scheme == "username" ||
482
- uri.scheme == "out")
1170
+ parsed_uris = parsed_uris.select do |uri|
1171
+ (self.ip_based_schemes | [
1172
+ "file", "git", "svn", "mailto", "tel"
1173
+ ]).include?(uri.normalized_scheme)
483
1174
  end
484
1175
  if options[:parse]
485
1176
  return parsed_uris
@@ -487,51 +1178,126 @@ module Addressable
487
1178
  return parsed_uris.collect { |uri| uri.to_s }
488
1179
  end
489
1180
  end
490
-
491
- # Creates a new uri object from component parts. Passing nil for
492
- # any of these parameters is acceptable.
493
- def initialize(scheme, user, password, host, port, path, query, fragment)
494
- @scheme = scheme
495
- @scheme = nil if @scheme.to_s.strip == ""
496
- @user = user
497
- @password = password
498
- @host = host
499
- @specified_port = port.to_s
500
- @port = port.kind_of?(Fixnum) ? port.to_s : port
501
- if @port != nil && !(@port =~ /^\d+$/)
502
- raise InvalidURIError,
503
- "Invalid port number: #{@port.inspect}"
1181
+
1182
+ ##
1183
+ # Creates a new uri object from component parts.
1184
+ #
1185
+ # @option [String, #to_str] scheme The scheme component.
1186
+ # @option [String, #to_str] user The user component.
1187
+ # @option [String, #to_str] password The password component.
1188
+ # @option [String, #to_str] userinfo
1189
+ # The userinfo component. If this is supplied, the user and password
1190
+ # components must be omitted.
1191
+ # @option [String, #to_str] host The host component.
1192
+ # @option [String, #to_str] port The port component.
1193
+ # @option [String, #to_str] authority
1194
+ # The authority component. If this is supplied, the user, password,
1195
+ # userinfo, host, and port components must be omitted.
1196
+ # @option [String, #to_str] path The path component.
1197
+ # @option [String, #to_str] query The query component.
1198
+ # @option [String, #to_str] fragment The fragment component.
1199
+ #
1200
+ # @return [Addressable::URI] The constructed URI object.
1201
+ def initialize(options={})
1202
+ if options.has_key?(:authority)
1203
+ if (options.keys & [:userinfo, :user, :password, :host, :port]).any?
1204
+ raise ArgumentError,
1205
+ "Cannot specify both an authority and any of the components " +
1206
+ "within the authority."
1207
+ end
504
1208
  end
505
- @port = @port.to_i
506
- @port = nil if @port == 0
507
- @path = (path || "")
508
- if @path != "" && @path[0..0] != "/" && @host != nil
509
- @path = "/#{@path}"
1209
+ if options.has_key?(:userinfo)
1210
+ if (options.keys & [:user, :password]).any?
1211
+ raise ArgumentError,
1212
+ "Cannot specify both a userinfo and either the user or password."
1213
+ end
510
1214
  end
511
- @query = query
512
- @fragment = fragment
513
1215
 
514
- validate()
1216
+ self.validation_deferred = true
1217
+ self.scheme = options[:scheme] if options[:scheme]
1218
+ self.user = options[:user] if options[:user]
1219
+ self.password = options[:password] if options[:password]
1220
+ self.userinfo = options[:userinfo] if options[:userinfo]
1221
+ self.host = options[:host] if options[:host]
1222
+ self.port = options[:port] if options[:port]
1223
+ self.authority = options[:authority] if options[:authority]
1224
+ self.path = options[:path] if options[:path]
1225
+ self.query = options[:query] if options[:query]
1226
+ self.fragment = options[:fragment] if options[:fragment]
1227
+ self.validation_deferred = false
515
1228
  end
516
-
517
- # Returns the scheme (protocol) for this URI.
1229
+
1230
+ ##
1231
+ # The scheme component for this URI.
1232
+ #
1233
+ # @return [String] The scheme component.
518
1234
  def scheme
519
1235
  return @scheme
520
1236
  end
521
-
522
- # Sets the scheme (protocol for this URI.)
1237
+
1238
+ ##
1239
+ # The scheme component for this URI, normalized.
1240
+ #
1241
+ # @return [String] The scheme component, normalized.
1242
+ def normalized_scheme
1243
+ @normalized_scheme ||= (begin
1244
+ if self.scheme != nil
1245
+ if self.scheme =~ /^\s*ssh\+svn\s*$/i
1246
+ "svn+ssh"
1247
+ else
1248
+ self.scheme.strip.downcase
1249
+ end
1250
+ else
1251
+ nil
1252
+ end
1253
+ end)
1254
+ end
1255
+
1256
+ ##
1257
+ # Sets the scheme component for this URI.
1258
+ #
1259
+ # @param [String, #to_str] new_scheme The new scheme component.
523
1260
  def scheme=(new_scheme)
524
- @scheme = new_scheme
1261
+ @scheme = new_scheme ? new_scheme.to_str : nil
1262
+ @scheme = nil if @scheme.to_s.strip == ""
1263
+
1264
+ # Reset dependant values
1265
+ @normalized_scheme = nil
525
1266
  end
526
-
527
- # Returns the user for this URI.
1267
+
1268
+ ##
1269
+ # The user component for this URI.
1270
+ #
1271
+ # @return [String] The user component.
528
1272
  def user
529
1273
  return @user
530
1274
  end
531
-
532
- # Sets the user for this URI.
1275
+
1276
+ ##
1277
+ # The user component for this URI, normalized.
1278
+ #
1279
+ # @return [String] The user component, normalized.
1280
+ def normalized_user
1281
+ @normalized_user ||= (begin
1282
+ if self.user
1283
+ if normalized_scheme =~ /https?/ && self.user.strip == "" &&
1284
+ (!self.password || self.password.strip == "")
1285
+ nil
1286
+ else
1287
+ self.user.strip
1288
+ end
1289
+ else
1290
+ nil
1291
+ end
1292
+ end)
1293
+ end
1294
+
1295
+ ##
1296
+ # Sets the user component for this URI.
1297
+ #
1298
+ # @param [String, #to_str] new_user The new user component.
533
1299
  def user=(new_user)
534
- @user = new_user
1300
+ @user = new_user ? new_user.to_str : nil
535
1301
 
536
1302
  # You can't have a nil user with a non-nil password
537
1303
  if @password != nil
@@ -540,20 +1306,47 @@ module Addressable
540
1306
 
541
1307
  # Reset dependant values
542
1308
  @userinfo = nil
1309
+ @normalized_userinfo = nil
543
1310
  @authority = nil
1311
+ @normalized_user = nil
544
1312
 
545
1313
  # Ensure we haven't created an invalid URI
546
1314
  validate()
547
1315
  end
548
-
549
- # Returns the password for this URI.
1316
+
1317
+ ##
1318
+ # The password component for this URI.
1319
+ #
1320
+ # @return [String] The password component.
550
1321
  def password
551
1322
  return @password
552
1323
  end
553
1324
 
554
- # Sets the password for this URI.
1325
+ ##
1326
+ # The password component for this URI, normalized.
1327
+ #
1328
+ # @return [String] The password component, normalized.
1329
+ def normalized_password
1330
+ @normalized_password ||= (begin
1331
+ if self.password
1332
+ if normalized_scheme =~ /https?/ && self.password.strip == "" &&
1333
+ (!self.user || self.user.strip == "")
1334
+ nil
1335
+ else
1336
+ self.password.strip
1337
+ end
1338
+ else
1339
+ nil
1340
+ end
1341
+ end)
1342
+ end
1343
+
1344
+ ##
1345
+ # Sets the password component for this URI.
1346
+ #
1347
+ # @param [String, #to_str] new_password The new password component.
555
1348
  def password=(new_password)
556
- @password = new_password
1349
+ @password = new_password ? new_password.to_str : nil
557
1350
 
558
1351
  # You can't have a nil user with a non-nil password
559
1352
  if @password != nil
@@ -562,33 +1355,65 @@ module Addressable
562
1355
 
563
1356
  # Reset dependant values
564
1357
  @userinfo = nil
1358
+ @normalized_userinfo = nil
565
1359
  @authority = nil
1360
+ @normalized_password = nil
566
1361
 
567
1362
  # Ensure we haven't created an invalid URI
568
1363
  validate()
569
1364
  end
570
-
571
- # Returns the username and password segment of this URI.
1365
+
1366
+ ##
1367
+ # The userinfo component for this URI.
1368
+ # Combines the user and password components.
1369
+ #
1370
+ # @return [String] The userinfo component.
572
1371
  def userinfo
573
- if !defined?(@userinfo) || @userinfo == nil
1372
+ @userinfo ||= (begin
574
1373
  current_user = self.user
575
1374
  current_password = self.password
576
1375
  if !current_user && !current_password
577
- @userinfo = nil
1376
+ nil
1377
+ elsif current_user && current_password
1378
+ "#{current_user}:#{current_password}"
1379
+ elsif current_user && !current_password
1380
+ "#{current_user}"
1381
+ end
1382
+ end)
1383
+ end
1384
+
1385
+ ##
1386
+ # The userinfo component for this URI, normalized.
1387
+ #
1388
+ # @return [String] The userinfo component, normalized.
1389
+ def normalized_userinfo
1390
+ @normalized_userinfo ||= (begin
1391
+ current_user = self.normalized_user
1392
+ current_password = self.normalized_password
1393
+ if !current_user && !current_password
1394
+ nil
578
1395
  elsif current_user && current_password
579
- @userinfo = "#{current_user}:#{current_password}"
1396
+ "#{current_user}:#{current_password}"
580
1397
  elsif current_user && !current_password
581
- @userinfo = "#{current_user}"
1398
+ "#{current_user}"
582
1399
  end
583
- end
584
- return @userinfo
1400
+ end)
585
1401
  end
586
-
587
- # Sets the username and password segment of this URI.
1402
+
1403
+ ##
1404
+ # Sets the userinfo component for this URI.
1405
+ #
1406
+ # @param [String, #to_str] new_userinfo The new userinfo component.
588
1407
  def userinfo=(new_userinfo)
589
- new_user = new_userinfo.to_s.strip.scan(/^(.*):/).flatten[0]
590
- new_password = new_userinfo.to_s.strip.scan(/:(.*)$/).flatten[0]
591
-
1408
+ new_user, new_password = if new_userinfo
1409
+ [
1410
+ new_userinfo.to_str.strip[/^(.*):/, 1],
1411
+ new_userinfo.to_str.strip[/:(.*)$/, 1]
1412
+ ]
1413
+ else
1414
+ [nil, nil]
1415
+ end
1416
+
592
1417
  # Password assigned first to ensure validity in case of nil
593
1418
  self.password = new_password
594
1419
  self.user = new_user
@@ -599,62 +1424,129 @@ module Addressable
599
1424
  # Ensure we haven't created an invalid URI
600
1425
  validate()
601
1426
  end
602
-
603
- # Returns the host for this URI.
1427
+
1428
+ ##
1429
+ # The host component for this URI.
1430
+ #
1431
+ # @return [String] The host component.
604
1432
  def host
605
1433
  return @host
606
1434
  end
607
-
608
- # Sets the host for this URI.
1435
+
1436
+ ##
1437
+ # The host component for this URI, normalized.
1438
+ #
1439
+ # @return [String] The host component, normalized.
1440
+ def normalized_host
1441
+ @normalized_host ||= (begin
1442
+ if self.host != nil
1443
+ if self.host.strip != ""
1444
+ result = ::Addressable::IDNA.to_ascii(
1445
+ self.class.unencode_component(self.host.strip.downcase)
1446
+ )
1447
+ if result[-1..-1] == "."
1448
+ # Trailing dots are unnecessary
1449
+ result = result[0...-1]
1450
+ end
1451
+ result
1452
+ else
1453
+ ""
1454
+ end
1455
+ else
1456
+ nil
1457
+ end
1458
+ end)
1459
+ end
1460
+
1461
+ ##
1462
+ # Sets the host component for this URI.
1463
+ #
1464
+ # @param [String, #to_str] new_host The new host component.
609
1465
  def host=(new_host)
610
- @host = new_host
1466
+ @host = new_host ? new_host.to_str : nil
611
1467
 
612
1468
  # Reset dependant values
613
1469
  @authority = nil
1470
+ @normalized_host = nil
614
1471
 
615
1472
  # Ensure we haven't created an invalid URI
616
1473
  validate()
617
1474
  end
618
-
619
- # Returns the authority segment of this URI.
1475
+
1476
+ ##
1477
+ # The authority component for this URI.
1478
+ # Combines the user, password, host, and port components.
1479
+ #
1480
+ # @return [String] The authority component.
620
1481
  def authority
621
- if !defined?(@authority) || @authority.nil?
622
- return nil if self.host.nil?
623
- @authority = ""
624
- if self.userinfo != nil
625
- @authority << "#{self.userinfo}@"
1482
+ @authority ||= (begin
1483
+ if self.host.nil?
1484
+ nil
1485
+ else
1486
+ authority = ""
1487
+ if self.userinfo != nil
1488
+ authority << "#{self.userinfo}@"
1489
+ end
1490
+ authority << self.host
1491
+ if self.port != nil
1492
+ authority << ":#{self.port}"
1493
+ end
1494
+ authority
626
1495
  end
627
- @authority << self.host
628
- if self.specified_port != nil
629
- @authority << ":#{self.specified_port}"
1496
+ end)
1497
+ end
1498
+
1499
+ ##
1500
+ # The authority component for this URI, normalized.
1501
+ #
1502
+ # @return [String] The authority component, normalized.
1503
+ def normalized_authority
1504
+ @normalized_authority ||= (begin
1505
+ if self.normalized_host.nil?
1506
+ nil
1507
+ else
1508
+ authority = ""
1509
+ if self.normalized_userinfo != nil
1510
+ authority << "#{self.normalized_userinfo}@"
1511
+ end
1512
+ authority << self.normalized_host
1513
+ if self.normalized_port != nil
1514
+ authority << ":#{self.normalized_port}"
1515
+ end
1516
+ authority
630
1517
  end
631
- end
632
- return @authority
1518
+ end)
633
1519
  end
634
-
635
- # Sets the authority segment of this URI.
1520
+
1521
+ ##
1522
+ # Sets the authority component for this URI.
1523
+ #
1524
+ # @param [String, #to_str] new_authority The new authority component.
636
1525
  def authority=(new_authority)
637
1526
  if new_authority
638
- new_userinfo = new_authority.scan(/^([^\[\]]*)@/).flatten[0]
1527
+ new_authority = new_authority.to_str
1528
+ new_userinfo = new_authority[/^([^\[\]]*)@/, 1]
639
1529
  if new_userinfo
640
- new_user = new_userinfo.strip.scan(/^([^:]*):?/).flatten[0]
641
- new_password = new_userinfo.strip.scan(/:(.*)$/).flatten[0]
1530
+ new_user = new_userinfo.strip[/^([^:]*):?/, 1]
1531
+ new_password = new_userinfo.strip[/:(.*)$/, 1]
642
1532
  end
643
1533
  new_host =
644
1534
  new_authority.gsub(/^([^\[\]]*)@/, "").gsub(/:([^:@\[\]]*?)$/, "")
645
1535
  new_port =
646
- new_authority.scan(/:([^:@\[\]]*?)$/).flatten[0]
1536
+ new_authority[/:([^:@\[\]]*?)$/, 1]
647
1537
  end
648
-
1538
+
649
1539
  # Password assigned first to ensure validity in case of nil
650
1540
  self.password = new_password
651
1541
  self.user = new_user
652
1542
  self.host = new_host
653
-
654
- # Port reset to allow port normalization
655
- @port = nil
656
- @specified_port = new_port
657
-
1543
+ self.port = new_port
1544
+
1545
+ # Reset dependant values
1546
+ @inferred_port = nil
1547
+ @userinfo = nil
1548
+ @normalized_userinfo = nil
1549
+
658
1550
  # Ensure we haven't created an invalid URI
659
1551
  validate()
660
1552
  end
@@ -663,112 +1555,311 @@ module Addressable
663
1555
  # use a similar URI form:
664
1556
  # //<user>:<password>@<host>:<port>/<url-path>
665
1557
  def self.ip_based_schemes
666
- return self.scheme_mapping.keys
1558
+ return self.port_mapping.keys
667
1559
  end
668
1560
 
669
1561
  # Returns a hash of common IP-based schemes and their default port
670
1562
  # numbers. Adding new schemes to this hash, as necessary, will allow
671
1563
  # for better URI normalization.
672
- def self.scheme_mapping
673
- if !defined?(@protocol_mapping) || @protocol_mapping.nil?
674
- @protocol_mapping = {
675
- "http" => 80,
676
- "https" => 443,
677
- "ftp" => 21,
678
- "tftp" => 69,
679
- "ssh" => 22,
680
- "svn+ssh" => 22,
681
- "telnet" => 23,
682
- "nntp" => 119,
683
- "gopher" => 70,
684
- "wais" => 210,
685
- "ldap" => 389,
686
- "prospero" => 1525
687
- }
688
- end
689
- return @protocol_mapping
690
- end
691
-
692
- # Returns the port number for this URI. This method will normalize to the
693
- # default port for the URI's scheme if the port isn't explicitly specified
694
- # in the URI.
1564
+ def self.port_mapping
1565
+ @port_mapping ||= {
1566
+ "http" => 80,
1567
+ "https" => 443,
1568
+ "ftp" => 21,
1569
+ "tftp" => 69,
1570
+ "sftp" => 22,
1571
+ "ssh" => 22,
1572
+ "svn+ssh" => 22,
1573
+ "telnet" => 23,
1574
+ "nntp" => 119,
1575
+ "gopher" => 70,
1576
+ "wais" => 210,
1577
+ "ldap" => 389,
1578
+ "prospero" => 1525
1579
+ }
1580
+ end
1581
+
1582
+ ##
1583
+ # The port component for this URI.
1584
+ # This is the port number actually given in the URI. This does not
1585
+ # infer port numbers from default values.
1586
+ #
1587
+ # @return [Integer] The port component.
695
1588
  def port
696
- if @port.to_i == 0
697
- if self.scheme
698
- @port = self.class.scheme_mapping[self.scheme.strip.downcase]
1589
+ return @port
1590
+ end
1591
+
1592
+ ##
1593
+ # The port component for this URI, normalized.
1594
+ #
1595
+ # @return [Integer] The port component, normalized.
1596
+ def normalized_port
1597
+ @normalized_port ||= (begin
1598
+ if self.class.port_mapping[normalized_scheme] == self.port
1599
+ nil
699
1600
  else
700
- @port = nil
1601
+ self.port
701
1602
  end
702
- return @port
703
- else
704
- @port = @port.to_i
705
- return @port
706
- end
1603
+ end)
707
1604
  end
708
-
709
- # Sets the port for this URI.
1605
+
1606
+ ##
1607
+ # Sets the port component for this URI.
1608
+ #
1609
+ # @param [String, Integer, #to_s] new_port The new port component.
710
1610
  def port=(new_port)
1611
+ if new_port != nil && !(new_port.to_s =~ /^\d+$/)
1612
+ raise InvalidURIError,
1613
+ "Invalid port number: #{new_port.inspect}"
1614
+ end
1615
+
711
1616
  @port = new_port.to_s.to_i
712
- @specified_port = @port
1617
+ @port = nil if @port == 0
1618
+
1619
+ # Reset dependent values
713
1620
  @authority = nil
1621
+ @inferred_port = nil
1622
+ @normalized_port = nil
1623
+
1624
+ # Ensure we haven't created an invalid URI
1625
+ validate()
714
1626
  end
715
-
716
- # Returns the port number that was actually specified in the URI string.
717
- def specified_port
718
- port = @specified_port.to_s.to_i
719
- if port == 0
720
- return nil
721
- else
722
- return port
723
- end
1627
+
1628
+ ##
1629
+ # The inferred port component for this URI.
1630
+ # This method will normalize to the default port for the URI's scheme if
1631
+ # the port isn't explicitly specified in the URI.
1632
+ #
1633
+ # @return [Integer] The inferred port component.
1634
+ def inferred_port
1635
+ @inferred_port ||= (begin
1636
+ if port.to_i == 0
1637
+ if scheme
1638
+ self.class.port_mapping[scheme.strip.downcase]
1639
+ else
1640
+ nil
1641
+ end
1642
+ else
1643
+ port.to_i
1644
+ end
1645
+ end)
724
1646
  end
725
-
726
- # Returns the path for this URI.
1647
+
1648
+ ##
1649
+ # The path component for this URI.
1650
+ #
1651
+ # @return [String] The path component.
727
1652
  def path
728
- return @path
1653
+ return (@path || "")
1654
+ end
1655
+
1656
+ ##
1657
+ # The path component for this URI, normalized.
1658
+ #
1659
+ # @return [String] The path component, normalized.
1660
+ def normalized_path
1661
+ @normalized_path ||= (begin
1662
+ result = self.class.normalize_path(self.path.strip)
1663
+ if result == "" &&
1664
+ ["http", "https", "ftp", "tftp"].include?(self.normalized_scheme)
1665
+ result = "/"
1666
+ end
1667
+ result
1668
+ end)
729
1669
  end
730
-
731
- # Sets the path for this URI.
1670
+
1671
+ ##
1672
+ # Sets the path component for this URI.
1673
+ #
1674
+ # @param [String, #to_str] new_path The new path component.
732
1675
  def path=(new_path)
733
- @path = (new_path || "")
1676
+ @path = (new_path || "").to_str
1677
+ if @path != "" && @path[0..0] != "/" && host != nil
1678
+ @path = "/#{@path}"
1679
+ end
1680
+
1681
+ # Reset dependent values
1682
+ @normalized_path = nil
734
1683
  end
735
1684
 
736
- # Returns the basename, if any, of the file at the path being referenced.
737
- # Returns nil if there is no path component.
1685
+ ##
1686
+ # The basename, if any, of the file in the path component.
1687
+ #
1688
+ # @return [String] The path's basename.
738
1689
  def basename
739
1690
  # Path cannot be nil
740
1691
  return File.basename(self.path).gsub(/;[^\/]*$/, "")
741
1692
  end
742
-
743
- # Returns the extension, if any, of the file at the path being referenced.
744
- # Returns "" if there is no extension or nil if there is no path
745
- # component.
1693
+
1694
+ ##
1695
+ # The extname, if any, of the file in the path component.
1696
+ # Empty string if there is no extension.
1697
+ #
1698
+ # @return [String] The path's extname.
746
1699
  def extname
747
1700
  return nil unless self.path
748
1701
  return File.extname(self.basename)
749
1702
  end
750
-
751
- # Returns the query string for this URI.
1703
+
1704
+ ##
1705
+ # The query component for this URI.
1706
+ #
1707
+ # @return [String] The query component.
752
1708
  def query
753
1709
  return @query
754
1710
  end
755
-
756
- # Sets the query string for this URI.
1711
+
1712
+ ##
1713
+ # The query component for this URI, normalized.
1714
+ #
1715
+ # @return [String] The query component, normalized.
1716
+ def normalized_query
1717
+ @normalized_query ||= (self.query ? self.query.strip : nil)
1718
+ end
1719
+
1720
+ ##
1721
+ # Sets the query component for this URI.
1722
+ #
1723
+ # @param [String, #to_str] new_query The new query component.
757
1724
  def query=(new_query)
758
- @query = new_query
1725
+ @query = new_query.to_str
1726
+
1727
+ # Reset dependent values
1728
+ @normalized_query = nil
1729
+ end
1730
+
1731
+ ##
1732
+ # Converts the query component to a Hash value.
1733
+ #
1734
+ # @option options [Symbol] :notation
1735
+ # May be one of <tt>:flat</tt>, <tt>:dot</tt>, or <tt>:subscript</tt>.
1736
+ # The <tt>:dot</tt> notation is not supported for assignment.
1737
+ # Default value is <tt>:subscript</tt>.
1738
+ #
1739
+ # @return [Hash] The query string parsed as a Hash object.
1740
+ #
1741
+ # @example
1742
+ # Addressable::URI.parse("?one=1&two=2&three=3").query_values
1743
+ # #=> {"one" => "1", "two" => "2", "three" => "3"}
1744
+ # Addressable::URI.parse("?one[two][three]=four").query_values
1745
+ # #=> {"one" => {"two" => {"three" => "four"}}}
1746
+ # Addressable::URI.parse("?one.two.three=four").query_values(
1747
+ # :notation => :dot
1748
+ # )
1749
+ # #=> {"one" => {"two" => {"three" => "four"}}}
1750
+ # Addressable::URI.parse("?one[two][three]=four").query_values(
1751
+ # :notation => :flat
1752
+ # )
1753
+ # #=> {"one[two][three]" => "four"}
1754
+ # Addressable::URI.parse("?one.two.three=four").query_values(
1755
+ # :notation => :flat
1756
+ # )
1757
+ # #=> {"one.two.three" => "four"}
1758
+ # Addressable::URI.parse(
1759
+ # "?one[two][three][]=four&one[two][three][]=five"
1760
+ # ).query_values
1761
+ # #=> {"one" => {"two" => {"three" => ["four", "five"]}}}
1762
+ def query_values(options={})
1763
+ defaults = {:notation => :subscript}
1764
+ options = defaults.merge(options)
1765
+ if ![:flat, :dot, :subscript].include?(options[:notation])
1766
+ raise ArgumentError,
1767
+ "Invalid notation. Must be one of: [:flat, :dot, :subscript]."
1768
+ end
1769
+ return nil if self.query == nil
1770
+ return (self.query.split("&").map do |pair|
1771
+ pair.split("=")
1772
+ end).inject({}) do |accumulator, pair|
1773
+ key, value = pair
1774
+ value = true if value.nil?
1775
+ key = self.class.unencode_component(key)
1776
+ if value != true
1777
+ value = self.class.unencode_component(value).gsub(/\+/, " ")
1778
+ end
1779
+ if options[:notation] == :flat
1780
+ if accumulator[key]
1781
+ raise ArgumentError, "Key was repeated: #{key.inspect}"
1782
+ end
1783
+ accumulator[key] = value
1784
+ else
1785
+ if options[:notation] == :dot
1786
+ array_value = false
1787
+ subkeys = key.split(".")
1788
+ elsif options[:notation] == :subscript
1789
+ array_value = !!(key =~ /\[\]$/)
1790
+ subkeys = key.split(/[\[\]]+/)
1791
+ end
1792
+ current_hash = accumulator
1793
+ for i in 0...(subkeys.size - 1)
1794
+ subkey = subkeys[i]
1795
+ current_hash[subkey] = {} unless current_hash[subkey]
1796
+ current_hash = current_hash[subkey]
1797
+ end
1798
+ if array_value
1799
+ current_hash[subkeys.last] = [] unless current_hash[subkeys.last]
1800
+ current_hash[subkeys.last] << value
1801
+ else
1802
+ current_hash[subkeys.last] = value
1803
+ end
1804
+ end
1805
+ accumulator
1806
+ end
1807
+ end
1808
+
1809
+ ##
1810
+ # Sets the query component for this URI from a Hash object.
1811
+ #
1812
+ # @param [Hash, #to_hash] new_query_values The new query values.
1813
+ def query_values=(new_query_values)
1814
+ @query = (new_query_values.to_hash.inject([]) do |accumulator, pair|
1815
+ key, value = pair
1816
+ key = self.class.encode_component(key, CharacterClasses::UNRESERVED)
1817
+ if value == true
1818
+ accumulator << "#{key}"
1819
+ else
1820
+ value = self.class.encode_component(
1821
+ value, CharacterClasses::UNRESERVED)
1822
+ accumulator << "#{key}=#{value}"
1823
+ end
1824
+ end).join("&")
1825
+
1826
+ # Reset dependent values
1827
+ @normalized_query = nil
759
1828
  end
760
-
761
- # Returns the fragment for this URI.
1829
+
1830
+ ##
1831
+ # The fragment component for this URI.
1832
+ #
1833
+ # @return [String] The fragment component.
762
1834
  def fragment
763
1835
  return @fragment
764
1836
  end
765
-
766
- # Sets the fragment for this URI.
1837
+
1838
+ ##
1839
+ # The fragment component for this URI, normalized.
1840
+ #
1841
+ # @return [String] The fragment component, normalized.
1842
+ def normalized_fragment
1843
+ @normalized_fragment ||= (self.fragment ? self.fragment.strip : nil)
1844
+ end
1845
+
1846
+ ##
1847
+ # Sets the fragment component for this URI.
1848
+ #
1849
+ # @param [String, #to_str] new_fragment The new fragment component.
767
1850
  def fragment=(new_fragment)
768
- @fragment = new_fragment
1851
+ @fragment = new_fragment ? new_fragment.to_str : nil
1852
+
1853
+ # Reset dependent values
1854
+ @normalized_fragment = nil
769
1855
  end
770
-
771
- # Returns true if the URI uses an IP-based protocol.
1856
+
1857
+ ##
1858
+ # Determines if the scheme indicates an IP-based protocol.
1859
+ #
1860
+ # @return [TrueClass, FalseClass]
1861
+ # <tt>true</tt> if the scheme indicates an IP-based protocol.
1862
+ # <tt>false</tt> otherwise.
772
1863
  def ip_based?
773
1864
  if self.scheme
774
1865
  return self.class.ip_based_schemes.include?(
@@ -776,26 +1867,45 @@ module Addressable
776
1867
  end
777
1868
  return false
778
1869
  end
779
-
780
- # Returns true if this URI is known to be relative.
1870
+
1871
+ ##
1872
+ # Determines if the URI is relative.
1873
+ #
1874
+ # @return [TrueClass, FalseClass]
1875
+ # <tt>true</tt> if the URI is relative.
1876
+ # <tt>false</tt> otherwise.
781
1877
  def relative?
782
1878
  return self.scheme.nil?
783
1879
  end
784
-
785
- # Returns true if this URI is known to be absolute.
1880
+
1881
+ ##
1882
+ # Determines if the URI is absolute.
1883
+ #
1884
+ # @return [TrueClass, FalseClass]
1885
+ # <tt>true</tt> if the URI is absolute.
1886
+ # <tt>false</tt> otherwise.
786
1887
  def absolute?
787
1888
  return !relative?
788
1889
  end
789
-
1890
+
1891
+ ##
790
1892
  # Joins two URIs together.
791
- def +(uri)
1893
+ #
1894
+ # @param [String, Addressable::URI, #to_str] uri The URI to join with.
1895
+ #
1896
+ # @return [Addressable::URI] The joined URI.
1897
+ def join(uri)
1898
+ if !uri.respond_to?(:to_str)
1899
+ raise TypeError, "Can't convert #{uri.class} into String."
1900
+ end
792
1901
  if !uri.kind_of?(self.class)
793
- uri = URI.parse(uri.to_s)
1902
+ # Otherwise, convert to a String, then parse.
1903
+ uri = self.class.parse(uri.to_str)
794
1904
  end
795
1905
  if uri.to_s == ""
796
1906
  return self.dup
797
1907
  end
798
-
1908
+
799
1909
  joined_scheme = nil
800
1910
  joined_user = nil
801
1911
  joined_password = nil
@@ -804,14 +1914,14 @@ module Addressable
804
1914
  joined_path = nil
805
1915
  joined_query = nil
806
1916
  joined_fragment = nil
807
-
1917
+
808
1918
  # Section 5.2.2 of RFC 3986
809
1919
  if uri.scheme != nil
810
1920
  joined_scheme = uri.scheme
811
1921
  joined_user = uri.user
812
1922
  joined_password = uri.password
813
1923
  joined_host = uri.host
814
- joined_port = uri.specified_port
1924
+ joined_port = uri.port
815
1925
  joined_path = self.class.normalize_path(uri.path)
816
1926
  joined_query = uri.query
817
1927
  else
@@ -819,7 +1929,7 @@ module Addressable
819
1929
  joined_user = uri.user
820
1930
  joined_password = uri.password
821
1931
  joined_host = uri.host
822
- joined_port = uri.specified_port
1932
+ joined_port = uri.port
823
1933
  joined_path = self.class.normalize_path(uri.path)
824
1934
  joined_query = uri.query
825
1935
  else
@@ -846,13 +1956,13 @@ module Addressable
846
1956
  else
847
1957
  base_path = ""
848
1958
  end
849
-
1959
+
850
1960
  # If the base path is empty and an authority segment has been
851
1961
  # defined, use a base path of "/"
852
1962
  if base_path == "" && self.authority != nil
853
1963
  base_path = "/"
854
1964
  end
855
-
1965
+
856
1966
  joined_path = self.class.normalize_path(base_path + uri.path)
857
1967
  end
858
1968
  joined_query = uri.query
@@ -860,39 +1970,124 @@ module Addressable
860
1970
  joined_user = self.user
861
1971
  joined_password = self.password
862
1972
  joined_host = self.host
863
- joined_port = self.specified_port
1973
+ joined_port = self.port
864
1974
  end
865
1975
  joined_scheme = self.scheme
866
1976
  end
867
1977
  joined_fragment = uri.fragment
868
-
1978
+
869
1979
  return Addressable::URI.new(
870
- joined_scheme,
871
- joined_user,
872
- joined_password,
873
- joined_host,
874
- joined_port,
875
- joined_path,
876
- joined_query,
877
- joined_fragment
1980
+ :scheme => joined_scheme,
1981
+ :user => joined_user,
1982
+ :password => joined_password,
1983
+ :host => joined_host,
1984
+ :port => joined_port,
1985
+ :path => joined_path,
1986
+ :query => joined_query,
1987
+ :fragment => joined_fragment
878
1988
  )
879
1989
  end
880
-
881
- # Merges two URIs together.
882
- def merge(uri)
883
- return self + uri
1990
+ alias_method :+, :join
1991
+
1992
+ ##
1993
+ # Destructive form of <tt>join</tt>.
1994
+ #
1995
+ # @param [String, Addressable::URI, #to_str] uri The URI to join with.
1996
+ #
1997
+ # @return [Addressable::URI] The joined URI.
1998
+ #
1999
+ # @see Addressable::URI#join
2000
+ def join!(uri)
2001
+ replace_self(self.join(uri))
2002
+ end
2003
+
2004
+ ##
2005
+ # Merges a URI with a <tt>Hash</tt> of components.
2006
+ # This method has different behavior from <tt>join</tt>. Any components
2007
+ # present in the <tt>hash</tt> parameter will override the original
2008
+ # components. The path component is not treated specially.
2009
+ #
2010
+ # @param [Hash, Addressable::URI, #to_hash] hash The components to merge with.
2011
+ #
2012
+ # @return [Addressable::URI] The merged URI.
2013
+ #
2014
+ # @see Hash#merge
2015
+ def merge(hash)
2016
+ if !hash.respond_to?(:to_hash)
2017
+ raise TypeError, "Can't convert #{hash.class} into Hash."
2018
+ end
2019
+ hash = hash.to_hash
2020
+
2021
+ if hash.has_key?(:authority)
2022
+ if (hash.keys & [:userinfo, :user, :password, :host, :port]).any?
2023
+ raise ArgumentError,
2024
+ "Cannot specify both an authority and any of the components " +
2025
+ "within the authority."
2026
+ end
2027
+ end
2028
+ if hash.has_key?(:userinfo)
2029
+ if (hash.keys & [:user, :password]).any?
2030
+ raise ArgumentError,
2031
+ "Cannot specify both a userinfo and either the user or password."
2032
+ end
2033
+ end
2034
+
2035
+ uri = Addressable::URI.new
2036
+ uri.validation_deferred = true
2037
+ uri.scheme =
2038
+ hash.has_key?(:scheme) ? hash[:scheme] : self.scheme
2039
+ if hash.has_key?(:authority)
2040
+ uri.authority =
2041
+ hash.has_key?(:authority) ? hash[:authority] : self.authority
2042
+ end
2043
+ if hash.has_key?(:userinfo)
2044
+ uri.userinfo =
2045
+ hash.has_key?(:userinfo) ? hash[:userinfo] : self.userinfo
2046
+ end
2047
+ if !hash.has_key?(:userinfo) && !hash.has_key?(:authority)
2048
+ uri.user =
2049
+ hash.has_key?(:user) ? hash[:user] : self.user
2050
+ uri.password =
2051
+ hash.has_key?(:password) ? hash[:password] : self.password
2052
+ end
2053
+ if !hash.has_key?(:authority)
2054
+ uri.host =
2055
+ hash.has_key?(:host) ? hash[:host] : self.host
2056
+ uri.port =
2057
+ hash.has_key?(:port) ? hash[:port] : self.port
2058
+ end
2059
+ uri.path =
2060
+ hash.has_key?(:path) ? hash[:path] : self.path
2061
+ uri.query =
2062
+ hash.has_key?(:query) ? hash[:query] : self.query
2063
+ uri.fragment =
2064
+ hash.has_key?(:fragment) ? hash[:fragment] : self.fragment
2065
+ uri.validation_deferred = false
2066
+
2067
+ return uri
884
2068
  end
885
- alias_method :join, :merge
886
-
887
- # Destructive form of merge.
2069
+
2070
+ ##
2071
+ # Destructive form of <tt>merge</tt>.
2072
+ #
2073
+ # @param [Hash, Addressable::URI, #to_hash] uri The components to merge with.
2074
+ #
2075
+ # @return [Addressable::URI] The merged URI.
2076
+ #
2077
+ # @see Addressable::URI#merge
888
2078
  def merge!(uri)
889
2079
  replace_self(self.merge(uri))
890
2080
  end
891
- alias_method :join!, :merge!
892
-
2081
+
2082
+ ##
893
2083
  # Returns the shortest normalized relative form of this URI that uses the
894
2084
  # supplied URI as a base for resolution. Returns an absolute URI if
895
- # necessary.
2085
+ # necessary. This is effectively the opposite of <tt>route_to</tt>.
2086
+ #
2087
+ # @param [String, Addressable::URI, #to_str] uri The URI to route from.
2088
+ #
2089
+ # @return [Addressable::URI]
2090
+ # The normalized relative URI that is equivalent to the original URI.
896
2091
  def route_from(uri)
897
2092
  uri = self.class.parse(uri).normalize
898
2093
  normalized_self = self.normalize
@@ -905,50 +2100,57 @@ module Addressable
905
2100
  if normalized_self == uri
906
2101
  return Addressable::URI.parse("##{normalized_self.fragment}")
907
2102
  end
908
- segments = normalized_self.to_hash
2103
+ components = normalized_self.to_hash
909
2104
  if normalized_self.scheme == uri.scheme
910
- segments[:scheme] = nil
2105
+ components[:scheme] = nil
911
2106
  if normalized_self.authority == uri.authority
912
- segments[:user] = nil
913
- segments[:password] = nil
914
- segments[:host] = nil
915
- segments[:port] = nil
2107
+ components[:user] = nil
2108
+ components[:password] = nil
2109
+ components[:host] = nil
2110
+ components[:port] = nil
916
2111
  if normalized_self.path == uri.path
917
- segments[:path] = nil
2112
+ components[:path] = nil
918
2113
  if normalized_self.query == uri.query
919
- segments[:query] = nil
2114
+ components[:query] = nil
920
2115
  end
921
2116
  else
922
2117
  if uri.path != "/"
923
- segments[:path].gsub!(
2118
+ components[:path].gsub!(
924
2119
  Regexp.new("^" + Regexp.escape(uri.path)), "")
925
2120
  end
926
2121
  end
927
2122
  end
928
2123
  end
929
2124
  # Avoid network-path references.
930
- if segments[:host] != nil
931
- segments[:scheme] = normalized_self.scheme
2125
+ if components[:host] != nil
2126
+ components[:scheme] = normalized_self.scheme
932
2127
  end
933
2128
  return Addressable::URI.new(
934
- segments[:scheme],
935
- segments[:user],
936
- segments[:password],
937
- segments[:host],
938
- segments[:port],
939
- segments[:path],
940
- segments[:query],
941
- segments[:fragment]
2129
+ :scheme => components[:scheme],
2130
+ :user => components[:user],
2131
+ :password => components[:password],
2132
+ :host => components[:host],
2133
+ :port => components[:port],
2134
+ :path => components[:path],
2135
+ :query => components[:query],
2136
+ :fragment => components[:fragment]
942
2137
  )
943
2138
  end
944
-
2139
+
2140
+ ##
945
2141
  # Returns the shortest normalized relative form of the supplied URI that
946
2142
  # uses this URI as a base for resolution. Returns an absolute URI if
947
- # necessary.
2143
+ # necessary. This is effectively the opposite of <tt>route_from</tt>.
2144
+ #
2145
+ # @param [String, Addressable::URI, #to_str] uri The URI to route to.
2146
+ #
2147
+ # @return [Addressable::URI]
2148
+ # The normalized relative URI that is equivalent to the supplied URI.
948
2149
  def route_to(uri)
949
2150
  return self.class.parse(uri).route_from(self)
950
2151
  end
951
-
2152
+
2153
+ ##
952
2154
  # Returns a normalized URI object.
953
2155
  #
954
2156
  # NOTE: This method does not attempt to fully conform to specifications.
@@ -956,154 +2158,176 @@ module Addressable
956
2158
  # specifications, and also to deal with caching issues since several
957
2159
  # different URIs may represent the same resource and should not be
958
2160
  # cached multiple times.
2161
+ #
2162
+ # @return [Addressable::URI] The normalized URI.
959
2163
  def normalize
960
- normalized_scheme = nil
961
- normalized_scheme = self.scheme.strip.downcase if self.scheme != nil
962
- normalized_scheme = "svn+ssh" if normalized_scheme == "ssh+svn"
2164
+ # This is a special exception for the frequently misused feed
2165
+ # URI scheme.
963
2166
  if normalized_scheme == "feed"
964
2167
  if self.to_s =~ /^feed:\/*http:\/*/
965
2168
  return self.class.parse(
966
- self.to_s.scan(/^feed:\/*(http:\/*.*)/).flatten[0]).normalize
967
- end
968
- end
969
- normalized_user = nil
970
- normalized_user = self.user.strip if self.user != nil
971
- normalized_password = nil
972
- normalized_password = self.password.strip if self.password != nil
973
-
974
- # If we are using http or https and user/password are blank,
975
- # then we remove them
976
- if normalized_scheme =~ /https?/ && normalized_user == "" &&
977
- (!normalized_password || normalized_password == "")
978
- normalized_user = nil
979
- normalized_password = nil
980
- end
981
-
982
- normalized_host = nil
983
- normalized_host = self.host.strip.downcase if self.host != nil
984
- if normalized_host != nil
985
- begin
986
- normalized_host = URI::IDNA.to_ascii(normalized_host)
987
- rescue Exception
988
- nil
989
- end
990
- if normalized_host[-1..-1] == "."
991
- normalized_host = normalized_host[0...-1]
992
- end
993
- end
994
-
995
- normalized_port = self.port
996
- if self.class.scheme_mapping[normalized_scheme] == normalized_port
997
- normalized_port = nil
998
- end
999
- normalized_path = nil
1000
- normalized_path = self.path.strip if self.path != nil
1001
- if normalized_path != nil
1002
- normalized_path = self.class.normalize_path(normalized_path)
1003
- end
1004
- if normalized_path == ""
1005
- if ["http", "https", "ftp", "tftp"].include?(normalized_scheme)
1006
- normalized_path = "/"
2169
+ self.to_s[/^feed:\/*(http:\/*.*)/, 1]
2170
+ ).normalize
1007
2171
  end
1008
2172
  end
1009
2173
 
1010
- normalized_query = nil
1011
- normalized_query = self.query.strip if self.query != nil
1012
-
1013
- normalized_fragment = nil
1014
- normalized_fragment = self.fragment.strip if self.fragment != nil
1015
- return Addressable::URI.parse(
1016
- Addressable::URI.normalized_encode(Addressable::URI.new(
1017
- normalized_scheme,
1018
- normalized_user,
1019
- normalized_password,
1020
- normalized_host,
1021
- normalized_port,
1022
- normalized_path,
1023
- normalized_query,
1024
- normalized_fragment
1025
- )))
2174
+ return Addressable::URI.normalized_encode(
2175
+ Addressable::URI.new(
2176
+ :scheme => normalized_scheme,
2177
+ :authority => normalized_authority,
2178
+ :path => normalized_path,
2179
+ :query => normalized_query,
2180
+ :fragment => normalized_fragment
2181
+ ),
2182
+ ::Addressable::URI
2183
+ )
1026
2184
  end
1027
2185
 
2186
+ ##
1028
2187
  # Destructively normalizes this URI object.
2188
+ #
2189
+ # @return [Addressable::URI] The normalized URI.
2190
+ #
2191
+ # @see Addressable::URI#normalize
1029
2192
  def normalize!
1030
2193
  replace_self(self.normalize)
1031
2194
  end
1032
-
2195
+
2196
+ ##
1033
2197
  # Creates a URI suitable for display to users. If semantic attacks are
1034
2198
  # likely, the application should try to detect these and warn the user.
1035
- # See RFC 3986 section 7.6 for more information.
2199
+ # See <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>,
2200
+ # section 7.6 for more information.
2201
+ #
2202
+ # @return [Addressable::URI] A URI suitable for display purposes.
1036
2203
  def display_uri
1037
2204
  display_uri = self.normalize
1038
- begin
1039
- display_uri.instance_variable_set("@host",
1040
- URI::IDNA.to_unicode(display_uri.host))
1041
- rescue Exception
1042
- nil
1043
- end
2205
+ display_uri.instance_variable_set("@host",
2206
+ ::Addressable::IDNA.to_unicode(display_uri.host))
1044
2207
  return display_uri
1045
2208
  end
1046
-
1047
- # Returns true if the URI objects are equal. This method normalizes
1048
- # both URIs before doing the comparison, and allows comparison against
1049
- # strings.
2209
+
2210
+ ##
2211
+ # Returns <tt>true</tt> if the URI objects are equal. This method
2212
+ # normalizes both URIs before doing the comparison, and allows comparison
2213
+ # against <tt>Strings</tt>.
2214
+ #
2215
+ # @param [Object] uri The URI to compare.
2216
+ #
2217
+ # @return [TrueClass, FalseClass]
2218
+ # <tt>true</tt> if the URIs are equivalent, <tt>false</tt> otherwise.
1050
2219
  def ===(uri)
1051
2220
  if uri.respond_to?(:normalize)
1052
2221
  uri_string = uri.normalize.to_s
1053
2222
  else
1054
2223
  begin
1055
- uri_string = URI.parse(uri.to_s).normalize.to_s
1056
- rescue InvalidURIError
2224
+ uri_string = ::Addressable::URI.parse(uri).normalize.to_s
2225
+ rescue InvalidURIError, TypeError
1057
2226
  return false
1058
2227
  end
1059
2228
  end
1060
2229
  return self.normalize.to_s == uri_string
1061
2230
  end
1062
-
1063
- # Returns true if the URI objects are equal. This method normalizes
1064
- # both URIs before doing the comparison.
2231
+
2232
+ ##
2233
+ # Returns <tt>true</tt> if the URI objects are equal. This method
2234
+ # normalizes both URIs before doing the comparison.
2235
+ #
2236
+ # @param [Object] uri The URI to compare.
2237
+ #
2238
+ # @return [TrueClass, FalseClass]
2239
+ # <tt>true</tt> if the URIs are equivalent, <tt>false</tt> otherwise.
1065
2240
  def ==(uri)
1066
- return false unless uri.kind_of?(self.class)
2241
+ return false unless uri.kind_of?(self.class)
1067
2242
  return self.normalize.to_s == uri.normalize.to_s
1068
2243
  end
1069
2244
 
1070
- # Returns true if the URI objects are equal. This method does NOT
1071
- # normalize either URI before doing the comparison.
2245
+ ##
2246
+ # Returns <tt>true</tt> if the URI objects are equal. This method
2247
+ # does NOT normalize either URI before doing the comparison.
2248
+ #
2249
+ # @param [Object] uri The URI to compare.
2250
+ #
2251
+ # @return [TrueClass, FalseClass]
2252
+ # <tt>true</tt> if the URIs are equivalent, <tt>false</tt> otherwise.
1072
2253
  def eql?(uri)
1073
- return false unless uri.kind_of?(self.class)
2254
+ return false unless uri.kind_of?(self.class)
1074
2255
  return self.to_s == uri.to_s
1075
2256
  end
1076
2257
 
1077
- # Returns a hash value that will make a URI equivalent to its normalized
2258
+ ##
2259
+ # A hash value that will make a URI equivalent to its normalized
1078
2260
  # form.
2261
+ #
2262
+ # @return [Integer] A hash of the URI.
1079
2263
  def hash
1080
2264
  return (self.normalize.to_s.hash * -1)
1081
2265
  end
1082
-
2266
+
2267
+ ##
1083
2268
  # Clones the URI object.
2269
+ #
2270
+ # @return [Addressable::URI] The cloned URI.
1084
2271
  def dup
1085
- duplicated_scheme = self.scheme ? self.scheme.dup : nil
1086
- duplicated_user = self.user ? self.user.dup : nil
1087
- duplicated_password = self.password ? self.password.dup : nil
1088
- duplicated_host = self.host ? self.host.dup : nil
1089
- duplicated_port = self.specified_port
1090
- duplicated_path = self.path ? self.path.dup : nil
1091
- duplicated_query = self.query ? self.query.dup : nil
1092
- duplicated_fragment = self.fragment ? self.fragment.dup : nil
1093
2272
  duplicated_uri = Addressable::URI.new(
1094
- duplicated_scheme,
1095
- duplicated_user,
1096
- duplicated_password,
1097
- duplicated_host,
1098
- duplicated_port,
1099
- duplicated_path,
1100
- duplicated_query,
1101
- duplicated_fragment
2273
+ :scheme => self.scheme ? self.scheme.dup : nil,
2274
+ :user => self.user ? self.user.dup : nil,
2275
+ :password => self.password ? self.password.dup : nil,
2276
+ :host => self.host ? self.host.dup : nil,
2277
+ :port => self.port,
2278
+ :path => self.path ? self.path.dup : nil,
2279
+ :query => self.query ? self.query.dup : nil,
2280
+ :fragment => self.fragment ? self.fragment.dup : nil
1102
2281
  )
1103
2282
  return duplicated_uri
1104
2283
  end
1105
-
1106
- # Returns the assembled URI as a string.
2284
+
2285
+ ##
2286
+ # Omits components from a URI.
2287
+ #
2288
+ # @param [Symbol] *components The components to be omitted.
2289
+ #
2290
+ # @return [Addressable::URI] The URI with components omitted.
2291
+ #
2292
+ # @example
2293
+ # uri = Addressable::URI.parse("http://example.com/path?query")
2294
+ # #=> #<Addressable::URI:0xcc5e7a URI:http://example.com/path?query>
2295
+ # uri.omit(:scheme, :authority)
2296
+ # #=> #<Addressable::URI:0xcc4d86 URI:/path?query>
2297
+ def omit(*components)
2298
+ invalid_components = components - [
2299
+ :scheme, :user, :password, :userinfo, :host, :port, :authority,
2300
+ :path, :query, :fragment
2301
+ ]
2302
+ unless invalid_components.empty?
2303
+ raise ArgumentError,
2304
+ "Invalid component names: #{invalid_components.inspect}."
2305
+ end
2306
+ duplicated_uri = self.dup
2307
+ duplicated_uri.validation_deferred = true
2308
+ components.each do |component|
2309
+ duplicated_uri.send((component.to_s + "=").to_sym, nil)
2310
+ end
2311
+ duplicated_uri.validation_deferred = false
2312
+ duplicated_uri
2313
+ end
2314
+
2315
+ ##
2316
+ # Destructive form of omit.
2317
+ #
2318
+ # @param [Symbol] *components The components to be omitted.
2319
+ #
2320
+ # @return [Addressable::URI] The URI with components omitted.
2321
+ #
2322
+ # @see Addressable::URI#omit
2323
+ def omit!(*components)
2324
+ replace_self(self.omit(*components))
2325
+ end
2326
+
2327
+ ##
2328
+ # Converts the URI to a <tt>String</tt>.
2329
+ #
2330
+ # @return [String] The URI's <tt>String</tt> representation.
1107
2331
  def to_s
1108
2332
  uri_string = ""
1109
2333
  uri_string << "#{self.scheme}:" if self.scheme != nil
@@ -1111,82 +2335,72 @@ module Addressable
1111
2335
  uri_string << self.path.to_s
1112
2336
  uri_string << "?#{self.query}" if self.query != nil
1113
2337
  uri_string << "##{self.fragment}" if self.fragment != nil
2338
+ if uri_string.respond_to?(:force_encoding)
2339
+ uri_string.force_encoding(Encoding::UTF_8)
2340
+ end
1114
2341
  return uri_string
1115
2342
  end
1116
-
1117
- # Returns a Hash of the URI segments.
2343
+
2344
+ ##
2345
+ # URIs are glorified <tt>Strings</tt>. Allow implicit conversion.
2346
+ alias_method :to_str, :to_s
2347
+
2348
+ ##
2349
+ # Returns a Hash of the URI components.
2350
+ #
2351
+ # @return [Hash] The URI as a <tt>Hash</tt> of components.
1118
2352
  def to_hash
1119
2353
  return {
1120
2354
  :scheme => self.scheme,
1121
2355
  :user => self.user,
1122
2356
  :password => self.password,
1123
2357
  :host => self.host,
1124
- :port => self.specified_port,
2358
+ :port => self.port,
1125
2359
  :path => self.path,
1126
2360
  :query => self.query,
1127
2361
  :fragment => self.fragment
1128
2362
  }
1129
2363
  end
1130
-
1131
- # Returns a string representation of the URI object's state.
2364
+
2365
+ ##
2366
+ # Returns a <tt>String</tt> representation of the URI object's state.
2367
+ #
2368
+ # @return [String] The URI object's state, as a <tt>String</tt>.
1132
2369
  def inspect
1133
2370
  sprintf("#<%s:%#0x URI:%s>", self.class.to_s, self.object_id, self.to_s)
1134
2371
  end
1135
-
1136
- # This module handles internationalized domain names. When Ruby has an
1137
- # implementation of nameprep, stringprep, punycode, etc, this
1138
- # module should contain an actual implementation of IDNA instead of
1139
- # returning nil if libidn can't be used.
1140
- module IDNA
1141
- # Returns the ascii representation of the label.
1142
- def self.to_ascii(label)
1143
- return nil if label.nil?
1144
- if self.use_libidn?
1145
- return IDN::Idna.toASCII(label)
1146
- else
1147
- raise NotImplementedError,
1148
- "There is no available pure-ruby implementation. " +
1149
- "Install libidn bindings."
1150
- end
1151
- end
1152
-
1153
- # Returns the unicode representation of the label.
1154
- def self.to_unicode(label)
1155
- return nil if label.nil?
1156
- if self.use_libidn?
1157
- return IDN::Idna.toUnicode(label)
1158
- else
1159
- raise NotImplementedError,
1160
- "There is no available pure-ruby implementation. " +
1161
- "Install libidn bindings."
1162
- end
1163
- end
1164
-
1165
- private
1166
- # Determines if the libidn bindings are available and able to be used.
1167
- def self.use_libidn?
1168
- if !defined?(@use_libidn) || @use_libidn.nil?
1169
- begin
1170
- require 'rubygems'
1171
- rescue LoadError
1172
- nil
1173
- end
1174
- begin
1175
- require 'idn'
1176
- rescue LoadError
1177
- nil
1178
- end
1179
- @use_libidn = !!(defined?(IDN::Idna))
1180
- end
1181
- return @use_libidn
1182
- end
2372
+
2373
+ ##
2374
+ # If URI validation needs to be disabled, this can be set to true.
2375
+ #
2376
+ # @return [TrueClass, FalseClass]
2377
+ # <tt>true</tt> if validation has been deferred,
2378
+ # <tt>false</tt> otherwise.
2379
def validation_deferred
  # A flag that is unset (or otherwise falsy) is stored as false on read.
  @validation_deferred = false unless @validation_deferred
  @validation_deferred
end
2382
+
2383
+ ##
2384
+ # If URI validation needs to be disabled, this can be set to true.
2385
+ #
2386
+ # @param [TrueClass, FalseClass] new_validation_deferred
2387
+ # <tt>true</tt> if validation will be deferred,
2388
+ # <tt>false</tt> otherwise.
2389
def validation_deferred=(new_validation_deferred)
  @validation_deferred = new_validation_deferred
  # Turning deferral off runs validation straight away.
  if !@validation_deferred
    validate
  end
end
1184
-
2393
+
1185
2394
  private
2395
+ ##
1186
2396
  # Resolves paths to their simplest form.
2397
+ #
2398
+ # @param [String] path The path to normalize.
2399
+ #
2400
+ # @return [String] The normalized path.
1187
2401
  def self.normalize_path(path)
1188
2402
  # Section 5.2.4 of RFC 3986
1189
-
2403
+
1190
2404
  return nil if path.nil?
1191
2405
  normalized_path = path.dup
1192
2406
  previous_state = normalized_path.dup
@@ -1194,11 +2408,11 @@ module Addressable
1194
2408
  previous_state = normalized_path.dup
1195
2409
  normalized_path.gsub!(/\/\.\//, "/")
1196
2410
  normalized_path.gsub!(/\/\.$/, "/")
1197
- parent = normalized_path.scan(/\/([^\/]+)\/\.\.\//).flatten[0]
2411
+ parent = normalized_path[/\/([^\/]+)\/\.\.\//, 1]
1198
2412
  if parent != "." && parent != ".."
1199
2413
  normalized_path.gsub!(/\/#{parent}\/\.\.\//, "/")
1200
2414
  end
1201
- parent = normalized_path.scan(/\/([^\/]+)\/\.\.$/).flatten[0]
2415
+ parent = normalized_path[/\/([^\/]+)\/\.\.$/, 1]
1202
2416
  if parent != "." && parent != ".."
1203
2417
  normalized_path.gsub!(/\/#{parent}\/\.\.$/, "/")
1204
2418
  end
@@ -1208,36 +2422,44 @@ module Addressable
1208
2422
  return normalized_path
1209
2423
  end
1210
2424
 
2425
+ ##
1211
2426
  # Ensures that the URI is valid.
1212
2427
  def validate
2428
+ return if self.validation_deferred
1213
2429
  if self.scheme != nil &&
1214
2430
  (self.host == nil || self.host == "") &&
1215
2431
  (self.path == nil || self.path == "")
1216
2432
  raise InvalidURIError,
1217
- "Absolute URI missing hierarchical segment."
2433
+ "Absolute URI missing hierarchical segment: '#{self.to_s}'"
1218
2434
  end
1219
2435
  if self.host == nil
1220
- if self.specified_port != nil ||
2436
+ if self.port != nil ||
1221
2437
  self.user != nil ||
1222
2438
  self.password != nil
1223
- raise InvalidURIError, "Hostname not supplied."
2439
+ raise InvalidURIError, "Hostname not supplied: '#{self.to_s}'"
1224
2440
  end
1225
2441
  end
2442
+ return nil
1226
2443
  end
1227
-
2444
+
2445
+ ##
1228
2446
  # Replaces the internal state of self with the specified URI's state.
1229
2447
  # Used in destructive operations to avoid massive code repetition.
2448
+ #
2449
+ # @param [Addressable::URI] uri The URI to replace <tt>self</tt> with.
2450
+ #
2451
+ # @return [Addressable::URI] <tt>self</tt>.
1230
2452
  def replace_self(uri)
1231
2453
  # Reset dependant values
1232
- @userinfo = nil
1233
- @authority = nil
1234
-
2454
+ instance_variables.each do |var|
2455
+ instance_variable_set(var, nil)
2456
+ end
2457
+
1235
2458
  @scheme = uri.scheme
1236
2459
  @user = uri.user
1237
2460
  @password = uri.password
1238
2461
  @host = uri.host
1239
- @specified_port = uri.instance_variable_get("@specified_port")
1240
- @port = @specified_port.to_s.to_i
2462
+ @port = uri.port
1241
2463
  @path = uri.path
1242
2464
  @query = uri.query
1243
2465
  @fragment = uri.fragment