addressable 2.3.7 → 2.7.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of addressable might be problematic. Click here for more details.

@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # encoding:utf-8
2
4
  #--
3
- # Copyright (C) 2006-2013 Bob Aman
5
+ # Copyright (C) Bob Aman
4
6
  #
5
7
  # Licensed under the Apache License, Version 2.0 (the "License");
6
8
  # you may not use this file except in compliance with the License.
@@ -234,7 +236,18 @@ module Addressable
234
236
  if !pattern.respond_to?(:to_str)
235
237
  raise TypeError, "Can't convert #{pattern.class} into String."
236
238
  end
237
- @pattern = pattern.to_str.freeze
239
+ @pattern = pattern.to_str.dup.freeze
240
+ end
241
+
242
+ ##
243
+ # Freeze URI, initializing instance variables.
244
+ #
245
+ # @return [Addressable::URI] The frozen URI object.
246
+ def freeze
247
+ self.variables
248
+ self.variable_defaults
249
+ self.named_captures
250
+ super
238
251
  end
239
252
 
240
253
  ##
@@ -477,6 +490,8 @@ module Addressable
477
490
  # @param [Hash] mapping The mapping that corresponds to the pattern.
478
491
  # @param [#validate, #transform] processor
479
492
  # An optional processor object may be supplied.
493
+ # @param [Boolean] normalize_values
494
+ # Optional flag to enable/disable unicode normalization. Default: true
480
495
  #
481
496
  # The object should respond to either the <tt>validate</tt> or
482
497
  # <tt>transform</tt> messages or both. Both the <tt>validate</tt> and
@@ -507,11 +522,11 @@ module Addressable
507
522
  # "http://example.com/{?one,two,three}/"
508
523
  # ).partial_expand({"one" => "1", "three" => 3}).pattern
509
524
  # #=> "http://example.com/?one=1{&two}&three=3"
510
- def partial_expand(mapping, processor=nil)
525
+ def partial_expand(mapping, processor=nil, normalize_values=true)
511
526
  result = self.pattern.dup
512
527
  mapping = normalize_keys(mapping)
513
528
  result.gsub!( EXPRESSION ) do |capture|
514
- transform_partial_capture(mapping, capture, processor)
529
+ transform_partial_capture(mapping, capture, processor, normalize_values)
515
530
  end
516
531
  return Addressable::Template.new(result)
517
532
  end
@@ -522,6 +537,8 @@ module Addressable
522
537
  # @param [Hash] mapping The mapping that corresponds to the pattern.
523
538
  # @param [#validate, #transform] processor
524
539
  # An optional processor object may be supplied.
540
+ # @param [Boolean] normalize_values
541
+ # Optional flag to enable/disable unicode normalization. Default: true
525
542
  #
526
543
  # The object should respond to either the <tt>validate</tt> or
527
544
  # <tt>transform</tt> messages or both. Both the <tt>validate</tt> and
@@ -572,11 +589,11 @@ module Addressable
572
589
  # ExampleProcessor
573
590
  # ).to_str
574
591
  # #=> Addressable::Template::InvalidTemplateValueError
575
- def expand(mapping, processor=nil)
592
+ def expand(mapping, processor=nil, normalize_values=true)
576
593
  result = self.pattern.dup
577
594
  mapping = normalize_keys(mapping)
578
595
  result.gsub!( EXPRESSION ) do |capture|
579
- transform_capture(mapping, capture, processor)
596
+ transform_capture(mapping, capture, processor, normalize_values)
580
597
  end
581
598
  return Addressable::URI.parse(result)
582
599
  end
@@ -592,6 +609,7 @@ module Addressable
592
609
  @variables ||= ordered_variable_defaults.map { |var, val| var }.uniq
593
610
  end
594
611
  alias_method :keys, :variables
612
+ alias_method :names, :variables
595
613
 
596
614
  ##
597
615
  # Returns a mapping of variables to their default values specified
@@ -603,9 +621,75 @@ module Addressable
603
621
  Hash[*ordered_variable_defaults.reject { |k, v| v.nil? }.flatten]
604
622
  end
605
623
 
624
+ ##
625
+ # Coerces a template into a `Regexp` object. This regular expression will
626
+ # behave very similarly to the actual template, and should match the same
627
+ # URI values, but it cannot fully handle, for example, values that would
628
+ # extract to an `Array`.
629
+ #
630
+ # @return [Regexp] A regular expression which should match the template.
631
+ def to_regexp
632
+ _, source = parse_template_pattern(pattern)
633
+ Regexp.new(source)
634
+ end
635
+
636
+ ##
637
+ # Returns the source of the coerced `Regexp`.
638
+ #
639
+ # @return [String] The source of the `Regexp` given by {#to_regexp}.
640
+ #
641
+ # @api private
642
+ def source
643
+ self.to_regexp.source
644
+ end
645
+
646
+ ##
647
+ # Returns the named captures of the coerced `Regexp`.
648
+ #
649
+ # @return [Hash] The named captures of the `Regexp` given by {#to_regexp}.
650
+ #
651
+ # @api private
652
+ def named_captures
653
+ self.to_regexp.named_captures
654
+ end
655
+
656
+ ##
657
+ # Generates a route result for a given set of parameters.
658
+ # Should only be used by rack-mount.
659
+ #
660
+ # @param params [Hash] The set of parameters used to expand the template.
661
+ # @param recall [Hash] Default parameters used to expand the template.
662
+ # @param options [Hash] Either a `:processor` or a `:parameterize` block.
663
+ #
664
+ # @api private
665
+ def generate(params={}, recall={}, options={})
666
+ merged = recall.merge(params)
667
+ if options[:processor]
668
+ processor = options[:processor]
669
+ elsif options[:parameterize]
670
+ # TODO: This is sending me into fits trying to shoe-horn this into
671
+ # the existing API. I think I've got this backwards and processors
672
+ # should be a set of 4 optional blocks named :validate, :transform,
673
+ # :match, and :restore. Having to use a singleton here is a huge
674
+ # code smell.
675
+ processor = Object.new
676
+ class <<processor
677
+ attr_accessor :block
678
+ def transform(name, value)
679
+ block.call(name, value)
680
+ end
681
+ end
682
+ processor.block = options[:parameterize]
683
+ else
684
+ processor = nil
685
+ end
686
+ result = self.expand(merged, processor)
687
+ result.to_s if result
688
+ end
689
+
606
690
  private
607
691
  def ordered_variable_defaults
608
- @ordered_variable_defaults ||= (
692
+ @ordered_variable_defaults ||= begin
609
693
  expansions, _ = parse_template_pattern(pattern)
610
694
  expansions.map do |capture|
611
695
  _, _, varlist = *capture.match(EXPRESSION)
@@ -613,7 +697,7 @@ module Addressable
613
697
  varspec[VARSPEC, 1]
614
698
  end
615
699
  end.flatten
616
- )
700
+ end
617
701
  end
618
702
 
619
703
 
@@ -626,6 +710,8 @@ module Addressable
626
710
  # The expression to expand
627
711
  # @param [#validate, #transform] processor
628
712
  # An optional processor object may be supplied.
713
+ # @param [Boolean] normalize_values
714
+ # Optional flag to enable/disable unicode normalization. Default: true
629
715
  #
630
716
  # The object should respond to either the <tt>validate</tt> or
631
717
  # <tt>transform</tt> messages or both. Both the <tt>validate</tt> and
@@ -640,21 +726,36 @@ module Addressable
640
726
  # after sending the value to the transform method.
641
727
  #
642
728
  # @return [String] The expanded expression
643
- def transform_partial_capture(mapping, capture, processor = nil)
729
+ def transform_partial_capture(mapping, capture, processor = nil,
730
+ normalize_values = true)
644
731
  _, operator, varlist = *capture.match(EXPRESSION)
645
- is_first = true
646
- varlist.split(',').inject('') do |acc, varspec|
647
- _, name, _ = *varspec.match(VARSPEC)
648
- value = mapping[name]
649
- if value
650
- operator = '&' if !is_first && operator == '?'
651
- acc << transform_capture(mapping, "{#{operator}#{varspec}}", processor)
652
- else
653
- operator = '&' if !is_first && operator == '?'
654
- acc << "{#{operator}#{varspec}}"
655
- end
656
- is_first = false
657
- acc
732
+
733
+ vars = varlist.split(",")
734
+
735
+ if operator == "?"
736
+ # partial expansion of form style query variables sometimes requires a
737
+ # slight reordering of the variables to produce a valid url.
738
+ first_to_expand = vars.find { |varspec|
739
+ _, name, _ = *varspec.match(VARSPEC)
740
+ mapping.key?(name) && !mapping[name].nil?
741
+ }
742
+
743
+ vars = [first_to_expand] + vars.reject {|varspec| varspec == first_to_expand} if first_to_expand
744
+ end
745
+
746
+ vars.
747
+ inject("".dup) do |acc, varspec|
748
+ _, name, _ = *varspec.match(VARSPEC)
749
+ next_val = if mapping.key? name
750
+ transform_capture(mapping, "{#{operator}#{varspec}}",
751
+ processor, normalize_values)
752
+ else
753
+ "{#{operator}#{varspec}}"
754
+ end
755
+ # If we've already expanded at least one '?' operator with non-empty
756
+ # value, change to '&'
757
+ operator = "&" if (operator == "?") && (next_val != "")
758
+ acc << next_val
658
759
  end
659
760
  end
660
761
 
@@ -667,6 +768,9 @@ module Addressable
667
768
  # The expression to replace
668
769
  # @param [#validate, #transform] processor
669
770
  # An optional processor object may be supplied.
771
+ # @param [Boolean] normalize_values
772
+ # Optional flag to enable/disable unicode normalization. Default: true
773
+ #
670
774
  #
671
775
  # The object should respond to either the <tt>validate</tt> or
672
776
  # <tt>transform</tt> messages or both. Both the <tt>validate</tt> and
@@ -681,7 +785,8 @@ module Addressable
681
785
  # after sending the value to the transform method.
682
786
  #
683
787
  # @return [String] The expanded expression
684
- def transform_capture(mapping, capture, processor=nil)
788
+ def transform_capture(mapping, capture, processor=nil,
789
+ normalize_values=true)
685
790
  _, operator, varlist = *capture.match(EXPRESSION)
686
791
  return_value = varlist.split(',').inject([]) do |acc, varspec|
687
792
  _, name, modifier = *varspec.match(VARSPEC)
@@ -701,7 +806,7 @@ module Addressable
701
806
  "Can't convert #{value.class} into String or Array."
702
807
  end
703
808
 
704
- value = normalize_value(value)
809
+ value = normalize_value(value) if normalize_values
705
810
 
706
811
  if processor == nil || !processor.respond_to?(:transform)
707
812
  # Handle percent escaping
@@ -764,7 +869,9 @@ module Addressable
764
869
  end
765
870
  if processor.respond_to?(:transform)
766
871
  transformed_value = processor.transform(name, value)
767
- transformed_value = normalize_value(transformed_value)
872
+ if normalize_values
873
+ transformed_value = normalize_value(transformed_value)
874
+ end
768
875
  end
769
876
  end
770
877
  acc << [name, transformed_value]
@@ -899,7 +1006,7 @@ module Addressable
899
1006
 
900
1007
  result = processor && processor.respond_to?(:match) ? processor.match(name) : nil
901
1008
  if result
902
- "(#{ result })"
1009
+ "(?<#{name}>#{ result })"
903
1010
  else
904
1011
  group = case operator
905
1012
  when '+'
@@ -920,9 +1027,9 @@ module Addressable
920
1027
  "#{ UNRESERVED }*?"
921
1028
  end
922
1029
  if modifier == '*'
923
- "(#{group}(?:#{joiner}?#{group})*)?"
1030
+ "(?<#{name}>#{group}(?:#{joiner}?#{group})*)?"
924
1031
  else
925
- "(#{group})?"
1032
+ "(?<#{name}>#{group})?"
926
1033
  end
927
1034
  end
928
1035
  end.join("#{joiner}?")
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # encoding:utf-8
2
4
  #--
3
- # Copyright (C) 2006-2013 Bob Aman
5
+ # Copyright (C) Bob Aman
4
6
  #
5
7
  # Licensed under the Apache License, Version 2.0 (the "License");
6
8
  # you may not use this file except in compliance with the License.
@@ -18,6 +20,7 @@
18
20
 
19
21
  require "addressable/version"
20
22
  require "addressable/idna"
23
+ require "public_suffix"
21
24
 
22
25
  ##
23
26
  # Addressable is a library for processing links and URIs.
@@ -44,6 +47,7 @@ module Addressable
44
47
  UNRESERVED = ALPHA + DIGIT + "\\-\\.\\_\\~"
45
48
  PCHAR = UNRESERVED + SUB_DELIMS + "\\:\\@"
46
49
  SCHEME = ALPHA + DIGIT + "\\-\\+\\."
50
+ HOST = UNRESERVED + SUB_DELIMS + "\\[\\:\\]"
47
51
  AUTHORITY = PCHAR
48
52
  PATH = PCHAR + "\\/"
49
53
  QUERY = PCHAR + "\\/\\?"
@@ -120,9 +124,9 @@ module Addressable
120
124
  user = userinfo.strip[/^([^:]*):?/, 1]
121
125
  password = userinfo.strip[/:(.*)$/, 1]
122
126
  end
123
- host = authority.gsub(
127
+ host = authority.sub(
124
128
  /^([^\[\]]*)@/, EMPTY_STR
125
- ).gsub(
129
+ ).sub(
126
130
  /:([^:@\[\]]*?)$/, EMPTY_STR
127
131
  )
128
132
  port = authority[/:([^:@\[\]]*?)$/, 1]
@@ -175,33 +179,50 @@ module Addressable
175
179
  raise TypeError, "Can't convert #{uri.class} into String."
176
180
  end
177
181
  # Otherwise, convert to a String
178
- uri = uri.to_str.dup
182
+ uri = uri.to_str.dup.strip
179
183
  hints = {
180
184
  :scheme => "http"
181
185
  }.merge(hints)
182
186
  case uri
183
- when /^http:\/+/
184
- uri.gsub!(/^http:\/+/, "http://")
185
- when /^https:\/+/
186
- uri.gsub!(/^https:\/+/, "https://")
187
- when /^feed:\/+http:\/+/
188
- uri.gsub!(/^feed:\/+http:\/+/, "feed:http://")
189
- when /^feed:\/+/
190
- uri.gsub!(/^feed:\/+/, "feed://")
191
- when /^file:\/+/
192
- uri.gsub!(/^file:\/+/, "file:///")
187
+ when /^http:\//i
188
+ uri.sub!(/^http:\/+/i, "http://")
189
+ when /^https:\//i
190
+ uri.sub!(/^https:\/+/i, "https://")
191
+ when /^feed:\/+http:\//i
192
+ uri.sub!(/^feed:\/+http:\/+/i, "feed:http://")
193
+ when /^feed:\//i
194
+ uri.sub!(/^feed:\/+/i, "feed://")
195
+ when %r[^file:/{4}]i
196
+ uri.sub!(%r[^file:/+]i, "file:////")
197
+ when %r[^file://localhost/]i
198
+ uri.sub!(%r[^file://localhost/+]i, "file:///")
199
+ when %r[^file:/+]i
200
+ uri.sub!(%r[^file:/+]i, "file:///")
193
201
  when /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
194
- uri.gsub!(/^/, hints[:scheme] + "://")
202
+ uri.sub!(/^/, hints[:scheme] + "://")
203
+ when /\A\d+\..*:\d+\z/
204
+ uri = "#{hints[:scheme]}://#{uri}"
205
+ end
206
+ match = uri.match(URIREGEX)
207
+ fragments = match.captures
208
+ authority = fragments[3]
209
+ if authority && authority.length > 0
210
+ new_authority = authority.tr("\\", "/").gsub(" ", "%20")
211
+ # NOTE: We want offset 4, not 3!
212
+ offset = match.offset(4)
213
+ uri = uri.dup
214
+ uri[offset[0]...offset[1]] = new_authority
195
215
  end
196
216
  parsed = self.parse(uri)
197
217
  if parsed.scheme =~ /^[^\/?#\.]+\.[^\/?#]+$/
198
218
  parsed = self.parse(hints[:scheme] + "://" + uri)
199
219
  end
200
220
  if parsed.path.include?(".")
201
- new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
202
- if new_host
221
+ if parsed.path[/\b@\b/]
222
+ parsed.scheme = "mailto" unless parsed.scheme
223
+ elsif new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
203
224
  parsed.defer_validation do
204
- new_path = parsed.path.gsub(
225
+ new_path = parsed.path.sub(
205
226
  Regexp.new("^" + Regexp.escape(new_host)), EMPTY_STR)
206
227
  parsed.host = new_host
207
228
  parsed.path = new_path
@@ -252,24 +273,24 @@ module Addressable
252
273
  # Otherwise, convert to a String
253
274
  path = path.to_str.strip
254
275
 
255
- path.gsub!(/^file:\/?\/?/, EMPTY_STR) if path =~ /^file:\/?\/?/
276
+ path.sub!(/^file:\/?\/?/, EMPTY_STR) if path =~ /^file:\/?\/?/
256
277
  path = SLASH + path if path =~ /^([a-zA-Z])[\|:]/
257
278
  uri = self.parse(path)
258
279
 
259
280
  if uri.scheme == nil
260
281
  # Adjust windows-style uris
261
- uri.path.gsub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
282
+ uri.path.sub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
262
283
  "/#{$1.downcase}:/"
263
284
  end
264
- uri.path.gsub!(/\\/, SLASH)
265
- if File.exists?(uri.path) &&
285
+ uri.path.tr!("\\", SLASH)
286
+ if File.exist?(uri.path) &&
266
287
  File.stat(uri.path).directory?
267
- uri.path.gsub!(/\/$/, EMPTY_STR)
288
+ uri.path.chomp!(SLASH)
268
289
  uri.path = uri.path + '/'
269
290
  end
270
291
 
271
292
  # If the path is absolute, set the scheme and host.
272
- if uri.path =~ /^\//
293
+ if uri.path.start_with?(SLASH)
273
294
  uri.scheme = "file"
274
295
  uri.host = EMPTY_STR
275
296
  end
@@ -306,6 +327,21 @@ module Addressable
306
327
  return result
307
328
  end
308
329
 
330
+ ##
331
+ # Tables used to optimize encoding operations in `self.encode_component`
332
+ # and `self.normalize_component`
333
+ SEQUENCE_ENCODING_TABLE = Hash.new do |hash, sequence|
334
+ hash[sequence] = sequence.unpack("C*").map do |c|
335
+ format("%02x", c)
336
+ end.join
337
+ end
338
+
339
+ SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE = Hash.new do |hash, sequence|
340
+ hash[sequence] = sequence.unpack("C*").map do |c|
341
+ format("%%%02X", c)
342
+ end.join
343
+ end
344
+
309
345
  ##
310
346
  # Percent encodes a URI component.
311
347
  #
@@ -366,20 +402,20 @@ module Addressable
366
402
  if character_class.kind_of?(String)
367
403
  character_class = /[^#{character_class}]/
368
404
  end
369
- if component.respond_to?(:force_encoding)
370
- # We can't perform regexps on invalid UTF sequences, but
371
- # here we need to, so switch to ASCII.
372
- component = component.dup
373
- component.force_encoding(Encoding::ASCII_8BIT)
374
- end
405
+ # We can't perform regexps on invalid UTF sequences, but
406
+ # here we need to, so switch to ASCII.
407
+ component = component.dup
408
+ component.force_encoding(Encoding::ASCII_8BIT)
375
409
  # Avoiding gsub! because there are edge cases with frozen strings
376
410
  component = component.gsub(character_class) do |sequence|
377
- (sequence.unpack('C*').map { |c| "%" + ("%02x" % c).upcase }).join
411
+ SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE[sequence]
378
412
  end
379
413
  if upcase_encoded.length > 0
380
- component = component.gsub(/%(#{upcase_encoded.chars.map do |char|
381
- char.unpack('C*').map { |c| '%02x' % c }.join
382
- end.join('|')})/i) { |s| s.upcase }
414
+ upcase_encoded_chars = upcase_encoded.chars.map do |char|
415
+ SEQUENCE_ENCODING_TABLE[char]
416
+ end
417
+ component = component.gsub(/%(#{upcase_encoded_chars.join('|')})/,
418
+ &:upcase)
383
419
  end
384
420
  return component
385
421
  end
@@ -426,14 +462,14 @@ module Addressable
426
462
  end
427
463
  uri = uri.dup
428
464
  # Seriously, only use UTF-8. I'm really not kidding!
429
- uri.force_encoding("utf-8") if uri.respond_to?(:force_encoding)
430
- leave_encoded.force_encoding("utf-8") if leave_encoded.respond_to?(:force_encoding)
465
+ uri.force_encoding("utf-8")
466
+ leave_encoded = leave_encoded.dup.force_encoding("utf-8")
431
467
  result = uri.gsub(/%[0-9a-f]{2}/iu) do |sequence|
432
468
  c = sequence[1..3].to_i(16).chr
433
- c.force_encoding("utf-8") if c.respond_to?(:force_encoding)
469
+ c.force_encoding("utf-8")
434
470
  leave_encoded.include?(c) ? sequence : c
435
471
  end
436
- result.force_encoding("utf-8") if result.respond_to?(:force_encoding)
472
+ result.force_encoding("utf-8")
437
473
  if return_type == String
438
474
  return result
439
475
  elsif return_type == ::Addressable::URI
@@ -513,19 +549,17 @@ module Addressable
513
549
  character_class = "#{character_class}%" unless character_class.include?('%')
514
550
 
515
551
  "|%(?!#{leave_encoded.chars.map do |char|
516
- seq = char.unpack('C*').map { |c| '%02x' % c }.join
552
+ seq = SEQUENCE_ENCODING_TABLE[char]
517
553
  [seq.upcase, seq.downcase]
518
554
  end.flatten.join('|')})"
519
555
  end
520
556
 
521
557
  character_class = /[^#{character_class}]#{leave_re}/
522
558
  end
523
- if component.respond_to?(:force_encoding)
524
- # We can't perform regexps on invalid UTF sequences, but
525
- # here we need to, so switch to ASCII.
526
- component = component.dup
527
- component.force_encoding(Encoding::ASCII_8BIT)
528
- end
559
+ # We can't perform regexps on invalid UTF sequences, but
560
+ # here we need to, so switch to ASCII.
561
+ component = component.dup
562
+ component.force_encoding(Encoding::ASCII_8BIT)
529
563
  unencoded = self.unencode_component(component, String, leave_encoded)
530
564
  begin
531
565
  encoded = self.encode_component(
@@ -536,9 +570,7 @@ module Addressable
536
570
  rescue ArgumentError
537
571
  encoded = self.encode_component(unencoded)
538
572
  end
539
- if encoded.respond_to?(:force_encoding)
540
- encoded.force_encoding(Encoding::UTF_8)
541
- end
573
+ encoded.force_encoding(Encoding::UTF_8)
542
574
  return encoded
543
575
  end
544
576
 
@@ -720,9 +752,9 @@ module Addressable
720
752
  ).gsub("%20", "+")
721
753
  ]
722
754
  end
723
- return (escaped_form_values.map do |(key, value)|
755
+ return escaped_form_values.map do |(key, value)|
724
756
  "#{key}=#{value}"
725
- end).join("&")
757
+ end.join("&")
726
758
  end
727
759
 
728
760
  ##
@@ -831,7 +863,7 @@ module Addressable
831
863
  #
832
864
  # @return [String] The scheme component.
833
865
  def scheme
834
- return instance_variable_defined?(:@scheme) ? @scheme : nil
866
+ return defined?(@scheme) ? @scheme : nil
835
867
  end
836
868
 
837
869
  ##
@@ -839,16 +871,20 @@ module Addressable
839
871
  #
840
872
  # @return [String] The scheme component, normalized.
841
873
  def normalized_scheme
842
- self.scheme && @normalized_scheme ||= (begin
874
+ return nil unless self.scheme
875
+ @normalized_scheme ||= begin
843
876
  if self.scheme =~ /^\s*ssh\+svn\s*$/i
844
- "svn+ssh"
877
+ "svn+ssh".dup
845
878
  else
846
879
  Addressable::URI.normalize_component(
847
880
  self.scheme.strip.downcase,
848
881
  Addressable::URI::CharacterClasses::SCHEME
849
882
  )
850
883
  end
851
- end)
884
+ end
885
+ # All normalized values should be UTF-8
886
+ @normalized_scheme.force_encoding(Encoding::UTF_8) if @normalized_scheme
887
+ @normalized_scheme
852
888
  end
853
889
 
854
890
  ##
@@ -861,16 +897,15 @@ module Addressable
861
897
  elsif new_scheme
862
898
  new_scheme = new_scheme.to_str
863
899
  end
864
- if new_scheme && new_scheme !~ /[a-z][a-z0-9\.\+\-]*/i
865
- raise InvalidURIError, "Invalid scheme format."
900
+ if new_scheme && new_scheme !~ /\A[a-z][a-z0-9\.\+\-]*\z/i
901
+ raise InvalidURIError, "Invalid scheme format: #{new_scheme}"
866
902
  end
867
903
  @scheme = new_scheme
868
904
  @scheme = nil if @scheme.to_s.strip.empty?
869
905
 
870
- # Reset dependant values
871
- @normalized_scheme = nil
872
- @uri_string = nil
873
- @hash = nil
906
+ # Reset dependent values
907
+ remove_instance_variable(:@normalized_scheme) if defined?(@normalized_scheme)
908
+ remove_composite_values
874
909
 
875
910
  # Ensure we haven't created an invalid URI
876
911
  validate()
@@ -881,7 +916,7 @@ module Addressable
881
916
  #
882
917
  # @return [String] The user component.
883
918
  def user
884
- return instance_variable_defined?(:@user) ? @user : nil
919
+ return defined?(@user) ? @user : nil
885
920
  end
886
921
 
887
922
  ##
@@ -889,7 +924,9 @@ module Addressable
889
924
  #
890
925
  # @return [String] The user component, normalized.
891
926
  def normalized_user
892
- self.user && @normalized_user ||= (begin
927
+ return nil unless self.user
928
+ return @normalized_user if defined?(@normalized_user)
929
+ @normalized_user ||= begin
893
930
  if normalized_scheme =~ /https?/ && self.user.strip.empty? &&
894
931
  (!self.password || self.password.strip.empty?)
895
932
  nil
@@ -899,7 +936,10 @@ module Addressable
899
936
  Addressable::URI::CharacterClasses::UNRESERVED
900
937
  )
901
938
  end
902
- end)
939
+ end
940
+ # All normalized values should be UTF-8
941
+ @normalized_user.force_encoding(Encoding::UTF_8) if @normalized_user
942
+ @normalized_user
903
943
  end
904
944
 
905
945
  ##
@@ -917,13 +957,12 @@ module Addressable
917
957
  @user = EMPTY_STR if @user.nil?
918
958
  end
919
959
 
920
- # Reset dependant values
921
- @userinfo = nil
922
- @normalized_userinfo = nil
923
- @authority = nil
924
- @normalized_user = nil
925
- @uri_string = nil
926
- @hash = nil
960
+ # Reset dependent values
961
+ remove_instance_variable(:@userinfo) if defined?(@userinfo)
962
+ remove_instance_variable(:@normalized_userinfo) if defined?(@normalized_userinfo)
963
+ remove_instance_variable(:@authority) if defined?(@authority)
964
+ remove_instance_variable(:@normalized_user) if defined?(@normalized_user)
965
+ remove_composite_values
927
966
 
928
967
  # Ensure we haven't created an invalid URI
929
968
  validate()
@@ -934,7 +973,7 @@ module Addressable
934
973
  #
935
974
  # @return [String] The password component.
936
975
  def password
937
- return instance_variable_defined?(:@password) ? @password : nil
976
+ return defined?(@password) ? @password : nil
938
977
  end
939
978
 
940
979
  ##
@@ -942,7 +981,9 @@ module Addressable
942
981
  #
943
982
  # @return [String] The password component, normalized.
944
983
  def normalized_password
945
- self.password && @normalized_password ||= (begin
984
+ return nil unless self.password
985
+ return @normalized_password if defined?(@normalized_password)
986
+ @normalized_password ||= begin
946
987
  if self.normalized_scheme =~ /https?/ && self.password.strip.empty? &&
947
988
  (!self.user || self.user.strip.empty?)
948
989
  nil
@@ -952,7 +993,12 @@ module Addressable
952
993
  Addressable::URI::CharacterClasses::UNRESERVED
953
994
  )
954
995
  end
955
- end)
996
+ end
997
+ # All normalized values should be UTF-8
998
+ if @normalized_password
999
+ @normalized_password.force_encoding(Encoding::UTF_8)
1000
+ end
1001
+ @normalized_password
956
1002
  end
957
1003
 
958
1004
  ##
@@ -972,13 +1018,12 @@ module Addressable
972
1018
  @user = EMPTY_STR if @user.nil?
973
1019
  end
974
1020
 
975
- # Reset dependant values
976
- @userinfo = nil
977
- @normalized_userinfo = nil
978
- @authority = nil
979
- @normalized_password = nil
980
- @uri_string = nil
981
- @hash = nil
1021
+ # Reset dependent values
1022
+ remove_instance_variable(:@userinfo) if defined?(@userinfo)
1023
+ remove_instance_variable(:@normalized_userinfo) if defined?(@normalized_userinfo)
1024
+ remove_instance_variable(:@authority) if defined?(@authority)
1025
+ remove_instance_variable(:@normalized_password) if defined?(@normalized_password)
1026
+ remove_composite_values
982
1027
 
983
1028
  # Ensure we haven't created an invalid URI
984
1029
  validate()
@@ -992,13 +1037,13 @@ module Addressable
992
1037
  def userinfo
993
1038
  current_user = self.user
994
1039
  current_password = self.password
995
- (current_user || current_password) && @userinfo ||= (begin
1040
+ (current_user || current_password) && @userinfo ||= begin
996
1041
  if current_user && current_password
997
1042
  "#{current_user}:#{current_password}"
998
1043
  elsif current_user && !current_password
999
1044
  "#{current_user}"
1000
1045
  end
1001
- end)
1046
+ end
1002
1047
  end
1003
1048
 
1004
1049
  ##
@@ -1006,17 +1051,24 @@ module Addressable
1006
1051
  #
1007
1052
  # @return [String] The userinfo component, normalized.
1008
1053
  def normalized_userinfo
1009
- self.userinfo && @normalized_userinfo ||= (begin
1054
+ return nil unless self.userinfo
1055
+ return @normalized_userinfo if defined?(@normalized_userinfo)
1056
+ @normalized_userinfo ||= begin
1010
1057
  current_user = self.normalized_user
1011
1058
  current_password = self.normalized_password
1012
1059
  if !current_user && !current_password
1013
1060
  nil
1014
1061
  elsif current_user && current_password
1015
- "#{current_user}:#{current_password}"
1062
+ "#{current_user}:#{current_password}".dup
1016
1063
  elsif current_user && !current_password
1017
- "#{current_user}"
1064
+ "#{current_user}".dup
1018
1065
  end
1019
- end)
1066
+ end
1067
+ # All normalized values should be UTF-8
1068
+ if @normalized_userinfo
1069
+ @normalized_userinfo.force_encoding(Encoding::UTF_8)
1070
+ end
1071
+ @normalized_userinfo
1020
1072
  end
1021
1073
 
1022
1074
  ##
@@ -1040,10 +1092,9 @@ module Addressable
1040
1092
  self.password = new_password
1041
1093
  self.user = new_user
1042
1094
 
1043
- # Reset dependant values
1044
- @authority = nil
1045
- @uri_string = nil
1046
- @hash = nil
1095
+ # Reset dependent values
1096
+ remove_instance_variable(:@authority) if defined?(@authority)
1097
+ remove_composite_values
1047
1098
 
1048
1099
  # Ensure we haven't created an invalid URI
1049
1100
  validate()
@@ -1054,7 +1105,7 @@ module Addressable
1054
1105
  #
1055
1106
  # @return [String] The host component.
1056
1107
  def host
1057
- return instance_variable_defined?(:@host) ? @host : nil
1108
+ return defined?(@host) ? @host : nil
1058
1109
  end
1059
1110
 
1060
1111
  ##
@@ -1062,7 +1113,8 @@ module Addressable
1062
1113
  #
1063
1114
  # @return [String] The host component, normalized.
1064
1115
  def normalized_host
1065
- self.host && @normalized_host ||= (begin
1116
+ return nil unless self.host
1117
+ @normalized_host ||= begin
1066
1118
  if !self.host.strip.empty?
1067
1119
  result = ::Addressable::IDNA.to_ascii(
1068
1120
  URI.unencode_component(self.host.strip.downcase)
@@ -1071,11 +1123,17 @@ module Addressable
1071
1123
  # Single trailing dots are unnecessary.
1072
1124
  result = result[0...-1]
1073
1125
  end
1126
+ result = Addressable::URI.normalize_component(
1127
+ result,
1128
+ CharacterClasses::HOST)
1074
1129
  result
1075
1130
  else
1076
- EMPTY_STR
1131
+ EMPTY_STR.dup
1077
1132
  end
1078
- end)
1133
+ end
1134
+ # All normalized values should be UTF-8
1135
+ @normalized_host.force_encoding(Encoding::UTF_8) if @normalized_host
1136
+ @normalized_host
1079
1137
  end
1080
1138
 
1081
1139
  ##
@@ -1088,19 +1146,10 @@ module Addressable
1088
1146
  end
1089
1147
  @host = new_host ? new_host.to_str : nil
1090
1148
 
1091
- unreserved = CharacterClasses::UNRESERVED
1092
- sub_delims = CharacterClasses::SUB_DELIMS
1093
- if @host != nil && (@host =~ /[<>{}\/\?\#\@]/ ||
1094
- (@host[/^\[(.*)\]$/, 1] != nil && @host[/^\[(.*)\]$/, 1] !~
1095
- Regexp.new("^[#{unreserved}#{sub_delims}:]*$")))
1096
- raise InvalidURIError, "Invalid character in host: '#{@host.to_s}'"
1097
- end
1098
-
1099
- # Reset dependant values
1100
- @authority = nil
1101
- @normalized_host = nil
1102
- @uri_string = nil
1103
- @hash = nil
1149
+ # Reset dependent values
1150
+ remove_instance_variable(:@authority) if defined?(@authority)
1151
+ remove_instance_variable(:@normalized_host) if defined?(@normalized_host)
1152
+ remove_composite_values
1104
1153
 
1105
1154
  # Ensure we haven't created an invalid URI
1106
1155
  validate()
@@ -1126,7 +1175,10 @@ module Addressable
1126
1175
  #
1127
1176
  # @param [String, #to_str] new_hostname The new hostname for this URI.
1128
1177
  def hostname=(new_hostname)
1129
- if new_hostname && !new_hostname.respond_to?(:to_str)
1178
+ if new_hostname &&
1179
+ (new_hostname.respond_to?(:ipv4?) || new_hostname.respond_to?(:ipv6?))
1180
+ new_hostname = new_hostname.to_s
1181
+ elsif new_hostname && !new_hostname.respond_to?(:to_str)
1130
1182
  raise TypeError, "Can't convert #{new_hostname.class} into String."
1131
1183
  end
1132
1184
  v = new_hostname ? new_hostname.to_str : nil
@@ -1134,14 +1186,41 @@ module Addressable
1134
1186
  self.host = v
1135
1187
  end
1136
1188
 
1189
+ ##
1190
+ # Returns the top-level domain for this host.
1191
+ #
1192
+ # @example
1193
+ # Addressable::URI.parse("http://www.example.co.uk").tld # => "co.uk"
1194
+ def tld
1195
+ PublicSuffix.parse(self.host, ignore_private: true).tld
1196
+ end
1197
+
1198
+ ##
1199
+ # Sets the top-level domain for this URI.
1200
+ #
1201
+ # @param [String, #to_str] new_tld The new top-level domain.
1202
+ def tld=(new_tld)
1203
+ replaced_tld = host.sub(/#{tld}\z/, new_tld)
1204
+ self.host = PublicSuffix::Domain.new(replaced_tld).to_s
1205
+ end
1206
+
1207
+ ##
1208
+ # Returns the public suffix domain for this host.
1209
+ #
1210
+ # @example
1211
+ # Addressable::URI.parse("http://www.example.co.uk").domain # => "example.co.uk"
1212
+ def domain
1213
+ PublicSuffix.domain(self.host, ignore_private: true)
1214
+ end
1215
+
1137
1216
  ##
1138
1217
  # The authority component for this URI.
1139
1218
  # Combines the user, password, host, and port components.
1140
1219
  #
1141
1220
  # @return [String] The authority component.
1142
1221
  def authority
1143
- self.host && @authority ||= (begin
1144
- authority = ""
1222
+ self.host && @authority ||= begin
1223
+ authority = String.new
1145
1224
  if self.userinfo != nil
1146
1225
  authority << "#{self.userinfo}@"
1147
1226
  end
@@ -1150,7 +1229,7 @@ module Addressable
1150
1229
  authority << ":#{self.port}"
1151
1230
  end
1152
1231
  authority
1153
- end)
1232
+ end
1154
1233
  end
1155
1234
 
1156
1235
  ##
@@ -1158,8 +1237,9 @@ module Addressable
1158
1237
  #
1159
1238
  # @return [String] The authority component, normalized.
1160
1239
  def normalized_authority
1161
- self.authority && @normalized_authority ||= (begin
1162
- authority = ""
1240
+ return nil unless self.authority
1241
+ @normalized_authority ||= begin
1242
+ authority = String.new
1163
1243
  if self.normalized_userinfo != nil
1164
1244
  authority << "#{self.normalized_userinfo}@"
1165
1245
  end
@@ -1168,7 +1248,12 @@ module Addressable
1168
1248
  authority << ":#{self.normalized_port}"
1169
1249
  end
1170
1250
  authority
1171
- end)
1251
+ end
1252
+ # All normalized values should be UTF-8
1253
+ if @normalized_authority
1254
+ @normalized_authority.force_encoding(Encoding::UTF_8)
1255
+ end
1256
+ @normalized_authority
1172
1257
  end
1173
1258
 
1174
1259
  ##
@@ -1186,9 +1271,9 @@ module Addressable
1186
1271
  new_user = new_userinfo.strip[/^([^:]*):?/, 1]
1187
1272
  new_password = new_userinfo.strip[/:(.*)$/, 1]
1188
1273
  end
1189
- new_host = new_authority.gsub(
1274
+ new_host = new_authority.sub(
1190
1275
  /^([^\[\]]*)@/, EMPTY_STR
1191
- ).gsub(
1276
+ ).sub(
1192
1277
  /:([^:@\[\]]*?)$/, EMPTY_STR
1193
1278
  )
1194
1279
  new_port =
@@ -1201,11 +1286,10 @@ module Addressable
1201
1286
  self.host = defined?(new_host) ? new_host : nil
1202
1287
  self.port = defined?(new_port) ? new_port : nil
1203
1288
 
1204
- # Reset dependant values
1205
- @userinfo = nil
1206
- @normalized_userinfo = nil
1207
- @uri_string = nil
1208
- @hash = nil
1289
+ # Reset dependent values
1290
+ remove_instance_variable(:@userinfo) if defined?(@userinfo)
1291
+ remove_instance_variable(:@normalized_userinfo) if defined?(@normalized_userinfo)
1292
+ remove_composite_values
1209
1293
 
1210
1294
  # Ensure we haven't created an invalid URI
1211
1295
  validate()
@@ -1217,18 +1301,55 @@ module Addressable
1217
1301
  #
1218
1302
  # @return [String] The serialized origin.
1219
1303
  def origin
1220
- return (if self.scheme && self.authority
1304
+ if self.scheme && self.authority
1221
1305
  if self.normalized_port
1222
- (
1223
- "#{self.normalized_scheme}://#{self.normalized_host}" +
1224
- ":#{self.normalized_port}"
1225
- )
1306
+ "#{self.normalized_scheme}://#{self.normalized_host}" +
1307
+ ":#{self.normalized_port}"
1226
1308
  else
1227
1309
  "#{self.normalized_scheme}://#{self.normalized_host}"
1228
1310
  end
1229
1311
  else
1230
1312
  "null"
1231
- end)
1313
+ end
1314
+ end
1315
+
1316
+ ##
1317
+ # Sets the origin for this URI, serialized to ASCII, as per
1318
+ # RFC 6454, section 6.2. This assignment will reset the `userinfo`
1319
+ # component.
1320
+ #
1321
+ # @param [String, #to_str] new_origin The new origin component.
1322
+ def origin=(new_origin)
1323
+ if new_origin
1324
+ if !new_origin.respond_to?(:to_str)
1325
+ raise TypeError, "Can't convert #{new_origin.class} into String."
1326
+ end
1327
+ new_origin = new_origin.to_str
1328
+ new_scheme = new_origin[/^([^:\/?#]+):\/\//, 1]
1329
+ unless new_scheme
1330
+ raise InvalidURIError, 'An origin cannot omit the scheme.'
1331
+ end
1332
+ new_host = new_origin[/:\/\/([^\/?#:]+)/, 1]
1333
+ unless new_host
1334
+ raise InvalidURIError, 'An origin cannot omit the host.'
1335
+ end
1336
+ new_port = new_origin[/:([^:@\[\]\/]*?)$/, 1]
1337
+ end
1338
+
1339
+ self.scheme = defined?(new_scheme) ? new_scheme : nil
1340
+ self.host = defined?(new_host) ? new_host : nil
1341
+ self.port = defined?(new_port) ? new_port : nil
1342
+ self.userinfo = nil
1343
+
1344
+ # Reset dependent values
1345
+ remove_instance_variable(:@userinfo) if defined?(@userinfo)
1346
+ remove_instance_variable(:@normalized_userinfo) if defined?(@normalized_userinfo)
1347
+ remove_instance_variable(:@authority) if defined?(@authority)
1348
+ remove_instance_variable(:@normalized_authority) if defined?(@normalized_authority)
1349
+ remove_composite_values
1350
+
1351
+ # Ensure we haven't created an invalid URI
1352
+ validate()
1232
1353
  end
1233
1354
 
1234
1355
  # Returns an array of known ip-based schemes. These schemes typically
@@ -1252,7 +1373,7 @@ module Addressable
1252
1373
  #
1253
1374
  # @return [Integer] The port component.
1254
1375
  def port
1255
- return instance_variable_defined?(:@port) ? @port : nil
1376
+ return defined?(@port) ? @port : nil
1256
1377
  end
1257
1378
 
1258
1379
  ##
@@ -1260,10 +1381,14 @@ module Addressable
1260
1381
  #
1261
1382
  # @return [Integer] The port component, normalized.
1262
1383
  def normalized_port
1263
- if URI.port_mapping[self.normalized_scheme] == self.port
1264
- nil
1265
- else
1266
- self.port
1384
+ return nil unless self.port
1385
+ return @normalized_port if defined?(@normalized_port)
1386
+ @normalized_port ||= begin
1387
+ if URI.port_mapping[self.normalized_scheme] == self.port
1388
+ nil
1389
+ else
1390
+ self.port
1391
+ end
1267
1392
  end
1268
1393
  end
1269
1394
 
@@ -1275,6 +1400,11 @@ module Addressable
1275
1400
  if new_port != nil && new_port.respond_to?(:to_str)
1276
1401
  new_port = Addressable::URI.unencode_component(new_port.to_str)
1277
1402
  end
1403
+
1404
+ if new_port.respond_to?(:valid_encoding?) && !new_port.valid_encoding?
1405
+ raise InvalidURIError, "Invalid encoding in port"
1406
+ end
1407
+
1278
1408
  if new_port != nil && !(new_port.to_s =~ /^\d+$/)
1279
1409
  raise InvalidURIError,
1280
1410
  "Invalid port number: #{new_port.inspect}"
@@ -1283,11 +1413,10 @@ module Addressable
1283
1413
  @port = new_port.to_s.to_i
1284
1414
  @port = nil if @port == 0
1285
1415
 
1286
- # Reset dependant values
1287
- @authority = nil
1288
- @normalized_port = nil
1289
- @uri_string = nil
1290
- @hash = nil
1416
+ # Reset dependent values
1417
+ remove_instance_variable(:@authority) if defined?(@authority)
1418
+ remove_instance_variable(:@normalized_port) if defined?(@normalized_port)
1419
+ remove_composite_values
1291
1420
 
1292
1421
  # Ensure we haven't created an invalid URI
1293
1422
  validate()
@@ -1327,12 +1456,12 @@ module Addressable
1327
1456
  #
1328
1457
  # @return [String] The components that identify a site.
1329
1458
  def site
1330
- (self.scheme || self.authority) && @site ||= (begin
1331
- site_string = ""
1459
+ (self.scheme || self.authority) && @site ||= begin
1460
+ site_string = "".dup
1332
1461
  site_string << "#{self.scheme}:" if self.scheme != nil
1333
1462
  site_string << "//#{self.authority}" if self.authority != nil
1334
1463
  site_string
1335
- end)
1464
+ end
1336
1465
  end
1337
1466
 
1338
1467
  ##
@@ -1345,8 +1474,9 @@ module Addressable
1345
1474
  #
1346
1475
  # @return [String] The normalized components that identify a site.
1347
1476
  def normalized_site
1348
- self.site && @normalized_site ||= (begin
1349
- site_string = ""
1477
+ return nil unless self.site
1478
+ @normalized_site ||= begin
1479
+ site_string = "".dup
1350
1480
  if self.normalized_scheme != nil
1351
1481
  site_string << "#{self.normalized_scheme}:"
1352
1482
  end
@@ -1354,7 +1484,10 @@ module Addressable
1354
1484
  site_string << "//#{self.normalized_authority}"
1355
1485
  end
1356
1486
  site_string
1357
- end)
1487
+ end
1488
+ # All normalized values should be UTF-8
1489
+ @normalized_site.force_encoding(Encoding::UTF_8) if @normalized_site
1490
+ @normalized_site
1358
1491
  end
1359
1492
 
1360
1493
  ##
@@ -1384,7 +1517,7 @@ module Addressable
1384
1517
  #
1385
1518
  # @return [String] The path component.
1386
1519
  def path
1387
- return instance_variable_defined?(:@path) ? @path : EMPTY_STR
1520
+ return defined?(@path) ? @path : EMPTY_STR
1388
1521
  end
1389
1522
 
1390
1523
  NORMPATH = /^(?!\/)[^\/:]*:.*$/
@@ -1393,7 +1526,7 @@ module Addressable
1393
1526
  #
1394
1527
  # @return [String] The path component, normalized.
1395
1528
  def normalized_path
1396
- @normalized_path ||= (begin
1529
+ @normalized_path ||= begin
1397
1530
  path = self.path.to_s
1398
1531
  if self.scheme == nil && path =~ NORMPATH
1399
1532
  # Relative paths with colons in the first segment are ambiguous.
@@ -1401,20 +1534,23 @@ module Addressable
1401
1534
  end
1402
1535
  # String#split(delimeter, -1) uses the more strict splitting behavior
1403
1536
  # found by default in Python.
1404
- result = (path.strip.split(SLASH, -1).map do |segment|
1537
+ result = path.strip.split(SLASH, -1).map do |segment|
1405
1538
  Addressable::URI.normalize_component(
1406
1539
  segment,
1407
1540
  Addressable::URI::CharacterClasses::PCHAR
1408
1541
  )
1409
- end).join(SLASH)
1542
+ end.join(SLASH)
1410
1543
 
1411
1544
  result = URI.normalize_path(result)
1412
1545
  if result.empty? &&
1413
1546
  ["http", "https", "ftp", "tftp"].include?(self.normalized_scheme)
1414
- result = SLASH
1547
+ result = SLASH.dup
1415
1548
  end
1416
1549
  result
1417
- end)
1550
+ end
1551
+ # All normalized values should be UTF-8
1552
+ @normalized_path.force_encoding(Encoding::UTF_8) if @normalized_path
1553
+ @normalized_path
1418
1554
  end
1419
1555
 
1420
1556
  ##
@@ -1430,10 +1566,12 @@ module Addressable
1430
1566
  @path = "/#{@path}"
1431
1567
  end
1432
1568
 
1433
- # Reset dependant values
1434
- @normalized_path = nil
1435
- @uri_string = nil
1436
- @hash = nil
1569
+ # Reset dependent values
1570
+ remove_instance_variable(:@normalized_path) if defined?(@normalized_path)
1571
+ remove_composite_values
1572
+
1573
+ # Ensure we haven't created an invalid URI
1574
+ validate()
1437
1575
  end
1438
1576
 
1439
1577
  ##
@@ -1442,7 +1580,7 @@ module Addressable
1442
1580
  # @return [String] The path's basename.
1443
1581
  def basename
1444
1582
  # Path cannot be nil
1445
- return File.basename(self.path).gsub(/;[^\/]*$/, EMPTY_STR)
1583
+ return File.basename(self.path).sub(/;[^\/]*$/, EMPTY_STR)
1446
1584
  end
1447
1585
 
1448
1586
  ##
@@ -1460,7 +1598,7 @@ module Addressable
1460
1598
  #
1461
1599
  # @return [String] The query component.
1462
1600
  def query
1463
- return instance_variable_defined?(:@query) ? @query : nil
1601
+ return defined?(@query) ? @query : nil
1464
1602
  end
1465
1603
 
1466
1604
  ##
@@ -1468,15 +1606,23 @@ module Addressable
1468
1606
  #
1469
1607
  # @return [String] The query component, normalized.
1470
1608
  def normalized_query(*flags)
1471
- modified_query_class = Addressable::URI::CharacterClasses::QUERY.dup
1472
- # Make sure possible key-value pair delimiters are escaped.
1473
- modified_query_class.sub!("\\&", "").sub!("\\;", "")
1474
- pairs = (self.query || "").split("&", -1)
1475
- pairs.sort! if flags.include?(:sorted)
1476
- component = (pairs.map do |pair|
1477
- Addressable::URI.normalize_component(pair, modified_query_class, "+")
1478
- end).join("&")
1479
- component == "" ? nil : component
1609
+ return nil unless self.query
1610
+ return @normalized_query if defined?(@normalized_query)
1611
+ @normalized_query ||= begin
1612
+ modified_query_class = Addressable::URI::CharacterClasses::QUERY.dup
1613
+ # Make sure possible key-value pair delimiters are escaped.
1614
+ modified_query_class.sub!("\\&", "").sub!("\\;", "")
1615
+ pairs = (self.query || "").split("&", -1)
1616
+ pairs.delete_if(&:empty?) if flags.include?(:compacted)
1617
+ pairs.sort! if flags.include?(:sorted)
1618
+ component = pairs.map do |pair|
1619
+ Addressable::URI.normalize_component(pair, modified_query_class, "+")
1620
+ end.join("&")
1621
+ component == "" ? nil : component
1622
+ end
1623
+ # All normalized values should be UTF-8
1624
+ @normalized_query.force_encoding(Encoding::UTF_8) if @normalized_query
1625
+ @normalized_query
1480
1626
  end
1481
1627
 
1482
1628
  ##
@@ -1489,10 +1635,9 @@ module Addressable
1489
1635
  end
1490
1636
  @query = new_query ? new_query.to_str : nil
1491
1637
 
1492
- # Reset dependant values
1493
- @normalized_query = nil
1494
- @uri_string = nil
1495
- @hash = nil
1638
+ # Reset dependent values
1639
+ remove_instance_variable(:@normalized_query) if defined?(@normalized_query)
1640
+ remove_composite_values
1496
1641
  end
1497
1642
 
1498
1643
  ##
@@ -1501,7 +1646,8 @@ module Addressable
1501
1646
  # @param [Class] return_type The return type desired. Value must be either
1502
1647
  # `Hash` or `Array`.
1503
1648
  #
1504
- # @return [Hash, Array] The query string parsed as a Hash or Array object.
1649
+ # @return [Hash, Array, nil] The query string parsed as a Hash or Array
1650
+ # or nil if the query string is blank.
1505
1651
  #
1506
1652
  # @example
1507
1653
  # Addressable::URI.parse("?one=1&two=2&three=3").query_values
@@ -1510,15 +1656,19 @@ module Addressable
1510
1656
  # #=> [["one", "two"], ["one", "three"]]
1511
1657
  # Addressable::URI.parse("?one=two&one=three").query_values(Hash)
1512
1658
  # #=> {"one" => "three"}
1659
+ # Addressable::URI.parse("?").query_values
1660
+ # #=> {}
1661
+ # Addressable::URI.parse("").query_values
1662
+ # #=> nil
1513
1663
  def query_values(return_type=Hash)
1514
1664
  empty_accumulator = Array == return_type ? [] : {}
1515
1665
  if return_type != Hash && return_type != Array
1516
1666
  raise ArgumentError, "Invalid return type. Must be Hash or Array."
1517
1667
  end
1518
1668
  return nil if self.query == nil
1519
- split_query = (self.query.split("&").map do |pair|
1669
+ split_query = self.query.split("&").map do |pair|
1520
1670
  pair.split("=", 2) if pair && !pair.empty?
1521
- end).compact
1671
+ end.compact
1522
1672
  return split_query.inject(empty_accumulator.dup) do |accu, pair|
1523
1673
  # I'd rather use key/value identifiers instead of array lookups,
1524
1674
  # but in this case I really want to maintain the exact pair structure,
@@ -1529,7 +1679,7 @@ module Addressable
1529
1679
  # Treating '+' as a space was just an unbelievably bad idea.
1530
1680
  # There was nothing wrong with '%20'!
1531
1681
  # If it ain't broke, don't fix it!
1532
- pair[1] = URI.unencode_component(pair[1].to_str.gsub(/\+/, " "))
1682
+ pair[1] = URI.unencode_component(pair[1].to_str.tr("+", " "))
1533
1683
  end
1534
1684
  if return_type == Hash
1535
1685
  accu[pair[0]] = pair[1]
@@ -1581,7 +1731,7 @@ module Addressable
1581
1731
  end
1582
1732
 
1583
1733
  # new_query_values have form [['key1', 'value1'], ['key2', 'value2']]
1584
- buffer = ""
1734
+ buffer = "".dup
1585
1735
  new_query_values.each do |key, value|
1586
1736
  encoded_key = URI.encode_component(
1587
1737
  key, CharacterClasses::UNRESERVED
@@ -1611,7 +1761,7 @@ module Addressable
1611
1761
  #
1612
1762
  # @return [String] The request URI required for an HTTP request.
1613
1763
  def request_uri
1614
- return nil if self.absolute? && self.scheme !~ /^https?$/
1764
+ return nil if self.absolute? && self.scheme !~ /^https?$/i
1615
1765
  return (
1616
1766
  (!self.path.empty? ? self.path : SLASH) +
1617
1767
  (self.query ? "?#{self.query}" : EMPTY_STR)
@@ -1626,21 +1776,20 @@ module Addressable
1626
1776
  if !new_request_uri.respond_to?(:to_str)
1627
1777
  raise TypeError, "Can't convert #{new_request_uri.class} into String."
1628
1778
  end
1629
- if self.absolute? && self.scheme !~ /^https?$/
1779
+ if self.absolute? && self.scheme !~ /^https?$/i
1630
1780
  raise InvalidURIError,
1631
1781
  "Cannot set an HTTP request URI for a non-HTTP URI."
1632
1782
  end
1633
1783
  new_request_uri = new_request_uri.to_str
1634
- path_component = new_request_uri[/^([^\?]*)\?(?:.*)$/, 1]
1784
+ path_component = new_request_uri[/^([^\?]*)\??(?:.*)$/, 1]
1635
1785
  query_component = new_request_uri[/^(?:[^\?]*)\?(.*)$/, 1]
1636
1786
  path_component = path_component.to_s
1637
1787
  path_component = (!path_component.empty? ? path_component : SLASH)
1638
1788
  self.path = path_component
1639
1789
  self.query = query_component
1640
1790
 
1641
- # Reset dependant values
1642
- @uri_string = nil
1643
- @hash = nil
1791
+ # Reset dependent values
1792
+ remove_composite_values
1644
1793
  end
1645
1794
 
1646
1795
  ##
@@ -1648,7 +1797,7 @@ module Addressable
1648
1797
  #
1649
1798
  # @return [String] The fragment component.
1650
1799
  def fragment
1651
- return instance_variable_defined?(:@fragment) ? @fragment : nil
1800
+ return defined?(@fragment) ? @fragment : nil
1652
1801
  end
1653
1802
 
1654
1803
  ##
@@ -1656,13 +1805,20 @@ module Addressable
1656
1805
  #
1657
1806
  # @return [String] The fragment component, normalized.
1658
1807
  def normalized_fragment
1659
- self.fragment && @normalized_fragment ||= (begin
1808
+ return nil unless self.fragment
1809
+ return @normalized_fragment if defined?(@normalized_fragment)
1810
+ @normalized_fragment ||= begin
1660
1811
  component = Addressable::URI.normalize_component(
1661
1812
  self.fragment,
1662
1813
  Addressable::URI::CharacterClasses::FRAGMENT
1663
1814
  )
1664
1815
  component == "" ? nil : component
1665
- end)
1816
+ end
1817
+ # All normalized values should be UTF-8
1818
+ if @normalized_fragment
1819
+ @normalized_fragment.force_encoding(Encoding::UTF_8)
1820
+ end
1821
+ @normalized_fragment
1666
1822
  end
1667
1823
 
1668
1824
  ##
@@ -1675,10 +1831,9 @@ module Addressable
1675
1831
  end
1676
1832
  @fragment = new_fragment ? new_fragment.to_str : nil
1677
1833
 
1678
- # Reset dependant values
1679
- @normalized_fragment = nil
1680
- @uri_string = nil
1681
- @hash = nil
1834
+ # Reset dependent values
1835
+ remove_instance_variable(:@normalized_fragment) if defined?(@normalized_fragment)
1836
+ remove_composite_values
1682
1837
 
1683
1838
  # Ensure we haven't created an invalid URI
1684
1839
  validate()
@@ -1781,8 +1936,8 @@ module Addressable
1781
1936
  # Section 5.2.3 of RFC 3986
1782
1937
  #
1783
1938
  # Removes the right-most path segment from the base path.
1784
- if base_path =~ /\//
1785
- base_path.gsub!(/\/[^\/]+$/, SLASH)
1939
+ if base_path.include?(SLASH)
1940
+ base_path.sub!(/\/[^\/]+$/, SLASH)
1786
1941
  else
1787
1942
  base_path = EMPTY_STR
1788
1943
  end
@@ -2099,7 +2254,7 @@ module Addressable
2099
2254
  #
2100
2255
  # @return [Integer] A hash of the URI.
2101
2256
  def hash
2102
- return @hash ||= (self.to_s.hash * -1)
2257
+ @hash ||= self.to_s.hash * -1
2103
2258
  end
2104
2259
 
2105
2260
  ##
@@ -2182,18 +2337,16 @@ module Addressable
2182
2337
  raise InvalidURIError,
2183
2338
  "Cannot assemble URI string with ambiguous path: '#{self.path}'"
2184
2339
  end
2185
- @uri_string ||= (begin
2186
- uri_string = ""
2340
+ @uri_string ||= begin
2341
+ uri_string = String.new
2187
2342
  uri_string << "#{self.scheme}:" if self.scheme != nil
2188
2343
  uri_string << "//#{self.authority}" if self.authority != nil
2189
2344
  uri_string << self.path.to_s
2190
2345
  uri_string << "?#{self.query}" if self.query != nil
2191
2346
  uri_string << "##{self.fragment}" if self.fragment != nil
2192
- if uri_string.respond_to?(:force_encoding)
2193
- uri_string.force_encoding(Encoding::UTF_8)
2194
- end
2347
+ uri_string.force_encoding(Encoding::UTF_8)
2195
2348
  uri_string
2196
- end)
2349
+ end
2197
2350
  end
2198
2351
 
2199
2352
  ##
@@ -2233,16 +2386,16 @@ module Addressable
2233
2386
  #
2234
2387
  # @param [Proc] block
2235
2388
  # A set of operations to perform on a given URI.
2236
- def defer_validation(&block)
2237
- raise LocalJumpError, "No block given." unless block
2389
+ def defer_validation
2390
+ raise LocalJumpError, "No block given." unless block_given?
2238
2391
  @validation_deferred = true
2239
- block.call()
2392
+ yield
2240
2393
  @validation_deferred = false
2241
2394
  validate
2242
2395
  return nil
2243
2396
  end
2244
2397
 
2245
- private
2398
+ protected
2246
2399
  SELF_REF = '.'
2247
2400
  PARENT = '..'
2248
2401
 
@@ -2308,6 +2461,19 @@ module Addressable
2308
2461
  raise InvalidURIError,
2309
2462
  "Cannot have a relative path with an authority set: '#{self.to_s}'"
2310
2463
  end
2464
+ if self.path != nil && !self.path.empty? &&
2465
+ self.path[0..1] == SLASH + SLASH && self.authority == nil
2466
+ raise InvalidURIError,
2467
+ "Cannot have a path with two leading slashes " +
2468
+ "without an authority set: '#{self.to_s}'"
2469
+ end
2470
+ unreserved = CharacterClasses::UNRESERVED
2471
+ sub_delims = CharacterClasses::SUB_DELIMS
2472
+ if !self.host.nil? && (self.host =~ /[<>{}\/\\\?\#\@"[[:space:]]]/ ||
2473
+ (self.host[/^\[(.*)\]$/, 1] != nil && self.host[/^\[(.*)\]$/, 1] !~
2474
+ Regexp.new("^[#{unreserved}#{sub_delims}:]*$")))
2475
+ raise InvalidURIError, "Invalid character in host: '#{self.host.to_s}'"
2476
+ end
2311
2477
  return nil
2312
2478
  end
2313
2479
 
@@ -2319,9 +2485,11 @@ module Addressable
2319
2485
  #
2320
2486
  # @return [Addressable::URI] <code>self</code>.
2321
2487
  def replace_self(uri)
2322
- # Reset dependant values
2488
+ # Reset dependent values
2323
2489
  instance_variables.each do |var|
2324
- instance_variable_set(var, nil)
2490
+ if instance_variable_defined?(var) && var != :@validation_deferred
2491
+ remove_instance_variable(var)
2492
+ end
2325
2493
  end
2326
2494
 
2327
2495
  @scheme = uri.scheme
@@ -2336,7 +2504,7 @@ module Addressable
2336
2504
  end
2337
2505
 
2338
2506
  ##
2339
- # Splits path string with "/"(slash).
2507
+ # Splits path string with "/" (slash).
2340
2508
  # It is considered that there is empty string after last slash when
2341
2509
  # path ends with slash.
2342
2510
  #
@@ -2348,5 +2516,14 @@ module Addressable
2348
2516
  splitted << EMPTY_STR if path.end_with? SLASH
2349
2517
  splitted
2350
2518
  end
2519
+
2520
+ ##
2521
+ # Resets composite values for the entire URI
2522
+ #
2523
+ # @api private
2524
+ def remove_composite_values
2525
+ remove_instance_variable(:@uri_string) if defined?(@uri_string)
2526
+ remove_instance_variable(:@hash) if defined?(@hash)
2527
+ end
2351
2528
  end
2352
2529
  end