addressable 2.3.3 → 2.8.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
- # encoding:utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  #--
3
- # Copyright (C) 2006-2011 Bob Aman
4
+ # Copyright (C) Bob Aman
4
5
  #
5
6
  # Licensed under the Apache License, Version 2.0 (the "License");
6
7
  # you may not use this file except in compliance with the License.
@@ -18,6 +19,7 @@
18
19
 
19
20
  require "addressable/version"
20
21
  require "addressable/idna"
22
+ require "public_suffix"
21
23
 
22
24
  ##
23
25
  # Addressable is a library for processing links and URIs.
@@ -35,19 +37,48 @@ module Addressable
35
37
  ##
36
38
  # Container for the character classes specified in
37
39
  # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
40
+ #
41
+ # Note: Concatenated and interpolated `String`s are not affected by the
42
+ # `frozen_string_literal` directive and must be frozen explicitly.
43
+ #
44
+ # Interpolated `String`s *were* frozen this way before Ruby 3.0:
45
+ # https://bugs.ruby-lang.org/issues/17104
38
46
  module CharacterClasses
39
47
  ALPHA = "a-zA-Z"
40
48
  DIGIT = "0-9"
41
49
  GEN_DELIMS = "\\:\\/\\?\\#\\[\\]\\@"
42
50
  SUB_DELIMS = "\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\="
43
- RESERVED = GEN_DELIMS + SUB_DELIMS
44
- UNRESERVED = ALPHA + DIGIT + "\\-\\.\\_\\~"
45
- PCHAR = UNRESERVED + SUB_DELIMS + "\\:\\@"
46
- SCHEME = ALPHA + DIGIT + "\\-\\+\\."
47
- AUTHORITY = PCHAR
48
- PATH = PCHAR + "\\/"
49
- QUERY = PCHAR + "\\/\\?"
50
- FRAGMENT = PCHAR + "\\/\\?"
51
+ RESERVED = (GEN_DELIMS + SUB_DELIMS).freeze
52
+ UNRESERVED = (ALPHA + DIGIT + "\\-\\.\\_\\~").freeze
53
+ RESERVED_AND_UNRESERVED = RESERVED + UNRESERVED
54
+ PCHAR = (UNRESERVED + SUB_DELIMS + "\\:\\@").freeze
55
+ SCHEME = (ALPHA + DIGIT + "\\-\\+\\.").freeze
56
+ HOST = (UNRESERVED + SUB_DELIMS + "\\[\\:\\]").freeze
57
+ AUTHORITY = (PCHAR + "\\[\\]").freeze
58
+ PATH = (PCHAR + "\\/").freeze
59
+ QUERY = (PCHAR + "\\/\\?").freeze
60
+ FRAGMENT = (PCHAR + "\\/\\?").freeze
61
+ end
62
+
63
+ module NormalizeCharacterClasses
64
+ HOST = /[^#{CharacterClasses::HOST}]/
65
+ UNRESERVED = /[^#{CharacterClasses::UNRESERVED}]/
66
+ PCHAR = /[^#{CharacterClasses::PCHAR}]/
67
+ SCHEME = /[^#{CharacterClasses::SCHEME}]/
68
+ FRAGMENT = /[^#{CharacterClasses::FRAGMENT}]/
69
+ QUERY = %r{[^a-zA-Z0-9\-\.\_\~\!\$\'\(\)\*\+\,\=\:\@\/\?%]|%(?!2B|2b)}
70
+ end
71
+
72
+ module CharacterClassesRegexps
73
+ AUTHORITY = /[^#{CharacterClasses::AUTHORITY}]/
74
+ FRAGMENT = /[^#{CharacterClasses::FRAGMENT}]/
75
+ HOST = /[^#{CharacterClasses::HOST}]/
76
+ PATH = /[^#{CharacterClasses::PATH}]/
77
+ QUERY = /[^#{CharacterClasses::QUERY}]/
78
+ RESERVED = /[^#{CharacterClasses::RESERVED}]/
79
+ RESERVED_AND_UNRESERVED = /[^#{CharacterClasses::RESERVED_AND_UNRESERVED}]/
80
+ SCHEME = /[^#{CharacterClasses::SCHEME}]/
81
+ UNRESERVED = /[^#{CharacterClasses::UNRESERVED}]/
51
82
  end
52
83
 
53
84
  SLASH = '/'
@@ -69,7 +100,7 @@ module Addressable
69
100
  "wais" => 210,
70
101
  "ldap" => 389,
71
102
  "prospero" => 1525
72
- }
103
+ }.freeze
73
104
 
74
105
  ##
75
106
  # Returns a URI object based on the parsed string.
@@ -99,7 +130,7 @@ module Addressable
99
130
  uri = uri.to_str
100
131
  rescue TypeError, NoMethodError
101
132
  raise TypeError, "Can't convert #{uri.class} into String."
102
- end if not uri.is_a? String
133
+ end unless uri.is_a?(String)
103
134
 
104
135
  # This Regexp supplied as an example in RFC 3986, and it works great.
105
136
  scan = uri.scan(URIREGEX)
@@ -120,15 +151,15 @@ module Addressable
120
151
  user = userinfo.strip[/^([^:]*):?/, 1]
121
152
  password = userinfo.strip[/:(.*)$/, 1]
122
153
  end
123
- host = authority.gsub(
154
+
155
+ host = authority.sub(
124
156
  /^([^\[\]]*)@/, EMPTY_STR
125
- ).gsub(
157
+ ).sub(
126
158
  /:([^:@\[\]]*?)$/, EMPTY_STR
127
159
  )
160
+
128
161
  port = authority[/:([^:@\[\]]*?)$/, 1]
129
- end
130
- if port == EMPTY_STR
131
- port = nil
162
+ port = nil if port == EMPTY_STR
132
163
  end
133
164
 
134
165
  return new(
@@ -162,37 +193,63 @@ module Addressable
162
193
  return nil unless uri
163
194
  # If a URI object is passed, just return itself.
164
195
  return uri.dup if uri.kind_of?(self)
165
- if !uri.respond_to?(:to_str)
196
+
197
+ # If a URI object of the Ruby standard library variety is passed,
198
+ # convert it to a string, then parse the string.
199
+ # We do the check this way because we don't want to accidentally
200
+ # cause a missing constant exception to be thrown.
201
+ if uri.class.name =~ /^URI\b/
202
+ uri = uri.to_s
203
+ end
204
+
205
+ unless uri.respond_to?(:to_str)
166
206
  raise TypeError, "Can't convert #{uri.class} into String."
167
207
  end
168
208
  # Otherwise, convert to a String
169
- uri = uri.to_str.dup
209
+ uri = uri.to_str.dup.strip
170
210
  hints = {
171
211
  :scheme => "http"
172
212
  }.merge(hints)
173
213
  case uri
174
- when /^http:\/+/
175
- uri.gsub!(/^http:\/+/, "http://")
176
- when /^https:\/+/
177
- uri.gsub!(/^https:\/+/, "https://")
178
- when /^feed:\/+http:\/+/
179
- uri.gsub!(/^feed:\/+http:\/+/, "feed:http://")
180
- when /^feed:\/+/
181
- uri.gsub!(/^feed:\/+/, "feed://")
182
- when /^file:\/+/
183
- uri.gsub!(/^file:\/+/, "file:///")
214
+ when /^http:\//i
215
+ uri.sub!(/^http:\/+/i, "http://")
216
+ when /^https:\//i
217
+ uri.sub!(/^https:\/+/i, "https://")
218
+ when /^feed:\/+http:\//i
219
+ uri.sub!(/^feed:\/+http:\/+/i, "feed:http://")
220
+ when /^feed:\//i
221
+ uri.sub!(/^feed:\/+/i, "feed://")
222
+ when %r[^file:/{4}]i
223
+ uri.sub!(%r[^file:/+]i, "file:////")
224
+ when %r[^file://localhost/]i
225
+ uri.sub!(%r[^file://localhost/+]i, "file:///")
226
+ when %r[^file:/+]i
227
+ uri.sub!(%r[^file:/+]i, "file:///")
184
228
  when /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
185
- uri.gsub!(/^/, hints[:scheme] + "://")
229
+ uri.sub!(/^/, hints[:scheme] + "://")
230
+ when /\A\d+\..*:\d+\z/
231
+ uri = "#{hints[:scheme]}://#{uri}"
232
+ end
233
+ match = uri.match(URIREGEX)
234
+ fragments = match.captures
235
+ authority = fragments[3]
236
+ if authority && authority.length > 0
237
+ new_authority = authority.tr("\\", "/").gsub(" ", "%20")
238
+ # NOTE: We want offset 4, not 3!
239
+ offset = match.offset(4)
240
+ uri = uri.dup
241
+ uri[offset[0]...offset[1]] = new_authority
186
242
  end
187
243
  parsed = self.parse(uri)
188
244
  if parsed.scheme =~ /^[^\/?#\.]+\.[^\/?#]+$/
189
245
  parsed = self.parse(hints[:scheme] + "://" + uri)
190
246
  end
191
247
  if parsed.path.include?(".")
192
- new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
193
- if new_host
248
+ if parsed.path[/\b@\b/]
249
+ parsed.scheme = "mailto" unless parsed.scheme
250
+ elsif new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
194
251
  parsed.defer_validation do
195
- new_path = parsed.path.gsub(
252
+ new_path = parsed.path.sub(
196
253
  Regexp.new("^" + Regexp.escape(new_host)), EMPTY_STR)
197
254
  parsed.host = new_host
198
255
  parsed.path = new_path
@@ -237,30 +294,30 @@ module Addressable
237
294
  return nil unless path
238
295
  # If a URI object is passed, just return itself.
239
296
  return path if path.kind_of?(self)
240
- if !path.respond_to?(:to_str)
297
+ unless path.respond_to?(:to_str)
241
298
  raise TypeError, "Can't convert #{path.class} into String."
242
299
  end
243
300
  # Otherwise, convert to a String
244
301
  path = path.to_str.strip
245
302
 
246
- path.gsub!(/^file:\/?\/?/, EMPTY_STR) if path =~ /^file:\/?\/?/
303
+ path.sub!(/^file:\/?\/?/, EMPTY_STR) if path =~ /^file:\/?\/?/
247
304
  path = SLASH + path if path =~ /^([a-zA-Z])[\|:]/
248
305
  uri = self.parse(path)
249
306
 
250
307
  if uri.scheme == nil
251
308
  # Adjust windows-style uris
252
- uri.path.gsub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
309
+ uri.path.sub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
253
310
  "/#{$1.downcase}:/"
254
311
  end
255
- uri.path.gsub!(/\\/, SLASH)
256
- if File.exists?(uri.path) &&
312
+ uri.path.tr!("\\", SLASH)
313
+ if File.exist?(uri.path) &&
257
314
  File.stat(uri.path).directory?
258
- uri.path.gsub!(/\/$/, EMPTY_STR)
315
+ uri.path.chomp!(SLASH)
259
316
  uri.path = uri.path + '/'
260
317
  end
261
318
 
262
319
  # If the path is absolute, set the scheme and host.
263
- if uri.path =~ /^\//
320
+ if uri.path.start_with?(SLASH)
264
321
  uri.scheme = "file"
265
322
  uri.host = EMPTY_STR
266
323
  end
@@ -285,18 +342,29 @@ module Addressable
285
342
  # #=> #<Addressable::URI:0xcab390 URI:http://example.com/relative/path>
286
343
  def self.join(*uris)
287
344
  uri_objects = uris.collect do |uri|
288
- if !uri.respond_to?(:to_str)
345
+ unless uri.respond_to?(:to_str)
289
346
  raise TypeError, "Can't convert #{uri.class} into String."
290
347
  end
291
348
  uri.kind_of?(self) ? uri : self.parse(uri.to_str)
292
349
  end
293
350
  result = uri_objects.shift.dup
294
- for uri in uri_objects
351
+ uri_objects.each do |uri|
295
352
  result.join!(uri)
296
353
  end
297
354
  return result
298
355
  end
299
356
 
357
+ ##
358
+ # Tables used to optimize encoding operations in `self.encode_component`
359
+ # and `self.normalize_component`
360
+ SEQUENCE_ENCODING_TABLE = (0..255).map do |byte|
361
+ format("%02x", byte).freeze
362
+ end.freeze
363
+
364
+ SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE = (0..255).map do |byte|
365
+ format("%%%02X", byte).freeze
366
+ end.freeze
367
+
300
368
  ##
301
369
  # Percent encodes a URI component.
302
370
  #
@@ -332,9 +400,7 @@ module Addressable
332
400
  # "simple/example", Addressable::URI::CharacterClasses::UNRESERVED
333
401
  # )
334
402
  # => "simple%2Fexample"
335
- def self.encode_component(component, character_class=
336
- CharacterClasses::RESERVED + CharacterClasses::UNRESERVED,
337
- upcase_encoded='')
403
+ def self.encode_component(component, character_class=CharacterClassesRegexps::RESERVED_AND_UNRESERVED, upcase_encoded='')
338
404
  return nil if component.nil?
339
405
 
340
406
  begin
@@ -357,25 +423,27 @@ module Addressable
357
423
  if character_class.kind_of?(String)
358
424
  character_class = /[^#{character_class}]/
359
425
  end
360
- if component.respond_to?(:force_encoding)
361
- # We can't perform regexps on invalid UTF sequences, but
362
- # here we need to, so switch to ASCII.
363
- component = component.dup
364
- component.force_encoding(Encoding::ASCII_8BIT)
365
- end
366
- component.gsub!(character_class) do |sequence|
367
- (sequence.unpack('C*').map { |c| "%" + ("%02x" % c).upcase }).join
426
+ # We can't perform regexps on invalid UTF sequences, but
427
+ # here we need to, so switch to ASCII.
428
+ component = component.dup
429
+ component.force_encoding(Encoding::ASCII_8BIT)
430
+ # Avoiding gsub! because there are edge cases with frozen strings
431
+ component = component.gsub(character_class) do |char|
432
+ SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE[char.ord]
368
433
  end
369
434
  if upcase_encoded.length > 0
370
- component.gsub!(/%(#{upcase_encoded.chars.map do |char|
371
- char.unpack('C*').map { |c| '%02x' % c }.join
372
- end.join('|')})/i) { |s| s.upcase }
435
+ upcase_encoded_chars = upcase_encoded.bytes.map do |byte|
436
+ SEQUENCE_ENCODING_TABLE[byte]
437
+ end
438
+ component = component.gsub(/%(#{upcase_encoded_chars.join('|')})/,
439
+ &:upcase)
373
440
  end
441
+
374
442
  return component
375
443
  end
376
444
 
377
445
  class << self
378
- alias_method :encode_component, :encode_component
446
+ alias_method :escape_component, :encode_component
379
447
  end
380
448
 
381
449
  ##
@@ -414,11 +482,14 @@ module Addressable
414
482
  "Expected Class (String or Addressable::URI), " +
415
483
  "got #{return_type.inspect}"
416
484
  end
485
+
417
486
  result = uri.gsub(/%[0-9a-f]{2}/i) do |sequence|
418
487
  c = sequence[1..3].to_i(16).chr
488
+ c.force_encoding(sequence.encoding)
419
489
  leave_encoded.include?(c) ? sequence : c
420
490
  end
421
- result.force_encoding("utf-8") if result.respond_to?(:force_encoding)
491
+
492
+ result.force_encoding(Encoding::UTF_8)
422
493
  if return_type == String
423
494
  return result
424
495
  elsif return_type == ::Addressable::URI
@@ -479,7 +550,7 @@ module Addressable
479
550
  # )
480
551
  # => "one two%2Fthree&four"
481
552
  def self.normalize_component(component, character_class=
482
- CharacterClasses::RESERVED + CharacterClasses::UNRESERVED,
553
+ CharacterClassesRegexps::RESERVED_AND_UNRESERVED,
483
554
  leave_encoded='')
484
555
  return nil if component.nil?
485
556
 
@@ -495,32 +566,34 @@ module Addressable
495
566
  end
496
567
  if character_class.kind_of?(String)
497
568
  leave_re = if leave_encoded.length > 0
498
- character_class << '%'
569
+ character_class = "#{character_class}%" unless character_class.include?('%')
499
570
 
500
- "|%(?!#{leave_encoded.chars.map do |char|
501
- seq = char.unpack('C*').map { |c| '%02x' % c }.join
502
- [seq.upcase, seq.downcase]
503
- end.flatten.join('|')})"
571
+ bytes = leave_encoded.bytes
572
+ leave_encoded_pattern = bytes.map { |b| SEQUENCE_ENCODING_TABLE[b] }.join('|')
573
+ "|%(?!#{leave_encoded_pattern}|#{leave_encoded_pattern.upcase})"
504
574
  end
505
575
 
506
- character_class = /[^#{character_class}]#{leave_re}/
507
- end
508
- if component.respond_to?(:force_encoding)
509
- # We can't perform regexps on invalid UTF sequences, but
510
- # here we need to, so switch to ASCII.
511
- component = component.dup
512
- component.force_encoding(Encoding::ASCII_8BIT)
576
+ character_class = if leave_re
577
+ /[^#{character_class}]#{leave_re}/
578
+ else
579
+ /[^#{character_class}]/
580
+ end
513
581
  end
582
+ # We can't perform regexps on invalid UTF sequences, but
583
+ # here we need to, so switch to ASCII.
584
+ component = component.dup
585
+ component.force_encoding(Encoding::ASCII_8BIT)
514
586
  unencoded = self.unencode_component(component, String, leave_encoded)
515
587
  begin
516
588
  encoded = self.encode_component(
517
- Addressable::IDNA.unicode_normalize_kc(unencoded),
589
+ unencoded.unicode_normalize(:nfc),
518
590
  character_class,
519
591
  leave_encoded
520
592
  )
521
593
  rescue ArgumentError
522
594
  encoded = self.encode_component(unencoded)
523
595
  end
596
+ encoded.force_encoding(Encoding::UTF_8)
524
597
  return encoded
525
598
  end
526
599
 
@@ -557,15 +630,15 @@ module Addressable
557
630
  uri_object = uri.kind_of?(self) ? uri : self.parse(uri)
558
631
  encoded_uri = Addressable::URI.new(
559
632
  :scheme => self.encode_component(uri_object.scheme,
560
- Addressable::URI::CharacterClasses::SCHEME),
633
+ Addressable::URI::CharacterClassesRegexps::SCHEME),
561
634
  :authority => self.encode_component(uri_object.authority,
562
- Addressable::URI::CharacterClasses::AUTHORITY),
635
+ Addressable::URI::CharacterClassesRegexps::AUTHORITY),
563
636
  :path => self.encode_component(uri_object.path,
564
- Addressable::URI::CharacterClasses::PATH),
637
+ Addressable::URI::CharacterClassesRegexps::PATH),
565
638
  :query => self.encode_component(uri_object.query,
566
- Addressable::URI::CharacterClasses::QUERY),
639
+ Addressable::URI::CharacterClassesRegexps::QUERY),
567
640
  :fragment => self.encode_component(uri_object.fragment,
568
- Addressable::URI::CharacterClasses::FRAGMENT)
641
+ Addressable::URI::CharacterClassesRegexps::FRAGMENT)
569
642
  )
570
643
  if return_type == String
571
644
  return encoded_uri.to_s
@@ -621,8 +694,7 @@ module Addressable
621
694
  components.each do |key, value|
622
695
  if value != nil
623
696
  begin
624
- components[key] =
625
- Addressable::IDNA.unicode_normalize_kc(value.to_str)
697
+ components[key] = value.to_str.unicode_normalize(:nfc)
626
698
  rescue ArgumentError
627
699
  # Likely a malformed UTF-8 character, skip unicode normalization
628
700
  components[key] = value.to_str
@@ -631,19 +703,19 @@ module Addressable
631
703
  end
632
704
  encoded_uri = Addressable::URI.new(
633
705
  :scheme => self.encode_component(components[:scheme],
634
- Addressable::URI::CharacterClasses::SCHEME),
706
+ Addressable::URI::CharacterClassesRegexps::SCHEME),
635
707
  :user => self.encode_component(components[:user],
636
- Addressable::URI::CharacterClasses::UNRESERVED),
708
+ Addressable::URI::CharacterClassesRegexps::UNRESERVED),
637
709
  :password => self.encode_component(components[:password],
638
- Addressable::URI::CharacterClasses::UNRESERVED),
710
+ Addressable::URI::CharacterClassesRegexps::UNRESERVED),
639
711
  :host => components[:host],
640
712
  :port => components[:port],
641
713
  :path => self.encode_component(components[:path],
642
- Addressable::URI::CharacterClasses::PATH),
714
+ Addressable::URI::CharacterClassesRegexps::PATH),
643
715
  :query => self.encode_component(components[:query],
644
- Addressable::URI::CharacterClasses::QUERY),
716
+ Addressable::URI::CharacterClassesRegexps::QUERY),
645
717
  :fragment => self.encode_component(components[:fragment],
646
- Addressable::URI::CharacterClasses::FRAGMENT)
718
+ Addressable::URI::CharacterClassesRegexps::FRAGMENT)
647
719
  )
648
720
  if return_type == String
649
721
  return encoded_uri.to_s
@@ -694,17 +766,17 @@ module Addressable
694
766
  [
695
767
  self.encode_component(
696
768
  key.gsub(/(\r\n|\n|\r)/, "\r\n"),
697
- CharacterClasses::UNRESERVED
769
+ CharacterClassesRegexps::UNRESERVED
698
770
  ).gsub("%20", "+"),
699
771
  self.encode_component(
700
772
  value.gsub(/(\r\n|\n|\r)/, "\r\n"),
701
- CharacterClasses::UNRESERVED
773
+ CharacterClassesRegexps::UNRESERVED
702
774
  ).gsub("%20", "+")
703
775
  ]
704
776
  end
705
- return (escaped_form_values.map do |(key, value)|
777
+ return escaped_form_values.map do |(key, value)|
706
778
  "#{key}=#{value}"
707
- end).join("&")
779
+ end.join("&")
708
780
  end
709
781
 
710
782
  ##
@@ -770,7 +842,9 @@ module Addressable
770
842
  end
771
843
  end
772
844
 
773
- self.defer_validation do
845
+ reset_ivs
846
+
847
+ defer_validation do
774
848
  # Bunch of crazy logic required because of the composite components
775
849
  # like userinfo and authority.
776
850
  self.scheme = options[:scheme] if options[:scheme]
@@ -785,6 +859,8 @@ module Addressable
785
859
  self.query_values = options[:query_values] if options[:query_values]
786
860
  self.fragment = options[:fragment] if options[:fragment]
787
861
  end
862
+
863
+ to_s # force path validation
788
864
  end
789
865
 
790
866
  ##
@@ -811,25 +887,27 @@ module Addressable
811
887
  # The scheme component for this URI.
812
888
  #
813
889
  # @return [String] The scheme component.
814
- def scheme
815
- return instance_variable_defined?(:@scheme) ? @scheme : nil
816
- end
890
+ attr_reader :scheme
817
891
 
818
892
  ##
819
893
  # The scheme component for this URI, normalized.
820
894
  #
821
895
  # @return [String] The scheme component, normalized.
822
896
  def normalized_scheme
823
- self.scheme && @normalized_scheme ||= (begin
824
- if self.scheme =~ /^\s*ssh\+svn\s*$/i
825
- "svn+ssh"
897
+ return nil unless self.scheme
898
+ if @normalized_scheme == NONE
899
+ @normalized_scheme = if self.scheme =~ /^\s*ssh\+svn\s*$/i
900
+ "svn+ssh".dup
826
901
  else
827
902
  Addressable::URI.normalize_component(
828
903
  self.scheme.strip.downcase,
829
- Addressable::URI::CharacterClasses::SCHEME
904
+ Addressable::URI::NormalizeCharacterClasses::SCHEME
830
905
  )
831
906
  end
832
- end)
907
+ end
908
+ # All normalized values should be UTF-8
909
+ force_utf8_encoding_if_needed(@normalized_scheme)
910
+ @normalized_scheme
833
911
  end
834
912
 
835
913
  ##
@@ -842,16 +920,15 @@ module Addressable
842
920
  elsif new_scheme
843
921
  new_scheme = new_scheme.to_str
844
922
  end
845
- if new_scheme && new_scheme !~ /[a-z][a-z0-9\.\+\-]*/i
846
- raise InvalidURIError, "Invalid scheme format."
923
+ if new_scheme && new_scheme !~ /\A[a-z][a-z0-9\.\+\-]*\z/i
924
+ raise InvalidURIError, "Invalid scheme format: '#{new_scheme}'"
847
925
  end
848
926
  @scheme = new_scheme
849
927
  @scheme = nil if @scheme.to_s.strip.empty?
850
928
 
851
- # Reset dependant values
852
- @normalized_scheme = nil
853
- @uri_string = nil
854
- @hash = nil
929
+ # Reset dependent values
930
+ @normalized_scheme = NONE
931
+ remove_composite_values
855
932
 
856
933
  # Ensure we haven't created an invalid URI
857
934
  validate()
@@ -861,26 +938,29 @@ module Addressable
861
938
  # The user component for this URI.
862
939
  #
863
940
  # @return [String] The user component.
864
- def user
865
- return instance_variable_defined?(:@user) ? @user : nil
866
- end
941
+ attr_reader :user
867
942
 
868
943
  ##
869
944
  # The user component for this URI, normalized.
870
945
  #
871
946
  # @return [String] The user component, normalized.
872
947
  def normalized_user
873
- self.user && @normalized_user ||= (begin
948
+ return nil unless self.user
949
+ return @normalized_user unless @normalized_user == NONE
950
+ @normalized_user = begin
874
951
  if normalized_scheme =~ /https?/ && self.user.strip.empty? &&
875
952
  (!self.password || self.password.strip.empty?)
876
953
  nil
877
954
  else
878
955
  Addressable::URI.normalize_component(
879
956
  self.user.strip,
880
- Addressable::URI::CharacterClasses::UNRESERVED
957
+ Addressable::URI::NormalizeCharacterClasses::UNRESERVED
881
958
  )
882
959
  end
883
- end)
960
+ end
961
+ # All normalized values should be UTF-8
962
+ force_utf8_encoding_if_needed(@normalized_user)
963
+ @normalized_user
884
964
  end
885
965
 
886
966
  ##
@@ -895,16 +975,15 @@ module Addressable
895
975
 
896
976
  # You can't have a nil user with a non-nil password
897
977
  if password != nil
898
- @user = EMPTY_STR if @user.nil?
978
+ @user = EMPTY_STR unless user
899
979
  end
900
980
 
901
- # Reset dependant values
981
+ # Reset dependent values
902
982
  @userinfo = nil
903
- @normalized_userinfo = nil
983
+ @normalized_userinfo = NONE
904
984
  @authority = nil
905
- @normalized_user = nil
906
- @uri_string = nil
907
- @hash = nil
985
+ @normalized_user = NONE
986
+ remove_composite_values
908
987
 
909
988
  # Ensure we haven't created an invalid URI
910
989
  validate()
@@ -914,26 +993,29 @@ module Addressable
914
993
  # The password component for this URI.
915
994
  #
916
995
  # @return [String] The password component.
917
- def password
918
- return instance_variable_defined?(:@password) ? @password : nil
919
- end
996
+ attr_reader :password
920
997
 
921
998
  ##
922
999
  # The password component for this URI, normalized.
923
1000
  #
924
1001
  # @return [String] The password component, normalized.
925
1002
  def normalized_password
926
- self.password && @normalized_password ||= (begin
1003
+ return nil unless self.password
1004
+ return @normalized_password unless @normalized_password == NONE
1005
+ @normalized_password = begin
927
1006
  if self.normalized_scheme =~ /https?/ && self.password.strip.empty? &&
928
1007
  (!self.user || self.user.strip.empty?)
929
1008
  nil
930
1009
  else
931
1010
  Addressable::URI.normalize_component(
932
1011
  self.password.strip,
933
- Addressable::URI::CharacterClasses::UNRESERVED
1012
+ Addressable::URI::NormalizeCharacterClasses::UNRESERVED
934
1013
  )
935
1014
  end
936
- end)
1015
+ end
1016
+ # All normalized values should be UTF-8
1017
+ force_utf8_encoding_if_needed(@normalized_password)
1018
+ @normalized_password
937
1019
  end
938
1020
 
939
1021
  ##
@@ -947,19 +1029,16 @@ module Addressable
947
1029
  @password = new_password ? new_password.to_str : nil
948
1030
 
949
1031
  # You can't have a nil user with a non-nil password
950
- @password ||= nil
951
- @user ||= nil
952
1032
  if @password != nil
953
- @user = EMPTY_STR if @user.nil?
1033
+ self.user = EMPTY_STR if user.nil?
954
1034
  end
955
1035
 
956
- # Reset dependant values
1036
+ # Reset dependent values
957
1037
  @userinfo = nil
958
- @normalized_userinfo = nil
1038
+ @normalized_userinfo = NONE
959
1039
  @authority = nil
960
- @normalized_password = nil
961
- @uri_string = nil
962
- @hash = nil
1040
+ @normalized_password = NONE
1041
+ remove_composite_values
963
1042
 
964
1043
  # Ensure we haven't created an invalid URI
965
1044
  validate()
@@ -973,13 +1052,13 @@ module Addressable
973
1052
  def userinfo
974
1053
  current_user = self.user
975
1054
  current_password = self.password
976
- (current_user || current_password) && @userinfo ||= (begin
1055
+ (current_user || current_password) && @userinfo ||= begin
977
1056
  if current_user && current_password
978
1057
  "#{current_user}:#{current_password}"
979
1058
  elsif current_user && !current_password
980
1059
  "#{current_user}"
981
1060
  end
982
- end)
1061
+ end
983
1062
  end
984
1063
 
985
1064
  ##
@@ -987,17 +1066,22 @@ module Addressable
987
1066
  #
988
1067
  # @return [String] The userinfo component, normalized.
989
1068
  def normalized_userinfo
990
- self.userinfo && @normalized_userinfo ||= (begin
1069
+ return nil unless self.userinfo
1070
+ return @normalized_userinfo unless @normalized_userinfo == NONE
1071
+ @normalized_userinfo = begin
991
1072
  current_user = self.normalized_user
992
1073
  current_password = self.normalized_password
993
1074
  if !current_user && !current_password
994
1075
  nil
995
1076
  elsif current_user && current_password
996
- "#{current_user}:#{current_password}"
1077
+ "#{current_user}:#{current_password}".dup
997
1078
  elsif current_user && !current_password
998
- "#{current_user}"
1079
+ "#{current_user}".dup
999
1080
  end
1000
- end)
1081
+ end
1082
+ # All normalized values should be UTF-8
1083
+ force_utf8_encoding_if_needed(@normalized_userinfo)
1084
+ @normalized_userinfo
1001
1085
  end
1002
1086
 
1003
1087
  ##
@@ -1021,10 +1105,9 @@ module Addressable
1021
1105
  self.password = new_password
1022
1106
  self.user = new_user
1023
1107
 
1024
- # Reset dependant values
1108
+ # Reset dependent values
1025
1109
  @authority = nil
1026
- @uri_string = nil
1027
- @hash = nil
1110
+ remove_composite_values
1028
1111
 
1029
1112
  # Ensure we haven't created an invalid URI
1030
1113
  validate()
@@ -1034,29 +1117,36 @@ module Addressable
1034
1117
  # The host component for this URI.
1035
1118
  #
1036
1119
  # @return [String] The host component.
1037
- def host
1038
- return instance_variable_defined?(:@host) ? @host : nil
1039
- end
1120
+ attr_reader :host
1040
1121
 
1041
1122
  ##
1042
1123
  # The host component for this URI, normalized.
1043
1124
  #
1044
1125
  # @return [String] The host component, normalized.
1045
1126
  def normalized_host
1046
- self.host && @normalized_host ||= (begin
1127
+ return nil unless self.host
1128
+
1129
+ @normalized_host ||= begin
1047
1130
  if !self.host.strip.empty?
1048
1131
  result = ::Addressable::IDNA.to_ascii(
1049
1132
  URI.unencode_component(self.host.strip.downcase)
1050
1133
  )
1051
- if result[-1..-1] == "."
1052
- # Trailing dots are unnecessary
1134
+ if result =~ /[^\.]\.$/
1135
+ # Single trailing dots are unnecessary.
1053
1136
  result = result[0...-1]
1054
1137
  end
1138
+ result = Addressable::URI.normalize_component(
1139
+ result,
1140
+ NormalizeCharacterClasses::HOST
1141
+ )
1055
1142
  result
1056
1143
  else
1057
- EMPTY_STR
1144
+ EMPTY_STR.dup
1058
1145
  end
1059
- end)
1146
+ end
1147
+ # All normalized values should be UTF-8
1148
+ force_utf8_encoding_if_needed(@normalized_host)
1149
+ @normalized_host
1060
1150
  end
1061
1151
 
1062
1152
  ##
@@ -1069,31 +1159,72 @@ module Addressable
1069
1159
  end
1070
1160
  @host = new_host ? new_host.to_str : nil
1071
1161
 
1072
- unreserved = CharacterClasses::UNRESERVED
1073
- sub_delims = CharacterClasses::SUB_DELIMS
1074
- if @host != nil && (@host =~ /[<>{}\/\?\#\@]/ ||
1075
- (@host[/^\[(.*)\]$/, 1] != nil && @host[/^\[(.*)\]$/, 1] !~
1076
- Regexp.new("^[#{unreserved}#{sub_delims}:]*$")))
1077
- raise InvalidURIError, "Invalid character in host: '#{@host.to_s}'"
1078
- end
1079
-
1080
- # Reset dependant values
1162
+ # Reset dependent values
1081
1163
  @authority = nil
1082
1164
  @normalized_host = nil
1083
- @uri_string = nil
1084
- @hash = nil
1165
+ remove_composite_values
1085
1166
 
1086
1167
  # Ensure we haven't created an invalid URI
1087
1168
  validate()
1088
1169
  end
1089
1170
 
1090
1171
  ##
1172
+ # This method is same as URI::Generic#host except
1173
+ # brackets for IPv6 (and 'IPvFuture') addresses are removed.
1174
+ #
1091
1175
  # @see Addressable::URI#host
1092
- alias_method :hostname, :host
1176
+ #
1177
+ # @return [String] The hostname for this URI.
1178
+ def hostname
1179
+ v = self.host
1180
+ /\A\[(.*)\]\z/ =~ v ? $1 : v
1181
+ end
1093
1182
 
1094
1183
  ##
1184
+ # This method is same as URI::Generic#host= except
1185
+ # the argument can be a bare IPv6 address (or 'IPvFuture').
1186
+ #
1095
1187
  # @see Addressable::URI#host=
1096
- alias_method :hostname=, :host=
1188
+ #
1189
+ # @param [String, #to_str] new_hostname The new hostname for this URI.
1190
+ def hostname=(new_hostname)
1191
+ if new_hostname &&
1192
+ (new_hostname.respond_to?(:ipv4?) || new_hostname.respond_to?(:ipv6?))
1193
+ new_hostname = new_hostname.to_s
1194
+ elsif new_hostname && !new_hostname.respond_to?(:to_str)
1195
+ raise TypeError, "Can't convert #{new_hostname.class} into String."
1196
+ end
1197
+ v = new_hostname ? new_hostname.to_str : nil
1198
+ v = "[#{v}]" if /\A\[.*\]\z/ !~ v && /:/ =~ v
1199
+ self.host = v
1200
+ end
1201
+
1202
+ ##
1203
+ # Returns the top-level domain for this host.
1204
+ #
1205
+ # @example
1206
+ # Addressable::URI.parse("http://www.example.co.uk").tld # => "co.uk"
1207
+ def tld
1208
+ PublicSuffix.parse(self.host, ignore_private: true).tld
1209
+ end
1210
+
1211
+ ##
1212
+ # Sets the top-level domain for this URI.
1213
+ #
1214
+ # @param [String, #to_str] new_tld The new top-level domain.
1215
+ def tld=(new_tld)
1216
+ replaced_tld = host.sub(/#{tld}\z/, new_tld)
1217
+ self.host = PublicSuffix::Domain.new(replaced_tld).to_s
1218
+ end
1219
+
1220
+ ##
1221
+ # Returns the public suffix domain for this host.
1222
+ #
1223
+ # @example
1224
+ # Addressable::URI.parse("http://www.example.co.uk").domain # => "example.co.uk"
1225
+ def domain
1226
+ PublicSuffix.domain(self.host, ignore_private: true)
1227
+ end
1097
1228
 
1098
1229
  ##
1099
1230
  # The authority component for this URI.
@@ -1101,8 +1232,8 @@ module Addressable
1101
1232
  #
1102
1233
  # @return [String] The authority component.
1103
1234
  def authority
1104
- self.host && @authority ||= (begin
1105
- authority = ""
1235
+ self.host && @authority ||= begin
1236
+ authority = String.new
1106
1237
  if self.userinfo != nil
1107
1238
  authority << "#{self.userinfo}@"
1108
1239
  end
@@ -1111,7 +1242,7 @@ module Addressable
1111
1242
  authority << ":#{self.port}"
1112
1243
  end
1113
1244
  authority
1114
- end)
1245
+ end
1115
1246
  end
1116
1247
 
1117
1248
  ##
@@ -1119,8 +1250,9 @@ module Addressable
1119
1250
  #
1120
1251
  # @return [String] The authority component, normalized.
1121
1252
  def normalized_authority
1122
- self.authority && @normalized_authority ||= (begin
1123
- authority = ""
1253
+ return nil unless self.authority
1254
+ @normalized_authority ||= begin
1255
+ authority = String.new
1124
1256
  if self.normalized_userinfo != nil
1125
1257
  authority << "#{self.normalized_userinfo}@"
1126
1258
  end
@@ -1129,7 +1261,10 @@ module Addressable
1129
1261
  authority << ":#{self.normalized_port}"
1130
1262
  end
1131
1263
  authority
1132
- end)
1264
+ end
1265
+ # All normalized values should be UTF-8
1266
+ force_utf8_encoding_if_needed(@normalized_authority)
1267
+ @normalized_authority
1133
1268
  end
1134
1269
 
1135
1270
  ##
@@ -1147,9 +1282,9 @@ module Addressable
1147
1282
  new_user = new_userinfo.strip[/^([^:]*):?/, 1]
1148
1283
  new_password = new_userinfo.strip[/:(.*)$/, 1]
1149
1284
  end
1150
- new_host = new_authority.gsub(
1285
+ new_host = new_authority.sub(
1151
1286
  /^([^\[\]]*)@/, EMPTY_STR
1152
- ).gsub(
1287
+ ).sub(
1153
1288
  /:([^:@\[\]]*?)$/, EMPTY_STR
1154
1289
  )
1155
1290
  new_port =
@@ -1157,16 +1292,15 @@ module Addressable
1157
1292
  end
1158
1293
 
1159
1294
  # Password assigned first to ensure validity in case of nil
1160
- self.password = defined?(new_password) ? new_password : nil
1161
- self.user = defined?(new_user) ? new_user : nil
1162
- self.host = defined?(new_host) ? new_host : nil
1163
- self.port = defined?(new_port) ? new_port : nil
1295
+ self.password = new_password
1296
+ self.user = new_user
1297
+ self.host = new_host
1298
+ self.port = new_port
1164
1299
 
1165
- # Reset dependant values
1300
+ # Reset dependent values
1166
1301
  @userinfo = nil
1167
- @normalized_userinfo = nil
1168
- @uri_string = nil
1169
- @hash = nil
1302
+ @normalized_userinfo = NONE
1303
+ remove_composite_values
1170
1304
 
1171
1305
  # Ensure we haven't created an invalid URI
1172
1306
  validate()
@@ -1174,22 +1308,59 @@ module Addressable
1174
1308
 
##
# The origin for this URI, serialized to ASCII, as per
# RFC 6454, section 6.2.
#
# @return [String]
#   The serialized origin, or the string "null" when the URI lacks a
#   scheme or an authority.
def origin
  # Without both a scheme and an authority there is no origin to name.
  return "null" unless self.scheme && self.authority

  port_part = self.normalized_port
  base = "#{self.normalized_scheme}://#{self.normalized_host}"
  # normalized_port is nil for a scheme's default port, which is then omitted.
  port_part ? base + ":#{port_part}" : base
end
1326
+
##
# Sets the origin for this URI, serialized to ASCII, as per
# RFC 6454, section 6.2. This assignment will reset the `userinfo`
# component.
#
# A bracketed IP literal such as <code>[::1]</code> is accepted as the host,
# so a value produced by #origin can be assigned back.
#
# @param [String, #to_str] new_origin The new origin component.
#
# @raise [TypeError] If the value does not respond to to_str.
# @raise [InvalidURIError] If the scheme or the host is missing.
def origin=(new_origin)
  if new_origin
    if !new_origin.respond_to?(:to_str)
      raise TypeError, "Can't convert #{new_origin.class} into String."
    end
    new_origin = new_origin.to_str
    new_scheme = new_origin[/^([^:\/?#]+):\/\//, 1]
    unless new_scheme
      raise InvalidURIError, 'An origin cannot omit the scheme.'
    end
    # A bracketed IP literal contains colons, so it is tried first as a
    # whole; otherwise the host runs up to the next ":", "/", "?" or "#".
    new_host = new_origin[/:\/\/(\[[^\[\]\/?#]*\]|[^\/?#:]+)/, 1]
    unless new_host
      raise InvalidURIError, 'An origin cannot omit the host.'
    end
    new_port = new_origin[/:([^:@\[\]\/]*?)$/, 1]
  end

  # When new_origin is nil the three locals are nil, which clears the
  # corresponding components.
  self.scheme = new_scheme
  self.host = new_host
  self.port = new_port
  self.userinfo = nil

  # Reset dependent values
  @userinfo = nil
  @normalized_userinfo = NONE
  @authority = nil
  @normalized_authority = nil
  remove_composite_values

  # Ensure we haven't created an invalid URI
  validate()
end
1194
1365
 
1195
1366
  # Returns an array of known ip-based schemes. These schemes typically
@@ -1212,19 +1383,21 @@ module Addressable
1212
1383
  # infer port numbers from default values.
1213
1384
  #
1214
1385
  # @return [Integer] The port component.
1215
- def port
1216
- return instance_variable_defined?(:@port) ? @port : nil
1217
- end
1386
+ attr_reader :port
1218
1387
 
##
# The port component for this URI, normalized.
#
# A port equal to the entry of URI.port_mapping for the normalized scheme is
# normalized to nil. The result is memoized until the NONE sentinel is
# stored in @normalized_port again.
#
# @return [Integer] The port component, normalized.
def normalized_port
  current_port = self.port
  return nil unless current_port
  return @normalized_port unless @normalized_port == NONE

  default_port = URI.port_mapping[self.normalized_scheme]
  @normalized_port = (default_port == current_port ? nil : current_port)
end
1230
1403
 
@@ -1236,6 +1409,11 @@ module Addressable
1236
1409
  if new_port != nil && new_port.respond_to?(:to_str)
1237
1410
  new_port = Addressable::URI.unencode_component(new_port.to_str)
1238
1411
  end
1412
+
1413
+ if new_port.respond_to?(:valid_encoding?) && !new_port.valid_encoding?
1414
+ raise InvalidURIError, "Invalid encoding in port"
1415
+ end
1416
+
1239
1417
  if new_port != nil && !(new_port.to_s =~ /^\d+$/)
1240
1418
  raise InvalidURIError,
1241
1419
  "Invalid port number: #{new_port.inspect}"
@@ -1244,11 +1422,10 @@ module Addressable
1244
1422
  @port = new_port.to_s.to_i
1245
1423
  @port = nil if @port == 0
1246
1424
 
1247
- # Reset dependant values
1425
+ # Reset dependent values
1248
1426
  @authority = nil
1249
- @normalized_port = nil
1250
- @uri_string = nil
1251
- @hash = nil
1427
+ @normalized_port = NONE
1428
+ remove_composite_values
1252
1429
 
1253
1430
  # Ensure we haven't created an invalid URI
1254
1431
  validate()
@@ -1288,12 +1465,12 @@ module Addressable
1288
1465
  #
1289
1466
  # @return [String] The components that identify a site.
def site
  # With neither a scheme nor an authority there is nothing to assemble.
  anchor = self.scheme || self.authority
  return anchor unless anchor

  # The assembled string is memoized in @site.
  @site ||= begin
    assembled = +""
    assembled << "#{self.scheme}:" unless self.scheme.nil?
    assembled << "//#{self.authority}" unless self.authority.nil?
    assembled
  end
end
1298
1475
 
1299
1476
  ##
@@ -1306,8 +1483,9 @@ module Addressable
1306
1483
  #
1307
1484
  # @return [String] The normalized components that identify a site.
1308
1485
  def normalized_site
1309
- self.site && @normalized_site ||= (begin
1310
- site_string = ""
1486
+ return nil unless self.site
1487
+ @normalized_site ||= begin
1488
+ site_string = "".dup
1311
1489
  if self.normalized_scheme != nil
1312
1490
  site_string << "#{self.normalized_scheme}:"
1313
1491
  end
@@ -1315,7 +1493,10 @@ module Addressable
1315
1493
  site_string << "//#{self.normalized_authority}"
1316
1494
  end
1317
1495
  site_string
1318
- end)
1496
+ end
1497
+ # All normalized values should be UTF-8
1498
+ force_utf8_encoding_if_needed(@normalized_site)
1499
+ @normalized_site
1319
1500
  end
1320
1501
 
1321
1502
  ##
@@ -1344,9 +1525,7 @@ module Addressable
1344
1525
  # The path component for this URI.
1345
1526
  #
1346
1527
  # @return [String] The path component.
1347
- def path
1348
- return instance_variable_defined?(:@path) ? @path : EMPTY_STR
1349
- end
1528
+ attr_reader :path
1350
1529
 
1351
1530
  NORMPATH = /^(?!\/)[^\/:]*:.*$/
1352
1531
  ##
@@ -1354,28 +1533,31 @@ module Addressable
1354
1533
  #
1355
1534
  # @return [String] The path component, normalized.
def normalized_path
  @normalized_path ||= begin
    raw_path = self.path.to_s
    # Relative paths with colons in the first segment are ambiguous.
    # NOTE(review): "%2F" is the percent-encoding of "/", while ":" encodes
    # as "%3A" -- confirm that this substitution is the intended one.
    if self.scheme == nil && raw_path =~ NORMPATH
      raw_path = raw_path.sub(":", "%2F")
    end
    # A limit of -1 makes String#split keep trailing empty segments, the
    # stricter splitting behavior found by default in Python.
    segments = raw_path.strip.split(SLASH, -1)
    encoded_segments = segments.map do |segment|
      Addressable::URI.normalize_component(
        segment,
        Addressable::URI::NormalizeCharacterClasses::PCHAR
      )
    end
    collapsed = URI.normalize_path(encoded_segments.join(SLASH))
    # For these schemes an empty path is normalized to a single slash.
    needs_root = collapsed.empty? &&
      ["http", "https", "ftp", "tftp"].include?(self.normalized_scheme)
    needs_root ? SLASH.dup : collapsed
  end
  # All normalized values should be UTF-8
  force_utf8_encoding_if_needed(@normalized_path)
  @normalized_path
end
1380
1562
 
1381
1563
  ##
@@ -1391,10 +1573,12 @@ module Addressable
1391
1573
  @path = "/#{@path}"
1392
1574
  end
1393
1575
 
1394
- # Reset dependant values
1576
+ # Reset dependent values
1395
1577
  @normalized_path = nil
1396
- @uri_string = nil
1397
- @hash = nil
1578
+ remove_composite_values
1579
+
1580
+ # Ensure we haven't created an invalid URI
1581
+ validate()
1398
1582
  end
1399
1583
 
1400
1584
  ##
@@ -1403,7 +1587,7 @@ module Addressable
1403
1587
  # @return [String] The path's basename.
def basename
  # Path cannot be nil
  last_segment = File.basename(self.path)
  # Drop a trailing ";..." parameter section from the last segment.
  last_segment.sub(/;[^\/]*$/, EMPTY_STR)
end
1408
1592
 
1409
1593
  ##
@@ -1420,24 +1604,34 @@ module Addressable
1420
1604
  # The query component for this URI.
1421
1605
  #
1422
1606
  # @return [String] The query component.
1423
- def query
1424
- return instance_variable_defined?(:@query) ? @query : nil
1425
- end
1607
+ attr_reader :query
1426
1608
 
##
# The query component for this URI, normalized.
#
# Only the flag-less result is memoized. A call that passes flags is always
# computed fresh, so a flagged call can neither return nor overwrite the
# memoized plain value.
#
# @param [Array<Symbol>] flags
#   Optional flags: <code>:compacted</code> drops empty and duplicate
#   pairs, <code>:sorted</code> sorts the pairs.
#
# @return [String]
#   The query component, normalized, or nil when there is no query or the
#   normalized result is empty.
def normalized_query(*flags)
  return nil unless self.query
  cacheable = flags.empty?
  return @normalized_query if cacheable && @normalized_query != NONE

  # A limit of -1 keeps trailing empty pairs.
  pairs = self.query.split("&", -1)
  pairs = pairs.reject(&:empty?).uniq if flags.include?(:compacted)
  pairs.sort! if flags.include?(:sorted)
  component = pairs.map do |pair|
    Addressable::URI.normalize_component(
      pair,
      Addressable::URI::NormalizeCharacterClasses::QUERY,
      "+"
    )
  end.join("&")
  component = nil if component == ""
  # All normalized values should be UTF-8
  force_utf8_encoding_if_needed(component)
  @normalized_query = component if cacheable
  component
end
1442
1636
 
1443
1637
  ##
@@ -1450,10 +1644,9 @@ module Addressable
1450
1644
  end
1451
1645
  @query = new_query ? new_query.to_str : nil
1452
1646
 
1453
- # Reset dependant values
1454
- @normalized_query = nil
1455
- @uri_string = nil
1456
- @hash = nil
1647
+ # Reset dependent values
1648
+ @normalized_query = NONE
1649
+ remove_composite_values
1457
1650
  end
1458
1651
 
1459
1652
  ##
@@ -1462,7 +1655,8 @@ module Addressable
1462
1655
  # @param [Class] return_type The return type desired. Value must be either
1463
1656
  # `Hash` or `Array`.
1464
1657
  #
1465
- # @return [Hash, Array] The query string parsed as a Hash or Array object.
1658
+ # @return [Hash, Array, nil] The query string parsed as a Hash or Array
1659
+ # or nil if the query string is blank.
1466
1660
  #
1467
1661
  # @example
1468
1662
  # Addressable::URI.parse("?one=1&two=2&three=3").query_values
@@ -1471,26 +1665,32 @@ module Addressable
1471
1665
  # #=> [["one", "two"], ["one", "three"]]
1472
1666
  # Addressable::URI.parse("?one=two&one=three").query_values(Hash)
1473
1667
  # #=> {"one" => "three"}
1668
+ # Addressable::URI.parse("?").query_values
1669
+ # #=> {}
1670
+ # Addressable::URI.parse("").query_values
1671
+ # #=> nil
1474
1672
  def query_values(return_type=Hash)
1475
1673
  empty_accumulator = Array == return_type ? [] : {}
1476
1674
  if return_type != Hash && return_type != Array
1477
1675
  raise ArgumentError, "Invalid return type. Must be Hash or Array."
1478
1676
  end
1479
1677
  return nil if self.query == nil
1480
- split_query = (self.query.split("&").map do |pair|
1678
+ split_query = self.query.split("&").map do |pair|
1481
1679
  pair.split("=", 2) if pair && !pair.empty?
1482
- end).compact
1680
+ end.compact
1483
1681
  return split_query.inject(empty_accumulator.dup) do |accu, pair|
1484
1682
  # I'd rather use key/value identifiers instead of array lookups,
1485
1683
  # but in this case I really want to maintain the exact pair structure,
1486
1684
  # so it's best to make all changes in-place.
1487
1685
  pair[0] = URI.unencode_component(pair[0])
1488
1686
  if pair[1].respond_to?(:to_str)
1687
+ value = pair[1].to_str
1489
1688
  # I loathe the fact that I have to do this. Stupid HTML 4.01.
1490
1689
  # Treating '+' as a space was just an unbelievably bad idea.
1491
1690
  # There was nothing wrong with '%20'!
1492
1691
  # If it ain't broke, don't fix it!
1493
- pair[1] = URI.unencode_component(pair[1].to_str.gsub(/\+/, " "))
1692
+ value = value.tr("+", " ") if ["http", "https", nil].include?(scheme)
1693
+ pair[1] = URI.unencode_component(value)
1494
1694
  end
1495
1695
  if return_type == Hash
1496
1696
  accu[pair[0]] = pair[1]
@@ -1542,23 +1742,23 @@ module Addressable
1542
1742
  end
1543
1743
 
1544
1744
  # new_query_values have form [['key1', 'value1'], ['key2', 'value2']]
1545
- buffer = ""
1745
+ buffer = "".dup
1546
1746
  new_query_values.each do |key, value|
1547
1747
  encoded_key = URI.encode_component(
1548
- key, CharacterClasses::UNRESERVED
1748
+ key, CharacterClassesRegexps::UNRESERVED
1549
1749
  )
1550
1750
  if value == nil
1551
1751
  buffer << "#{encoded_key}&"
1552
1752
  elsif value.kind_of?(Array)
1553
1753
  value.each do |sub_value|
1554
1754
  encoded_value = URI.encode_component(
1555
- sub_value, CharacterClasses::UNRESERVED
1755
+ sub_value, CharacterClassesRegexps::UNRESERVED
1556
1756
  )
1557
1757
  buffer << "#{encoded_key}=#{encoded_value}&"
1558
1758
  end
1559
1759
  else
1560
1760
  encoded_value = URI.encode_component(
1561
- value, CharacterClasses::UNRESERVED
1761
+ value, CharacterClassesRegexps::UNRESERVED
1562
1762
  )
1563
1763
  buffer << "#{encoded_key}=#{encoded_value}&"
1564
1764
  end
@@ -1572,7 +1772,7 @@ module Addressable
1572
1772
  #
1573
1773
  # @return [String] The request URI required for an HTTP request.
1574
1774
  def request_uri
1575
- return nil if self.absolute? && self.scheme !~ /^https?$/
1775
+ return nil if self.absolute? && self.scheme !~ /^https?$/i
1576
1776
  return (
1577
1777
  (!self.path.empty? ? self.path : SLASH) +
1578
1778
  (self.query ? "?#{self.query}" : EMPTY_STR)
@@ -1587,42 +1787,45 @@ module Addressable
1587
1787
  if !new_request_uri.respond_to?(:to_str)
1588
1788
  raise TypeError, "Can't convert #{new_request_uri.class} into String."
1589
1789
  end
1590
- if self.absolute? && self.scheme !~ /^https?$/
1790
+ if self.absolute? && self.scheme !~ /^https?$/i
1591
1791
  raise InvalidURIError,
1592
1792
  "Cannot set an HTTP request URI for a non-HTTP URI."
1593
1793
  end
1594
1794
  new_request_uri = new_request_uri.to_str
1595
- path_component = new_request_uri[/^([^\?]*)\?(?:.*)$/, 1]
1795
+ path_component = new_request_uri[/^([^\?]*)\??(?:.*)$/, 1]
1596
1796
  query_component = new_request_uri[/^(?:[^\?]*)\?(.*)$/, 1]
1597
1797
  path_component = path_component.to_s
1598
1798
  path_component = (!path_component.empty? ? path_component : SLASH)
1599
1799
  self.path = path_component
1600
1800
  self.query = query_component
1601
1801
 
1602
- # Reset dependant values
1603
- @uri_string = nil
1604
- @hash = nil
1802
+ # Reset dependent values
1803
+ remove_composite_values
1605
1804
  end
1606
1805
 
1607
1806
  ##
1608
1807
  # The fragment component for this URI.
1609
1808
  #
1610
1809
  # @return [String] The fragment component.
1611
- def fragment
1612
- return instance_variable_defined?(:@fragment) ? @fragment : nil
1613
- end
1810
+ attr_reader :fragment
1614
1811
 
##
# The fragment component for this URI, normalized.
#
# The value is computed once and memoized until the NONE sentinel is stored
# in @normalized_fragment again. An empty normalized fragment becomes nil.
#
# @return [String] The fragment component, normalized.
def normalized_fragment
  raw_fragment = self.fragment
  return nil unless raw_fragment

  if @normalized_fragment == NONE
    normalized = Addressable::URI.normalize_component(
      raw_fragment,
      Addressable::URI::NormalizeCharacterClasses::FRAGMENT
    )
    @normalized_fragment = (normalized == "" ? nil : normalized)
    # All normalized values should be UTF-8
    force_utf8_encoding_if_needed(@normalized_fragment)
  end
  @normalized_fragment
end
1627
1830
 
1628
1831
  ##
@@ -1635,10 +1838,9 @@ module Addressable
1635
1838
  end
1636
1839
  @fragment = new_fragment ? new_fragment.to_str : nil
1637
1840
 
1638
- # Reset dependant values
1639
- @normalized_fragment = nil
1640
- @uri_string = nil
1641
- @hash = nil
1841
+ # Reset dependent values
1842
+ @normalized_fragment = NONE
1843
+ remove_composite_values
1642
1844
 
1643
1845
  # Ensure we haven't created an invalid URI
1644
1846
  validate()
@@ -1741,8 +1943,8 @@ module Addressable
1741
1943
  # Section 5.2.3 of RFC 3986
1742
1944
  #
1743
1945
  # Removes the right-most path segment from the base path.
1744
- if base_path =~ /\//
1745
- base_path.gsub!(/\/[^\/]+$/, SLASH)
1946
+ if base_path.include?(SLASH)
1947
+ base_path.sub!(/\/[^\/]+$/, SLASH)
1746
1948
  else
1747
1949
  base_path = EMPTY_STR
1748
1950
  end
@@ -1766,7 +1968,7 @@ module Addressable
1766
1968
  end
1767
1969
  joined_fragment = uri.fragment
1768
1970
 
1769
- return Addressable::URI.new(
1971
+ return self.class.new(
1770
1972
  :scheme => joined_scheme,
1771
1973
  :user => joined_user,
1772
1974
  :password => joined_password,
@@ -1803,7 +2005,7 @@ module Addressable
1803
2005
  #
1804
2006
  # @see Hash#merge
1805
2007
  def merge(hash)
1806
- if !hash.respond_to?(:to_hash)
2008
+ unless hash.respond_to?(:to_hash)
1807
2009
  raise TypeError, "Can't convert #{hash.class} into Hash."
1808
2010
  end
1809
2011
  hash = hash.to_hash
@@ -1822,7 +2024,7 @@ module Addressable
1822
2024
  end
1823
2025
  end
1824
2026
 
1825
- uri = Addressable::URI.new
2027
+ uri = self.class.new
1826
2028
  uri.defer_validation do
1827
2029
  # Bunch of crazy logic required because of the composite components
1828
2030
  # like userinfo and authority.
@@ -1906,9 +2108,16 @@ module Addressable
1906
2108
  components[:query] = nil
1907
2109
  end
1908
2110
  else
1909
- if uri.path != SLASH
1910
- components[:path].gsub!(
1911
- Regexp.new("^" + Regexp.escape(uri.path)), EMPTY_STR)
2111
+ if uri.path != SLASH and components[:path]
2112
+ self_splitted_path = split_path(components[:path])
2113
+ uri_splitted_path = split_path(uri.path)
2114
+ self_dir = self_splitted_path.shift
2115
+ uri_dir = uri_splitted_path.shift
2116
+ while !self_splitted_path.empty? && !uri_splitted_path.empty? and self_dir == uri_dir
2117
+ self_dir = self_splitted_path.shift
2118
+ uri_dir = uri_splitted_path.shift
2119
+ end
2120
+ components[:path] = (uri_splitted_path.fill('..') + [self_dir] + self_splitted_path).join(SLASH)
1912
2121
  end
1913
2122
  end
1914
2123
  end
@@ -1963,7 +2172,7 @@ module Addressable
1963
2172
  end
1964
2173
  end
1965
2174
 
1966
- return Addressable::URI.new(
2175
+ return self.class.new(
1967
2176
  :scheme => normalized_scheme,
1968
2177
  :authority => normalized_authority,
1969
2178
  :path => normalized_path,
@@ -2052,7 +2261,7 @@ module Addressable
2052
2261
  #
2053
2262
  # @return [Integer] A hash of the URI.
def hash
  # Memoized: the negated hash of the URI's string form.
  @hash ||= -self.to_s.hash
end
2057
2266
 
2058
2267
  ##
@@ -2060,7 +2269,7 @@ module Addressable
2060
2269
  #
2061
2270
  # @return [Addressable::URI] The cloned URI.
2062
2271
  def dup
2063
- duplicated_uri = Addressable::URI.new(
2272
+ duplicated_uri = self.class.new(
2064
2273
  :scheme => self.scheme ? self.scheme.dup : nil,
2065
2274
  :user => self.user ? self.user.dup : nil,
2066
2275
  :password => self.password ? self.password.dup : nil,
@@ -2116,6 +2325,15 @@ module Addressable
2116
2325
  replace_self(self.omit(*components))
2117
2326
  end
2118
2327
 
##
# Determines if the URI is an empty string.
#
# @return [TrueClass, FalseClass]
#   Returns <code>true</code> if the string form of the URI is empty,
#   <code>false</code> otherwise.
def empty?
  serialized = self.to_s
  serialized.empty?
end
2336
+
2119
2337
  ##
2120
2338
  # Converts the URI to a <code>String</code>.
2121
2339
  #
@@ -2126,18 +2344,16 @@ module Addressable
2126
2344
  raise InvalidURIError,
2127
2345
  "Cannot assemble URI string with ambiguous path: '#{self.path}'"
2128
2346
  end
2129
- @uri_string ||= (begin
2130
- uri_string = ""
2347
+ @uri_string ||= begin
2348
+ uri_string = String.new
2131
2349
  uri_string << "#{self.scheme}:" if self.scheme != nil
2132
2350
  uri_string << "//#{self.authority}" if self.authority != nil
2133
2351
  uri_string << self.path.to_s
2134
2352
  uri_string << "?#{self.query}" if self.query != nil
2135
2353
  uri_string << "##{self.fragment}" if self.fragment != nil
2136
- if uri_string.respond_to?(:force_encoding)
2137
- uri_string.force_encoding(Encoding::UTF_8)
2138
- end
2354
+ uri_string.force_encoding(Encoding::UTF_8)
2139
2355
  uri_string
2140
- end)
2356
+ end
2141
2357
  end
2142
2358
 
2143
2359
  ##
@@ -2166,7 +2382,7 @@ module Addressable
2166
2382
  #
2167
2383
  # @return [String] The URI object's state, as a <code>String</code>.
def inspect
  # Class name, object id in hexadecimal, then the URI's string form.
  class_name = self.class.to_s
  format("#<%s:%#0x URI:%s>", class_name, self.object_id, self.to_s)
end
2171
2387
 
2172
2388
  ##
@@ -2177,16 +2393,36 @@ module Addressable
2177
2393
  #
2178
2394
  # @param [Proc] block
2179
2395
  # A set of operations to perform on a given URI.
def defer_validation
  raise LocalJumpError, "No block given." unless block_given?
  # Remember the flag on entry so that a nested call hands control back to
  # the outer block with validation still deferred.
  previously_deferred = !!@validation_deferred
  begin
    @validation_deferred = true
    yield
  ensure
    # Restored even when the block raises.
    @validation_deferred = previously_deferred
  end
  # Skipped by validate itself while an outer deferral is still active.
  validate
end
2405
+
##
# Copies every instance variable into the coder, keyed by the variable name
# without its leading "@". Variables holding the NONE sentinel are left out.
#
# @param [#[]=] coder The object receiving the key/value pairs.
#
# @return [NilClass] Always nil.
def encode_with(coder)
  instance_variables.each do |name|
    stored = instance_variable_get(name)
    next if stored == NONE
    coder[name.to_s.slice(1..-1)] = stored
  end
  nil
end
2416
+
##
# Resets the instance variables, then assigns each key/value pair of
# <code>coder.map</code> to the instance variable named after the key.
#
# @param [#map] coder An object whose <code>map</code> yields key/value pairs.
#
# @return [NilClass] Always nil.
def init_with(coder)
  reset_ivs
  coder.map.each { |name, stored| instance_variable_set("@#{name}", stored) }
  nil
end
2188
2424
 
2189
- private
2425
+ protected
2190
2426
  SELF_REF = '.'
2191
2427
  PARENT = '..'
2192
2428
 
@@ -2204,37 +2440,42 @@ module Addressable
def self.normalize_path(path)
  # Section 5.2.4 of RFC 3986

  return if path.nil?

  # Work on a copy so the caller's string is left untouched.
  normalized_path = path.dup
  dotted = [SELF_REF, PARENT]
  loop do
    # `mod` starts out as nil on every pass; at most one rule rewrites the
    # path per pass, and the loop stops after a pass in which none applied.
    mod ||= normalized_path.gsub!(RULE_2A, SLASH)

    pair = normalized_path.match(RULE_2B_2C)
    parent, current = pair ? [pair[1], pair[2]] : [nil, nil]

    # Skip the removal only when both captures are themselves dot segments.
    if pair && !(dotted.include?(parent) && dotted.include?(current))
      source = "/#{Regexp.escape(parent.to_s)}/\\.\\./|" \
        "(/#{Regexp.escape(current.to_s)}/\\.\\.$)"
      mod ||= normalized_path.gsub!(Regexp.new(source), SLASH)
    end

    mod ||= normalized_path.gsub!(RULE_2D, EMPTY_STR)
    # Non-standard, removes prefixed dotted segments from path.
    mod ||= normalized_path.gsub!(RULE_PREFIXED_PARENT, SLASH)
    break if mod.nil?
  end

  normalized_path
end
  end
2232
2473
 
2233
2474
  ##
2234
2475
  # Ensures that the URI is valid.
2235
2476
  def validate
2236
2477
  return if !!@validation_deferred
2237
- if self.scheme != nil &&
2478
+ if self.scheme != nil && self.ip_based? &&
2238
2479
  (self.host == nil || self.host.empty?) &&
2239
2480
  (self.path == nil || self.path.empty?)
2240
2481
  raise InvalidURIError,
@@ -2252,6 +2493,19 @@ module Addressable
2252
2493
  raise InvalidURIError,
2253
2494
  "Cannot have a relative path with an authority set: '#{self.to_s}'"
2254
2495
  end
2496
+ if self.path != nil && !self.path.empty? &&
2497
+ self.path[0..1] == SLASH + SLASH && self.authority == nil
2498
+ raise InvalidURIError,
2499
+ "Cannot have a path with two leading slashes " +
2500
+ "without an authority set: '#{self.to_s}'"
2501
+ end
2502
+ unreserved = CharacterClasses::UNRESERVED
2503
+ sub_delims = CharacterClasses::SUB_DELIMS
2504
+ if !self.host.nil? && (self.host =~ /[<>{}\/\\\?\#\@"[[:space:]]]/ ||
2505
+ (self.host[/^\[(.*)\]$/, 1] != nil && self.host[/^\[(.*)\]$/, 1] !~
2506
+ Regexp.new("^[#{unreserved}#{sub_delims}:]*$")))
2507
+ raise InvalidURIError, "Invalid character in host: '#{self.host.to_s}'"
2508
+ end
2255
2509
  return nil
2256
2510
  end
2257
2511
 
@@ -2263,10 +2517,8 @@ module Addressable
2263
2517
  #
2264
2518
  # @return [Addressable::URI] <code>self</code>.
2265
2519
  def replace_self(uri)
2266
- # Reset dependant values
2267
- instance_variables.each do |var|
2268
- instance_variable_set(var, nil)
2269
- end
2520
+ # Reset dependent values
2521
+ reset_ivs
2270
2522
 
2271
2523
  @scheme = uri.scheme
2272
2524
  @user = uri.user
@@ -2278,5 +2530,73 @@ module Addressable
2278
2530
  @fragment = uri.fragment
2279
2531
  return self
2280
2532
  end
2533
+
2534
+ ##
2535
+ # Splits path string with "/" (slash).
2536
+ # It is considered that there is empty string after last slash when
2537
+ # path ends with slash.
2538
+ #
2539
+ # @param [String] path The path to split.
2540
+ #
2541
+ # @return [Array<String>] An array of parts of path.
2542
+ def split_path(path)
2543
+ splitted = path.split(SLASH)
2544
+ splitted << EMPTY_STR if path.end_with? SLASH
2545
+ splitted
2546
+ end
2547
+
##
# Resets composite values for the entire URI
#
# Clears the memoized values that are assembled from several components:
# the string form, its hash, and the values memoized by #site and
# #normalized_site. Clearing the latter two keeps them from going stale
# once a component has been reassigned.
#
# @api private
def remove_composite_values
  @uri_string = nil
  @hash = nil
  @site = nil
  @normalized_site = nil
end
2556
+
##
# Tags the string as UTF-8 if it is not already UTF-8
#
# The string is modified in place with force_encoding, so its bytes are
# relabelled rather than transcoded. Nothing happens for nil or for a
# string that is already UTF-8.
#
# @api private
def force_utf8_encoding_if_needed(str)
  return nil unless str
  return nil if str.encoding == Encoding::UTF_8

  str.force_encoding(Encoding::UTF_8)
end
2566
+
2567
+ private
2568
+
##
# Resets instance variables
#
# Component values go back to nil (the path to the empty string) and every
# memoized value is invalidated, either with nil or, for the memos that are
# compared against it, with the NONE sentinel. The values memoized by #site
# and #normalized_site are cleared as well, so they cannot outlive the
# components they were assembled from.
#
# @api private
def reset_ivs
  @scheme = nil
  @user = nil
  @normalized_scheme = NONE
  @normalized_user = NONE
  @uri_string = nil
  @hash = nil
  @userinfo = nil
  @normalized_userinfo = NONE
  @authority = nil
  @password = nil
  @normalized_authority = nil
  @port = nil
  @normalized_password = NONE
  @host = nil
  @normalized_host = nil
  @normalized_port = NONE
  @path = EMPTY_STR
  @normalized_path = nil
  @normalized_query = NONE
  @fragment = nil
  @normalized_fragment = NONE
  @query = nil
  @site = nil
  @normalized_site = nil
end
2597
+
2598
+ NONE = Module.new.freeze
2599
+
2600
+ private_constant :NONE
2281
2601
  end
2282
2602
  end