addressable 2.6.0 → 2.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # encoding:utf-8
4
3
  #--
5
4
  # Copyright (C) Bob Aman
6
5
  #
@@ -38,20 +37,35 @@ module Addressable
38
37
  ##
39
38
  # Container for the character classes specified in
40
39
  # <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
40
+ #
41
+ # Note: Concatenated and interpolated `String`s are not affected by the
42
+ # `frozen_string_literal` directive and must be frozen explicitly.
43
+ #
44
+ # Interpolated `String`s *were* frozen this way before Ruby 3.0:
45
+ # https://bugs.ruby-lang.org/issues/17104
41
46
  module CharacterClasses
42
47
  ALPHA = "a-zA-Z"
43
48
  DIGIT = "0-9"
44
49
  GEN_DELIMS = "\\:\\/\\?\\#\\[\\]\\@"
45
50
  SUB_DELIMS = "\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\="
46
- RESERVED = GEN_DELIMS + SUB_DELIMS
47
- UNRESERVED = ALPHA + DIGIT + "\\-\\.\\_\\~"
48
- PCHAR = UNRESERVED + SUB_DELIMS + "\\:\\@"
49
- SCHEME = ALPHA + DIGIT + "\\-\\+\\."
50
- HOST = UNRESERVED + SUB_DELIMS + "\\[\\:\\]"
51
- AUTHORITY = PCHAR
52
- PATH = PCHAR + "\\/"
53
- QUERY = PCHAR + "\\/\\?"
54
- FRAGMENT = PCHAR + "\\/\\?"
51
+ RESERVED = (GEN_DELIMS + SUB_DELIMS).freeze
52
+ UNRESERVED = (ALPHA + DIGIT + "\\-\\.\\_\\~").freeze
53
+ PCHAR = (UNRESERVED + SUB_DELIMS + "\\:\\@").freeze
54
+ SCHEME = (ALPHA + DIGIT + "\\-\\+\\.").freeze
55
+ HOST = (UNRESERVED + SUB_DELIMS + "\\[\\:\\]").freeze
56
+ AUTHORITY = (PCHAR + "\\[\\]").freeze
57
+ PATH = (PCHAR + "\\/").freeze
58
+ QUERY = (PCHAR + "\\/\\?").freeze
59
+ FRAGMENT = (PCHAR + "\\/\\?").freeze
60
+ end
61
+
62
+ module NormalizeCharacterClasses
63
+ HOST = /[^#{CharacterClasses::HOST}]/
64
+ UNRESERVED = /[^#{CharacterClasses::UNRESERVED}]/
65
+ PCHAR = /[^#{CharacterClasses::PCHAR}]/
66
+ SCHEME = /[^#{CharacterClasses::SCHEME}]/
67
+ FRAGMENT = /[^#{CharacterClasses::FRAGMENT}]/
68
+ QUERY = %r{[^a-zA-Z0-9\-\.\_\~\!\$\'\(\)\*\+\,\=\:\@\/\?%]|%(?!2B|2b)}
55
69
  end
56
70
 
57
71
  SLASH = '/'
@@ -73,7 +87,7 @@ module Addressable
73
87
  "wais" => 210,
74
88
  "ldap" => 389,
75
89
  "prospero" => 1525
76
- }
90
+ }.freeze
77
91
 
78
92
  ##
79
93
  # Returns a URI object based on the parsed string.
@@ -103,7 +117,7 @@ module Addressable
103
117
  uri = uri.to_str
104
118
  rescue TypeError, NoMethodError
105
119
  raise TypeError, "Can't convert #{uri.class} into String."
106
- end if not uri.is_a? String
120
+ end unless uri.is_a?(String)
107
121
 
108
122
  # This Regexp supplied as an example in RFC 3986, and it works great.
109
123
  scan = uri.scan(URIREGEX)
@@ -124,15 +138,15 @@ module Addressable
124
138
  user = userinfo.strip[/^([^:]*):?/, 1]
125
139
  password = userinfo.strip[/:(.*)$/, 1]
126
140
  end
141
+
127
142
  host = authority.sub(
128
143
  /^([^\[\]]*)@/, EMPTY_STR
129
144
  ).sub(
130
145
  /:([^:@\[\]]*?)$/, EMPTY_STR
131
146
  )
147
+
132
148
  port = authority[/:([^:@\[\]]*?)$/, 1]
133
- end
134
- if port == EMPTY_STR
135
- port = nil
149
+ port = nil if port == EMPTY_STR
136
150
  end
137
151
 
138
152
  return new(
@@ -175,7 +189,7 @@ module Addressable
175
189
  uri = uri.to_s
176
190
  end
177
191
 
178
- if !uri.respond_to?(:to_str)
192
+ unless uri.respond_to?(:to_str)
179
193
  raise TypeError, "Can't convert #{uri.class} into String."
180
194
  end
181
195
  # Otherwise, convert to a String
@@ -207,7 +221,7 @@ module Addressable
207
221
  fragments = match.captures
208
222
  authority = fragments[3]
209
223
  if authority && authority.length > 0
210
- new_authority = authority.gsub(/\\/, '/').gsub(/ /, '%20')
224
+ new_authority = authority.tr("\\", "/").gsub(" ", "%20")
211
225
  # NOTE: We want offset 4, not 3!
212
226
  offset = match.offset(4)
213
227
  uri = uri.dup
@@ -218,8 +232,9 @@ module Addressable
218
232
  parsed = self.parse(hints[:scheme] + "://" + uri)
219
233
  end
220
234
  if parsed.path.include?(".")
221
- new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
222
- if new_host
235
+ if parsed.path[/\b@\b/]
236
+ parsed.scheme = "mailto" unless parsed.scheme
237
+ elsif new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
223
238
  parsed.defer_validation do
224
239
  new_path = parsed.path.sub(
225
240
  Regexp.new("^" + Regexp.escape(new_host)), EMPTY_STR)
@@ -266,7 +281,7 @@ module Addressable
266
281
  return nil unless path
267
282
  # If a URI object is passed, just return itself.
268
283
  return path if path.kind_of?(self)
269
- if !path.respond_to?(:to_str)
284
+ unless path.respond_to?(:to_str)
270
285
  raise TypeError, "Can't convert #{path.class} into String."
271
286
  end
272
287
  # Otherwise, convert to a String
@@ -281,15 +296,15 @@ module Addressable
281
296
  uri.path.sub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
282
297
  "/#{$1.downcase}:/"
283
298
  end
284
- uri.path.gsub!(/\\/, SLASH)
299
+ uri.path.tr!("\\", SLASH)
285
300
  if File.exist?(uri.path) &&
286
301
  File.stat(uri.path).directory?
287
- uri.path.sub!(/\/$/, EMPTY_STR)
302
+ uri.path.chomp!(SLASH)
288
303
  uri.path = uri.path + '/'
289
304
  end
290
305
 
291
306
  # If the path is absolute, set the scheme and host.
292
- if uri.path =~ /^\//
307
+ if uri.path.start_with?(SLASH)
293
308
  uri.scheme = "file"
294
309
  uri.host = EMPTY_STR
295
310
  end
@@ -314,18 +329,29 @@ module Addressable
314
329
  # #=> #<Addressable::URI:0xcab390 URI:http://example.com/relative/path>
315
330
  def self.join(*uris)
316
331
  uri_objects = uris.collect do |uri|
317
- if !uri.respond_to?(:to_str)
332
+ unless uri.respond_to?(:to_str)
318
333
  raise TypeError, "Can't convert #{uri.class} into String."
319
334
  end
320
335
  uri.kind_of?(self) ? uri : self.parse(uri.to_str)
321
336
  end
322
337
  result = uri_objects.shift.dup
323
- for uri in uri_objects
338
+ uri_objects.each do |uri|
324
339
  result.join!(uri)
325
340
  end
326
341
  return result
327
342
  end
328
343
 
344
+ ##
345
+ # Tables used to optimize encoding operations in `self.encode_component`
346
+ # and `self.normalize_component`
347
+ SEQUENCE_ENCODING_TABLE = (0..255).map do |byte|
348
+ format("%02x", byte).freeze
349
+ end.freeze
350
+
351
+ SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE = (0..255).map do |byte|
352
+ format("%%%02X", byte).freeze
353
+ end.freeze
354
+
329
355
  ##
330
356
  # Percent encodes a URI component.
331
357
  #
@@ -391,19 +417,22 @@ module Addressable
391
417
  component = component.dup
392
418
  component.force_encoding(Encoding::ASCII_8BIT)
393
419
  # Avoiding gsub! because there are edge cases with frozen strings
394
- component = component.gsub(character_class) do |sequence|
395
- (sequence.unpack('C*').map { |c| "%" + ("%02x" % c).upcase }).join
420
+ component = component.gsub(character_class) do |char|
421
+ SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE[char.ord]
396
422
  end
397
423
  if upcase_encoded.length > 0
398
- component = component.gsub(/%(#{upcase_encoded.chars.map do |char|
399
- char.unpack('C*').map { |c| '%02x' % c }.join
400
- end.join('|')})/i) { |s| s.upcase }
424
+ upcase_encoded_chars = upcase_encoded.bytes.map do |byte|
425
+ SEQUENCE_ENCODING_TABLE[byte]
426
+ end
427
+ component = component.gsub(/%(#{upcase_encoded_chars.join('|')})/,
428
+ &:upcase)
401
429
  end
430
+
402
431
  return component
403
432
  end
404
433
 
405
434
  class << self
406
- alias_method :encode_component, :encode_component
435
+ alias_method :escape_component, :encode_component
407
436
  end
408
437
 
409
438
  ##
@@ -442,16 +471,14 @@ module Addressable
442
471
  "Expected Class (String or Addressable::URI), " +
443
472
  "got #{return_type.inspect}"
444
473
  end
445
- uri = uri.dup
446
- # Seriously, only use UTF-8. I'm really not kidding!
447
- uri.force_encoding("utf-8")
448
- leave_encoded = leave_encoded.dup.force_encoding("utf-8")
449
- result = uri.gsub(/%[0-9a-f]{2}/iu) do |sequence|
474
+
475
+ result = uri.gsub(/%[0-9a-f]{2}/i) do |sequence|
450
476
  c = sequence[1..3].to_i(16).chr
451
- c.force_encoding("utf-8")
477
+ c.force_encoding(sequence.encoding)
452
478
  leave_encoded.include?(c) ? sequence : c
453
479
  end
454
- result.force_encoding("utf-8")
480
+
481
+ result.force_encoding(Encoding::UTF_8)
455
482
  if return_type == String
456
483
  return result
457
484
  elsif return_type == ::Addressable::URI
@@ -530,13 +557,16 @@ module Addressable
530
557
  leave_re = if leave_encoded.length > 0
531
558
  character_class = "#{character_class}%" unless character_class.include?('%')
532
559
 
533
- "|%(?!#{leave_encoded.chars.map do |char|
534
- seq = char.unpack('C*').map { |c| '%02x' % c }.join
535
- [seq.upcase, seq.downcase]
536
- end.flatten.join('|')})"
560
+ bytes = leave_encoded.bytes
561
+ leave_encoded_pattern = bytes.map { |b| SEQUENCE_ENCODING_TABLE[b] }.join('|')
562
+ "|%(?!#{leave_encoded_pattern}|#{leave_encoded_pattern.upcase})"
537
563
  end
538
564
 
539
- character_class = /[^#{character_class}]#{leave_re}/
565
+ character_class = if leave_re
566
+ /[^#{character_class}]#{leave_re}/
567
+ else
568
+ /[^#{character_class}]/
569
+ end
540
570
  end
541
571
  # We can't perform regexps on invalid UTF sequences, but
542
572
  # here we need to, so switch to ASCII.
@@ -545,7 +575,7 @@ module Addressable
545
575
  unencoded = self.unencode_component(component, String, leave_encoded)
546
576
  begin
547
577
  encoded = self.encode_component(
548
- Addressable::IDNA.unicode_normalize_kc(unencoded),
578
+ unencoded.unicode_normalize(:nfc),
549
579
  character_class,
550
580
  leave_encoded
551
581
  )
@@ -653,8 +683,7 @@ module Addressable
653
683
  components.each do |key, value|
654
684
  if value != nil
655
685
  begin
656
- components[key] =
657
- Addressable::IDNA.unicode_normalize_kc(value.to_str)
686
+ components[key] = value.to_str.unicode_normalize(:nfc)
658
687
  rescue ArgumentError
659
688
  # Likely a malformed UTF-8 character, skip unicode normalization
660
689
  components[key] = value.to_str
@@ -802,7 +831,9 @@ module Addressable
802
831
  end
803
832
  end
804
833
 
805
- self.defer_validation do
834
+ reset_ivs
835
+
836
+ defer_validation do
806
837
  # Bunch of crazy logic required because of the composite components
807
838
  # like userinfo and authority.
808
839
  self.scheme = options[:scheme] if options[:scheme]
@@ -817,7 +848,8 @@ module Addressable
817
848
  self.query_values = options[:query_values] if options[:query_values]
818
849
  self.fragment = options[:fragment] if options[:fragment]
819
850
  end
820
- self.to_s
851
+
852
+ to_s # force path validation
821
853
  end
822
854
 
823
855
  ##
@@ -844,9 +876,7 @@ module Addressable
844
876
  # The scheme component for this URI.
845
877
  #
846
878
  # @return [String] The scheme component.
847
- def scheme
848
- return defined?(@scheme) ? @scheme : nil
849
- end
879
+ attr_reader :scheme
850
880
 
851
881
  ##
852
882
  # The scheme component for this URI, normalized.
@@ -854,18 +884,18 @@ module Addressable
854
884
  # @return [String] The scheme component, normalized.
855
885
  def normalized_scheme
856
886
  return nil unless self.scheme
857
- @normalized_scheme ||= begin
858
- if self.scheme =~ /^\s*ssh\+svn\s*$/i
887
+ if @normalized_scheme == NONE
888
+ @normalized_scheme = if self.scheme =~ /^\s*ssh\+svn\s*$/i
859
889
  "svn+ssh".dup
860
890
  else
861
891
  Addressable::URI.normalize_component(
862
892
  self.scheme.strip.downcase,
863
- Addressable::URI::CharacterClasses::SCHEME
893
+ Addressable::URI::NormalizeCharacterClasses::SCHEME
864
894
  )
865
895
  end
866
896
  end
867
897
  # All normalized values should be UTF-8
868
- @normalized_scheme.force_encoding(Encoding::UTF_8) if @normalized_scheme
898
+ force_utf8_encoding_if_needed(@normalized_scheme)
869
899
  @normalized_scheme
870
900
  end
871
901
 
@@ -880,13 +910,13 @@ module Addressable
880
910
  new_scheme = new_scheme.to_str
881
911
  end
882
912
  if new_scheme && new_scheme !~ /\A[a-z][a-z0-9\.\+\-]*\z/i
883
- raise InvalidURIError, "Invalid scheme format: #{new_scheme}"
913
+ raise InvalidURIError, "Invalid scheme format: '#{new_scheme}'"
884
914
  end
885
915
  @scheme = new_scheme
886
916
  @scheme = nil if @scheme.to_s.strip.empty?
887
917
 
888
918
  # Reset dependent values
889
- remove_instance_variable(:@normalized_scheme) if defined?(@normalized_scheme)
919
+ @normalized_scheme = NONE
890
920
  remove_composite_values
891
921
 
892
922
  # Ensure we haven't created an invalid URI
@@ -897,9 +927,7 @@ module Addressable
897
927
  # The user component for this URI.
898
928
  #
899
929
  # @return [String] The user component.
900
- def user
901
- return defined?(@user) ? @user : nil
902
- end
930
+ attr_reader :user
903
931
 
904
932
  ##
905
933
  # The user component for this URI, normalized.
@@ -907,20 +935,20 @@ module Addressable
907
935
  # @return [String] The user component, normalized.
908
936
  def normalized_user
909
937
  return nil unless self.user
910
- return @normalized_user if defined?(@normalized_user)
911
- @normalized_user ||= begin
938
+ return @normalized_user unless @normalized_user == NONE
939
+ @normalized_user = begin
912
940
  if normalized_scheme =~ /https?/ && self.user.strip.empty? &&
913
941
  (!self.password || self.password.strip.empty?)
914
942
  nil
915
943
  else
916
944
  Addressable::URI.normalize_component(
917
945
  self.user.strip,
918
- Addressable::URI::CharacterClasses::UNRESERVED
946
+ Addressable::URI::NormalizeCharacterClasses::UNRESERVED
919
947
  )
920
948
  end
921
949
  end
922
950
  # All normalized values should be UTF-8
923
- @normalized_user.force_encoding(Encoding::UTF_8) if @normalized_user
951
+ force_utf8_encoding_if_needed(@normalized_user)
924
952
  @normalized_user
925
953
  end
926
954
 
@@ -936,14 +964,14 @@ module Addressable
936
964
 
937
965
  # You can't have a nil user with a non-nil password
938
966
  if password != nil
939
- @user = EMPTY_STR if @user.nil?
967
+ @user = EMPTY_STR unless user
940
968
  end
941
969
 
942
970
  # Reset dependent values
943
- remove_instance_variable(:@userinfo) if defined?(@userinfo)
944
- remove_instance_variable(:@normalized_userinfo) if defined?(@normalized_userinfo)
945
- remove_instance_variable(:@authority) if defined?(@authority)
946
- remove_instance_variable(:@normalized_user) if defined?(@normalized_user)
971
+ @userinfo = nil
972
+ @normalized_userinfo = NONE
973
+ @authority = nil
974
+ @normalized_user = NONE
947
975
  remove_composite_values
948
976
 
949
977
  # Ensure we haven't created an invalid URI
@@ -954,9 +982,7 @@ module Addressable
954
982
  # The password component for this URI.
955
983
  #
956
984
  # @return [String] The password component.
957
- def password
958
- return defined?(@password) ? @password : nil
959
- end
985
+ attr_reader :password
960
986
 
961
987
  ##
962
988
  # The password component for this URI, normalized.
@@ -964,22 +990,20 @@ module Addressable
964
990
  # @return [String] The password component, normalized.
965
991
  def normalized_password
966
992
  return nil unless self.password
967
- return @normalized_password if defined?(@normalized_password)
968
- @normalized_password ||= begin
993
+ return @normalized_password unless @normalized_password == NONE
994
+ @normalized_password = begin
969
995
  if self.normalized_scheme =~ /https?/ && self.password.strip.empty? &&
970
996
  (!self.user || self.user.strip.empty?)
971
997
  nil
972
998
  else
973
999
  Addressable::URI.normalize_component(
974
1000
  self.password.strip,
975
- Addressable::URI::CharacterClasses::UNRESERVED
1001
+ Addressable::URI::NormalizeCharacterClasses::UNRESERVED
976
1002
  )
977
1003
  end
978
1004
  end
979
1005
  # All normalized values should be UTF-8
980
- if @normalized_password
981
- @normalized_password.force_encoding(Encoding::UTF_8)
982
- end
1006
+ force_utf8_encoding_if_needed(@normalized_password)
983
1007
  @normalized_password
984
1008
  end
985
1009
 
@@ -994,17 +1018,15 @@ module Addressable
994
1018
  @password = new_password ? new_password.to_str : nil
995
1019
 
996
1020
  # You can't have a nil user with a non-nil password
997
- @password ||= nil
998
- @user ||= nil
999
1021
  if @password != nil
1000
- @user = EMPTY_STR if @user.nil?
1022
+ self.user = EMPTY_STR if user.nil?
1001
1023
  end
1002
1024
 
1003
1025
  # Reset dependent values
1004
- remove_instance_variable(:@userinfo) if defined?(@userinfo)
1005
- remove_instance_variable(:@normalized_userinfo) if defined?(@normalized_userinfo)
1006
- remove_instance_variable(:@authority) if defined?(@authority)
1007
- remove_instance_variable(:@normalized_password) if defined?(@normalized_password)
1026
+ @userinfo = nil
1027
+ @normalized_userinfo = NONE
1028
+ @authority = nil
1029
+ @normalized_password = NONE
1008
1030
  remove_composite_values
1009
1031
 
1010
1032
  # Ensure we haven't created an invalid URI
@@ -1034,8 +1056,8 @@ module Addressable
1034
1056
  # @return [String] The userinfo component, normalized.
1035
1057
  def normalized_userinfo
1036
1058
  return nil unless self.userinfo
1037
- return @normalized_userinfo if defined?(@normalized_userinfo)
1038
- @normalized_userinfo ||= begin
1059
+ return @normalized_userinfo unless @normalized_userinfo == NONE
1060
+ @normalized_userinfo = begin
1039
1061
  current_user = self.normalized_user
1040
1062
  current_password = self.normalized_password
1041
1063
  if !current_user && !current_password
@@ -1047,9 +1069,7 @@ module Addressable
1047
1069
  end
1048
1070
  end
1049
1071
  # All normalized values should be UTF-8
1050
- if @normalized_userinfo
1051
- @normalized_userinfo.force_encoding(Encoding::UTF_8)
1052
- end
1072
+ force_utf8_encoding_if_needed(@normalized_userinfo)
1053
1073
  @normalized_userinfo
1054
1074
  end
1055
1075
 
@@ -1075,7 +1095,7 @@ module Addressable
1075
1095
  self.user = new_user
1076
1096
 
1077
1097
  # Reset dependent values
1078
- remove_instance_variable(:@authority) if defined?(@authority)
1098
+ @authority = nil
1079
1099
  remove_composite_values
1080
1100
 
1081
1101
  # Ensure we haven't created an invalid URI
@@ -1086,9 +1106,7 @@ module Addressable
1086
1106
  # The host component for this URI.
1087
1107
  #
1088
1108
  # @return [String] The host component.
1089
- def host
1090
- return defined?(@host) ? @host : nil
1091
- end
1109
+ attr_reader :host
1092
1110
 
1093
1111
  ##
1094
1112
  # The host component for this URI, normalized.
@@ -1096,6 +1114,7 @@ module Addressable
1096
1114
  # @return [String] The host component, normalized.
1097
1115
  def normalized_host
1098
1116
  return nil unless self.host
1117
+
1099
1118
  @normalized_host ||= begin
1100
1119
  if !self.host.strip.empty?
1101
1120
  result = ::Addressable::IDNA.to_ascii(
@@ -1107,14 +1126,15 @@ module Addressable
1107
1126
  end
1108
1127
  result = Addressable::URI.normalize_component(
1109
1128
  result,
1110
- CharacterClasses::HOST)
1129
+ NormalizeCharacterClasses::HOST
1130
+ )
1111
1131
  result
1112
1132
  else
1113
1133
  EMPTY_STR.dup
1114
1134
  end
1115
1135
  end
1116
1136
  # All normalized values should be UTF-8
1117
- @normalized_host.force_encoding(Encoding::UTF_8) if @normalized_host
1137
+ force_utf8_encoding_if_needed(@normalized_host)
1118
1138
  @normalized_host
1119
1139
  end
1120
1140
 
@@ -1129,8 +1149,8 @@ module Addressable
1129
1149
  @host = new_host ? new_host.to_str : nil
1130
1150
 
1131
1151
  # Reset dependent values
1132
- remove_instance_variable(:@authority) if defined?(@authority)
1133
- remove_instance_variable(:@normalized_host) if defined?(@normalized_host)
1152
+ @authority = nil
1153
+ @normalized_host = nil
1134
1154
  remove_composite_values
1135
1155
 
1136
1156
  # Ensure we haven't created an invalid URI
@@ -1172,7 +1192,7 @@ module Addressable
1172
1192
  # Returns the top-level domain for this host.
1173
1193
  #
1174
1194
  # @example
1175
- # Addressable::URI.parse("www.example.co.uk").tld # => "co.uk"
1195
+ # Addressable::URI.parse("http://www.example.co.uk").tld # => "co.uk"
1176
1196
  def tld
1177
1197
  PublicSuffix.parse(self.host, ignore_private: true).tld
1178
1198
  end
@@ -1182,7 +1202,7 @@ module Addressable
1182
1202
  #
1183
1203
  # @param [String, #to_str] new_tld The new top-level domain.
1184
1204
  def tld=(new_tld)
1185
- replaced_tld = domain.sub(/#{tld}\z/, new_tld)
1205
+ replaced_tld = host.sub(/#{tld}\z/, new_tld)
1186
1206
  self.host = PublicSuffix::Domain.new(replaced_tld).to_s
1187
1207
  end
1188
1208
 
@@ -1190,7 +1210,7 @@ module Addressable
1190
1210
  # Returns the public suffix domain for this host.
1191
1211
  #
1192
1212
  # @example
1193
- # Addressable::URI.parse("www.example.co.uk").domain # => "example.co.uk"
1213
+ # Addressable::URI.parse("http://www.example.co.uk").domain # => "example.co.uk"
1194
1214
  def domain
1195
1215
  PublicSuffix.domain(self.host, ignore_private: true)
1196
1216
  end
@@ -1232,9 +1252,7 @@ module Addressable
1232
1252
  authority
1233
1253
  end
1234
1254
  # All normalized values should be UTF-8
1235
- if @normalized_authority
1236
- @normalized_authority.force_encoding(Encoding::UTF_8)
1237
- end
1255
+ force_utf8_encoding_if_needed(@normalized_authority)
1238
1256
  @normalized_authority
1239
1257
  end
1240
1258
 
@@ -1263,14 +1281,14 @@ module Addressable
1263
1281
  end
1264
1282
 
1265
1283
  # Password assigned first to ensure validity in case of nil
1266
- self.password = defined?(new_password) ? new_password : nil
1267
- self.user = defined?(new_user) ? new_user : nil
1268
- self.host = defined?(new_host) ? new_host : nil
1269
- self.port = defined?(new_port) ? new_port : nil
1284
+ self.password = new_password
1285
+ self.user = new_user
1286
+ self.host = new_host
1287
+ self.port = new_port
1270
1288
 
1271
1289
  # Reset dependent values
1272
- remove_instance_variable(:@userinfo) if defined?(@userinfo)
1273
- remove_instance_variable(:@normalized_userinfo) if defined?(@normalized_userinfo)
1290
+ @userinfo = nil
1291
+ @normalized_userinfo = NONE
1274
1292
  remove_composite_values
1275
1293
 
1276
1294
  # Ensure we haven't created an invalid URI
@@ -1318,16 +1336,16 @@ module Addressable
1318
1336
  new_port = new_origin[/:([^:@\[\]\/]*?)$/, 1]
1319
1337
  end
1320
1338
 
1321
- self.scheme = defined?(new_scheme) ? new_scheme : nil
1322
- self.host = defined?(new_host) ? new_host : nil
1323
- self.port = defined?(new_port) ? new_port : nil
1339
+ self.scheme = new_scheme
1340
+ self.host = new_host
1341
+ self.port = new_port
1324
1342
  self.userinfo = nil
1325
1343
 
1326
1344
  # Reset dependent values
1327
- remove_instance_variable(:@userinfo) if defined?(@userinfo)
1328
- remove_instance_variable(:@normalized_userinfo) if defined?(@normalized_userinfo)
1329
- remove_instance_variable(:@authority) if defined?(@authority)
1330
- remove_instance_variable(:@normalized_authority) if defined?(@normalized_authority)
1345
+ @userinfo = nil
1346
+ @normalized_userinfo = NONE
1347
+ @authority = nil
1348
+ @normalized_authority = nil
1331
1349
  remove_composite_values
1332
1350
 
1333
1351
  # Ensure we haven't created an invalid URI
@@ -1354,9 +1372,7 @@ module Addressable
1354
1372
  # infer port numbers from default values.
1355
1373
  #
1356
1374
  # @return [Integer] The port component.
1357
- def port
1358
- return defined?(@port) ? @port : nil
1359
- end
1375
+ attr_reader :port
1360
1376
 
1361
1377
  ##
1362
1378
  # The port component for this URI, normalized.
@@ -1364,8 +1380,8 @@ module Addressable
1364
1380
  # @return [Integer] The port component, normalized.
1365
1381
  def normalized_port
1366
1382
  return nil unless self.port
1367
- return @normalized_port if defined?(@normalized_port)
1368
- @normalized_port ||= begin
1383
+ return @normalized_port unless @normalized_port == NONE
1384
+ @normalized_port = begin
1369
1385
  if URI.port_mapping[self.normalized_scheme] == self.port
1370
1386
  nil
1371
1387
  else
@@ -1396,8 +1412,8 @@ module Addressable
1396
1412
  @port = nil if @port == 0
1397
1413
 
1398
1414
  # Reset dependent values
1399
- remove_instance_variable(:@authority) if defined?(@authority)
1400
- remove_instance_variable(:@normalized_port) if defined?(@normalized_port)
1415
+ @authority = nil
1416
+ @normalized_port = NONE
1401
1417
  remove_composite_values
1402
1418
 
1403
1419
  # Ensure we haven't created an invalid URI
@@ -1468,7 +1484,7 @@ module Addressable
1468
1484
  site_string
1469
1485
  end
1470
1486
  # All normalized values should be UTF-8
1471
- @normalized_site.force_encoding(Encoding::UTF_8) if @normalized_site
1487
+ force_utf8_encoding_if_needed(@normalized_site)
1472
1488
  @normalized_site
1473
1489
  end
1474
1490
 
@@ -1498,9 +1514,7 @@ module Addressable
1498
1514
  # The path component for this URI.
1499
1515
  #
1500
1516
  # @return [String] The path component.
1501
- def path
1502
- return defined?(@path) ? @path : EMPTY_STR
1503
- end
1517
+ attr_reader :path
1504
1518
 
1505
1519
  NORMPATH = /^(?!\/)[^\/:]*:.*$/
1506
1520
  ##
@@ -1519,7 +1533,7 @@ module Addressable
1519
1533
  result = path.strip.split(SLASH, -1).map do |segment|
1520
1534
  Addressable::URI.normalize_component(
1521
1535
  segment,
1522
- Addressable::URI::CharacterClasses::PCHAR
1536
+ Addressable::URI::NormalizeCharacterClasses::PCHAR
1523
1537
  )
1524
1538
  end.join(SLASH)
1525
1539
 
@@ -1531,7 +1545,7 @@ module Addressable
1531
1545
  result
1532
1546
  end
1533
1547
  # All normalized values should be UTF-8
1534
- @normalized_path.force_encoding(Encoding::UTF_8) if @normalized_path
1548
+ force_utf8_encoding_if_needed(@normalized_path)
1535
1549
  @normalized_path
1536
1550
  end
1537
1551
 
@@ -1549,7 +1563,7 @@ module Addressable
1549
1563
  end
1550
1564
 
1551
1565
  # Reset dependent values
1552
- remove_instance_variable(:@normalized_path) if defined?(@normalized_path)
1566
+ @normalized_path = nil
1553
1567
  remove_composite_values
1554
1568
 
1555
1569
  # Ensure we haven't created an invalid URI
@@ -1579,9 +1593,7 @@ module Addressable
1579
1593
  # The query component for this URI.
1580
1594
  #
1581
1595
  # @return [String] The query component.
1582
- def query
1583
- return defined?(@query) ? @query : nil
1584
- end
1596
+ attr_reader :query
1585
1597
 
1586
1598
  ##
1587
1599
  # The query component for this URI, normalized.
@@ -1589,20 +1601,25 @@ module Addressable
1589
1601
  # @return [String] The query component, normalized.
1590
1602
  def normalized_query(*flags)
1591
1603
  return nil unless self.query
1592
- return @normalized_query if defined?(@normalized_query)
1593
- @normalized_query ||= begin
1604
+ return @normalized_query unless @normalized_query == NONE
1605
+ @normalized_query = begin
1594
1606
  modified_query_class = Addressable::URI::CharacterClasses::QUERY.dup
1595
1607
  # Make sure possible key-value pair delimiters are escaped.
1596
1608
  modified_query_class.sub!("\\&", "").sub!("\\;", "")
1597
- pairs = (self.query || "").split("&", -1)
1609
+ pairs = (query || "").split("&", -1)
1610
+ pairs.delete_if(&:empty?).uniq! if flags.include?(:compacted)
1598
1611
  pairs.sort! if flags.include?(:sorted)
1599
1612
  component = pairs.map do |pair|
1600
- Addressable::URI.normalize_component(pair, modified_query_class, "+")
1613
+ Addressable::URI.normalize_component(
1614
+ pair,
1615
+ Addressable::URI::NormalizeCharacterClasses::QUERY,
1616
+ "+"
1617
+ )
1601
1618
  end.join("&")
1602
1619
  component == "" ? nil : component
1603
1620
  end
1604
1621
  # All normalized values should be UTF-8
1605
- @normalized_query.force_encoding(Encoding::UTF_8) if @normalized_query
1622
+ force_utf8_encoding_if_needed(@normalized_query)
1606
1623
  @normalized_query
1607
1624
  end
1608
1625
 
@@ -1617,7 +1634,7 @@ module Addressable
1617
1634
  @query = new_query ? new_query.to_str : nil
1618
1635
 
1619
1636
  # Reset dependent values
1620
- remove_instance_variable(:@normalized_query) if defined?(@normalized_query)
1637
+ @normalized_query = NONE
1621
1638
  remove_composite_values
1622
1639
  end
1623
1640
 
@@ -1656,11 +1673,13 @@ module Addressable
1656
1673
  # so it's best to make all changes in-place.
1657
1674
  pair[0] = URI.unencode_component(pair[0])
1658
1675
  if pair[1].respond_to?(:to_str)
1676
+ value = pair[1].to_str
1659
1677
  # I loathe the fact that I have to do this. Stupid HTML 4.01.
1660
1678
  # Treating '+' as a space was just an unbelievably bad idea.
1661
1679
  # There was nothing wrong with '%20'!
1662
1680
  # If it ain't broke, don't fix it!
1663
- pair[1] = URI.unencode_component(pair[1].to_str.gsub(/\+/, " "))
1681
+ value = value.tr("+", " ") if ["http", "https", nil].include?(scheme)
1682
+ pair[1] = URI.unencode_component(value)
1664
1683
  end
1665
1684
  if return_type == Hash
1666
1685
  accu[pair[0]] = pair[1]
@@ -1777,9 +1796,7 @@ module Addressable
1777
1796
  # The fragment component for this URI.
1778
1797
  #
1779
1798
  # @return [String] The fragment component.
1780
- def fragment
1781
- return defined?(@fragment) ? @fragment : nil
1782
- end
1799
+ attr_reader :fragment
1783
1800
 
1784
1801
  ##
1785
1802
  # The fragment component for this URI, normalized.
@@ -1787,18 +1804,16 @@ module Addressable
1787
1804
  # @return [String] The fragment component, normalized.
1788
1805
  def normalized_fragment
1789
1806
  return nil unless self.fragment
1790
- return @normalized_fragment if defined?(@normalized_fragment)
1791
- @normalized_fragment ||= begin
1807
+ return @normalized_fragment unless @normalized_fragment == NONE
1808
+ @normalized_fragment = begin
1792
1809
  component = Addressable::URI.normalize_component(
1793
1810
  self.fragment,
1794
- Addressable::URI::CharacterClasses::FRAGMENT
1811
+ Addressable::URI::NormalizeCharacterClasses::FRAGMENT
1795
1812
  )
1796
1813
  component == "" ? nil : component
1797
1814
  end
1798
1815
  # All normalized values should be UTF-8
1799
- if @normalized_fragment
1800
- @normalized_fragment.force_encoding(Encoding::UTF_8)
1801
- end
1816
+ force_utf8_encoding_if_needed(@normalized_fragment)
1802
1817
  @normalized_fragment
1803
1818
  end
1804
1819
 
@@ -1813,7 +1828,7 @@ module Addressable
1813
1828
  @fragment = new_fragment ? new_fragment.to_str : nil
1814
1829
 
1815
1830
  # Reset dependent values
1816
- remove_instance_variable(:@normalized_fragment) if defined?(@normalized_fragment)
1831
+ @normalized_fragment = NONE
1817
1832
  remove_composite_values
1818
1833
 
1819
1834
  # Ensure we haven't created an invalid URI
@@ -1917,7 +1932,7 @@ module Addressable
1917
1932
  # Section 5.2.3 of RFC 3986
1918
1933
  #
1919
1934
  # Removes the right-most path segment from the base path.
1920
- if base_path =~ /\//
1935
+ if base_path.include?(SLASH)
1921
1936
  base_path.sub!(/\/[^\/]+$/, SLASH)
1922
1937
  else
1923
1938
  base_path = EMPTY_STR
@@ -1979,7 +1994,7 @@ module Addressable
1979
1994
  #
1980
1995
  # @see Hash#merge
1981
1996
  def merge(hash)
1982
- if !hash.respond_to?(:to_hash)
1997
+ unless hash.respond_to?(:to_hash)
1983
1998
  raise TypeError, "Can't convert #{hash.class} into Hash."
1984
1999
  end
1985
2000
  hash = hash.to_hash
@@ -2367,13 +2382,33 @@ module Addressable
2367
2382
  #
2368
2383
  # @param [Proc] block
2369
2384
  # A set of operations to perform on a given URI.
2370
- def defer_validation(&block)
2371
- raise LocalJumpError, "No block given." unless block
2385
+ def defer_validation
2386
+ raise LocalJumpError, "No block given." unless block_given?
2372
2387
  @validation_deferred = true
2373
- block.call()
2388
+ yield
2374
2389
  @validation_deferred = false
2375
2390
  validate
2376
- return nil
2391
+ ensure
2392
+ @validation_deferred = false
2393
+ end
2394
+
2395
+ def encode_with(coder)
2396
+ instance_variables.each do |ivar|
2397
+ value = instance_variable_get(ivar)
2398
+ if value != NONE
2399
+ key = ivar.to_s.slice(1..-1)
2400
+ coder[key] = value
2401
+ end
2402
+ end
2403
+ nil
2404
+ end
2405
+
2406
+ def init_with(coder)
2407
+ reset_ivs
2408
+ coder.map.each do |key, value|
2409
+ instance_variable_set("@#{key}", value)
2410
+ end
2411
+ nil
2377
2412
  end
2378
2413
 
2379
2414
  protected
@@ -2394,30 +2429,35 @@ module Addressable
2394
2429
  def self.normalize_path(path)
2395
2430
  # Section 5.2.4 of RFC 3986
2396
2431
 
2397
- return nil if path.nil?
2432
+ return if path.nil?
2398
2433
  normalized_path = path.dup
2399
- begin
2400
- mod = nil
2434
+ loop do
2401
2435
  mod ||= normalized_path.gsub!(RULE_2A, SLASH)
2402
2436
 
2403
2437
  pair = normalized_path.match(RULE_2B_2C)
2404
- parent, current = pair[1], pair[2] if pair
2438
+ if pair
2439
+ parent = pair[1]
2440
+ current = pair[2]
2441
+ else
2442
+ parent = nil
2443
+ current = nil
2444
+ end
2445
+
2446
+ regexp = "/#{Regexp.escape(parent.to_s)}/\\.\\./|"
2447
+ regexp += "(/#{Regexp.escape(current.to_s)}/\\.\\.$)"
2448
+
2405
2449
  if pair && ((parent != SELF_REF && parent != PARENT) ||
2406
2450
  (current != SELF_REF && current != PARENT))
2407
- mod ||= normalized_path.gsub!(
2408
- Regexp.new(
2409
- "/#{Regexp.escape(parent.to_s)}/\\.\\./|" +
2410
- "(/#{Regexp.escape(current.to_s)}/\\.\\.$)"
2411
- ), SLASH
2412
- )
2451
+ mod ||= normalized_path.gsub!(Regexp.new(regexp), SLASH)
2413
2452
  end
2414
2453
 
2415
2454
  mod ||= normalized_path.gsub!(RULE_2D, EMPTY_STR)
2416
2455
  # Non-standard, removes prefixed dotted segments from path.
2417
2456
  mod ||= normalized_path.gsub!(RULE_PREFIXED_PARENT, SLASH)
2418
- end until mod.nil?
2457
+ break if mod.nil?
2458
+ end
2419
2459
 
2420
- return normalized_path
2460
+ normalized_path
2421
2461
  end
2422
2462
 
2423
2463
  ##
@@ -2467,11 +2507,7 @@ module Addressable
2467
2507
  # @return [Addressable::URI] <code>self</code>.
2468
2508
  def replace_self(uri)
2469
2509
  # Reset dependent values
2470
- instance_variables.each do |var|
2471
- if instance_variable_defined?(var) && var != :@validation_deferred
2472
- remove_instance_variable(var)
2473
- end
2474
- end
2510
+ reset_ivs
2475
2511
 
2476
2512
  @scheme = uri.scheme
2477
2513
  @user = uri.user
@@ -2503,8 +2539,53 @@ module Addressable
2503
2539
  #
2504
2540
  # @api private
2505
2541
  def remove_composite_values
2506
- remove_instance_variable(:@uri_string) if defined?(@uri_string)
2507
- remove_instance_variable(:@hash) if defined?(@hash)
2542
+ @uri_string = nil
2543
+ @hash = nil
2508
2544
  end
2545
+
2546
+ ##
2547
+ # Converts the string to be UTF-8 if it is not already UTF-8
2548
+ #
2549
+ # @api private
2550
+ def force_utf8_encoding_if_needed(str)
2551
+ if str && str.encoding != Encoding::UTF_8
2552
+ str.force_encoding(Encoding::UTF_8)
2553
+ end
2554
+ end
2555
+
2556
+ private
2557
+
2558
+ ##
2559
+ # Resets instance variables
2560
+ #
2561
+ # @api private
2562
+ def reset_ivs
2563
+ @scheme = nil
2564
+ @user = nil
2565
+ @normalized_scheme = NONE
2566
+ @normalized_user = NONE
2567
+ @uri_string = nil
2568
+ @hash = nil
2569
+ @userinfo = nil
2570
+ @normalized_userinfo = NONE
2571
+ @authority = nil
2572
+ @password = nil
2573
+ @normalized_authority = nil
2574
+ @port = nil
2575
+ @normalized_password = NONE
2576
+ @host = nil
2577
+ @normalized_host = nil
2578
+ @normalized_port = NONE
2579
+ @path = EMPTY_STR
2580
+ @normalized_path = nil
2581
+ @normalized_query = NONE
2582
+ @fragment = nil
2583
+ @normalized_fragment = NONE
2584
+ @query = nil
2585
+ end
2586
+
2587
+ NONE = Module.new.freeze
2588
+
2589
+ private_constant :NONE
2509
2590
  end
2510
2591
  end