addressable 2.3.6 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # encoding:utf-8
2
4
  #--
3
- # Copyright (C) 2006-2013 Bob Aman
5
+ # Copyright (C) Bob Aman
4
6
  #
5
7
  # Licensed under the Apache License, Version 2.0 (the "License");
6
8
  # you may not use this file except in compliance with the License.
@@ -18,6 +20,7 @@
18
20
 
19
21
  require "addressable/version"
20
22
  require "addressable/idna"
23
+ require "public_suffix"
21
24
 
22
25
  ##
23
26
  # Addressable is a library for processing links and URIs.
@@ -44,12 +47,22 @@ module Addressable
44
47
  UNRESERVED = ALPHA + DIGIT + "\\-\\.\\_\\~"
45
48
  PCHAR = UNRESERVED + SUB_DELIMS + "\\:\\@"
46
49
  SCHEME = ALPHA + DIGIT + "\\-\\+\\."
47
- AUTHORITY = PCHAR
50
+ HOST = UNRESERVED + SUB_DELIMS + "\\[\\:\\]"
51
+ AUTHORITY = PCHAR + "\\[\\:\\]"
48
52
  PATH = PCHAR + "\\/"
49
53
  QUERY = PCHAR + "\\/\\?"
50
54
  FRAGMENT = PCHAR + "\\/\\?"
51
55
  end
52
56
 
57
+ module NormalizeCharacterClasses
58
+ HOST = /[^#{CharacterClasses::HOST}]/
59
+ UNRESERVED = /[^#{CharacterClasses::UNRESERVED}]/
60
+ PCHAR = /[^#{CharacterClasses::PCHAR}]/
61
+ SCHEME = /[^#{CharacterClasses::SCHEME}]/
62
+ FRAGMENT = /[^#{CharacterClasses::FRAGMENT}]/
63
+ QUERY = %r{[^a-zA-Z0-9\-\.\_\~\!\$\'\(\)\*\+\,\=\:\@\/\?%]|%(?!2B|2b)}
64
+ end
65
+
53
66
  SLASH = '/'
54
67
  EMPTY_STR = ''
55
68
 
@@ -69,7 +82,7 @@ module Addressable
69
82
  "wais" => 210,
70
83
  "ldap" => 389,
71
84
  "prospero" => 1525
72
- }
85
+ }.freeze
73
86
 
74
87
  ##
75
88
  # Returns a URI object based on the parsed string.
@@ -120,9 +133,9 @@ module Addressable
120
133
  user = userinfo.strip[/^([^:]*):?/, 1]
121
134
  password = userinfo.strip[/:(.*)$/, 1]
122
135
  end
123
- host = authority.gsub(
136
+ host = authority.sub(
124
137
  /^([^\[\]]*)@/, EMPTY_STR
125
- ).gsub(
138
+ ).sub(
126
139
  /:([^:@\[\]]*?)$/, EMPTY_STR
127
140
  )
128
141
  port = authority[/:([^:@\[\]]*?)$/, 1]
@@ -175,33 +188,50 @@ module Addressable
175
188
  raise TypeError, "Can't convert #{uri.class} into String."
176
189
  end
177
190
  # Otherwise, convert to a String
178
- uri = uri.to_str.dup
191
+ uri = uri.to_str.dup.strip
179
192
  hints = {
180
193
  :scheme => "http"
181
194
  }.merge(hints)
182
195
  case uri
183
- when /^http:\/+/
184
- uri.gsub!(/^http:\/+/, "http://")
185
- when /^https:\/+/
186
- uri.gsub!(/^https:\/+/, "https://")
187
- when /^feed:\/+http:\/+/
188
- uri.gsub!(/^feed:\/+http:\/+/, "feed:http://")
189
- when /^feed:\/+/
190
- uri.gsub!(/^feed:\/+/, "feed://")
191
- when /^file:\/+/
192
- uri.gsub!(/^file:\/+/, "file:///")
196
+ when /^http:\//i
197
+ uri.sub!(/^http:\/+/i, "http://")
198
+ when /^https:\//i
199
+ uri.sub!(/^https:\/+/i, "https://")
200
+ when /^feed:\/+http:\//i
201
+ uri.sub!(/^feed:\/+http:\/+/i, "feed:http://")
202
+ when /^feed:\//i
203
+ uri.sub!(/^feed:\/+/i, "feed://")
204
+ when %r[^file:/{4}]i
205
+ uri.sub!(%r[^file:/+]i, "file:////")
206
+ when %r[^file://localhost/]i
207
+ uri.sub!(%r[^file://localhost/+]i, "file:///")
208
+ when %r[^file:/+]i
209
+ uri.sub!(%r[^file:/+]i, "file:///")
193
210
  when /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
194
- uri.gsub!(/^/, hints[:scheme] + "://")
211
+ uri.sub!(/^/, hints[:scheme] + "://")
212
+ when /\A\d+\..*:\d+\z/
213
+ uri = "#{hints[:scheme]}://#{uri}"
214
+ end
215
+ match = uri.match(URIREGEX)
216
+ fragments = match.captures
217
+ authority = fragments[3]
218
+ if authority && authority.length > 0
219
+ new_authority = authority.tr("\\", "/").gsub(" ", "%20")
220
+ # NOTE: We want offset 4, not 3!
221
+ offset = match.offset(4)
222
+ uri = uri.dup
223
+ uri[offset[0]...offset[1]] = new_authority
195
224
  end
196
225
  parsed = self.parse(uri)
197
226
  if parsed.scheme =~ /^[^\/?#\.]+\.[^\/?#]+$/
198
227
  parsed = self.parse(hints[:scheme] + "://" + uri)
199
228
  end
200
229
  if parsed.path.include?(".")
201
- new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
202
- if new_host
230
+ if parsed.path[/\b@\b/]
231
+ parsed.scheme = "mailto" unless parsed.scheme
232
+ elsif new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
203
233
  parsed.defer_validation do
204
- new_path = parsed.path.gsub(
234
+ new_path = parsed.path.sub(
205
235
  Regexp.new("^" + Regexp.escape(new_host)), EMPTY_STR)
206
236
  parsed.host = new_host
207
237
  parsed.path = new_path
@@ -252,24 +282,24 @@ module Addressable
252
282
  # Otherwise, convert to a String
253
283
  path = path.to_str.strip
254
284
 
255
- path.gsub!(/^file:\/?\/?/, EMPTY_STR) if path =~ /^file:\/?\/?/
285
+ path.sub!(/^file:\/?\/?/, EMPTY_STR) if path =~ /^file:\/?\/?/
256
286
  path = SLASH + path if path =~ /^([a-zA-Z])[\|:]/
257
287
  uri = self.parse(path)
258
288
 
259
289
  if uri.scheme == nil
260
290
  # Adjust windows-style uris
261
- uri.path.gsub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
291
+ uri.path.sub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
262
292
  "/#{$1.downcase}:/"
263
293
  end
264
- uri.path.gsub!(/\\/, SLASH)
265
- if File.exists?(uri.path) &&
294
+ uri.path.tr!("\\", SLASH)
295
+ if File.exist?(uri.path) &&
266
296
  File.stat(uri.path).directory?
267
- uri.path.gsub!(/\/$/, EMPTY_STR)
297
+ uri.path.chomp!(SLASH)
268
298
  uri.path = uri.path + '/'
269
299
  end
270
300
 
271
301
  # If the path is absolute, set the scheme and host.
272
- if uri.path =~ /^\//
302
+ if uri.path.start_with?(SLASH)
273
303
  uri.scheme = "file"
274
304
  uri.host = EMPTY_STR
275
305
  end
@@ -306,6 +336,21 @@ module Addressable
306
336
  return result
307
337
  end
308
338
 
339
+ ##
340
+ # Tables used to optimize encoding operations in `self.encode_component`
341
+ # and `self.normalize_component`
342
+ SEQUENCE_ENCODING_TABLE = Hash.new do |hash, sequence|
343
+ hash[sequence] = sequence.unpack("C*").map do |c|
344
+ format("%02x", c)
345
+ end.join
346
+ end
347
+
348
+ SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE = Hash.new do |hash, sequence|
349
+ hash[sequence] = sequence.unpack("C*").map do |c|
350
+ format("%%%02X", c)
351
+ end.join
352
+ end
353
+
309
354
  ##
310
355
  # Percent encodes a URI component.
311
356
  #
@@ -366,26 +411,26 @@ module Addressable
366
411
  if character_class.kind_of?(String)
367
412
  character_class = /[^#{character_class}]/
368
413
  end
369
- if component.respond_to?(:force_encoding)
370
- # We can't perform regexps on invalid UTF sequences, but
371
- # here we need to, so switch to ASCII.
372
- component = component.dup
373
- component.force_encoding(Encoding::ASCII_8BIT)
374
- end
414
+ # We can't perform regexps on invalid UTF sequences, but
415
+ # here we need to, so switch to ASCII.
416
+ component = component.dup
417
+ component.force_encoding(Encoding::ASCII_8BIT)
375
418
  # Avoiding gsub! because there are edge cases with frozen strings
376
419
  component = component.gsub(character_class) do |sequence|
377
- (sequence.unpack('C*').map { |c| "%" + ("%02x" % c).upcase }).join
420
+ SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE[sequence]
378
421
  end
379
422
  if upcase_encoded.length > 0
380
- component = component.gsub(/%(#{upcase_encoded.chars.map do |char|
381
- char.unpack('C*').map { |c| '%02x' % c }.join
382
- end.join('|')})/i) { |s| s.upcase }
423
+ upcase_encoded_chars = upcase_encoded.chars.map do |char|
424
+ SEQUENCE_ENCODING_TABLE[char]
425
+ end
426
+ component = component.gsub(/%(#{upcase_encoded_chars.join('|')})/,
427
+ &:upcase)
383
428
  end
384
429
  return component
385
430
  end
386
431
 
387
432
  class << self
388
- alias_method :encode_component, :encode_component
433
+ alias_method :escape_component, :encode_component
389
434
  end
390
435
 
391
436
  ##
@@ -426,14 +471,18 @@ module Addressable
426
471
  end
427
472
  uri = uri.dup
428
473
  # Seriously, only use UTF-8. I'm really not kidding!
429
- uri.force_encoding("utf-8") if uri.respond_to?(:force_encoding)
430
- leave_encoded.force_encoding("utf-8") if leave_encoded.respond_to?(:force_encoding)
474
+ uri.force_encoding("utf-8")
475
+
476
+ unless leave_encoded.empty?
477
+ leave_encoded = leave_encoded.dup.force_encoding("utf-8")
478
+ end
479
+
431
480
  result = uri.gsub(/%[0-9a-f]{2}/iu) do |sequence|
432
481
  c = sequence[1..3].to_i(16).chr
433
- c.force_encoding("utf-8") if c.respond_to?(:force_encoding)
482
+ c.force_encoding("utf-8")
434
483
  leave_encoded.include?(c) ? sequence : c
435
484
  end
436
- result.force_encoding("utf-8") if result.respond_to?(:force_encoding)
485
+ result.force_encoding("utf-8")
437
486
  if return_type == String
438
487
  return result
439
488
  elsif return_type == ::Addressable::URI
@@ -513,19 +562,21 @@ module Addressable
513
562
  character_class = "#{character_class}%" unless character_class.include?('%')
514
563
 
515
564
  "|%(?!#{leave_encoded.chars.map do |char|
516
- seq = char.unpack('C*').map { |c| '%02x' % c }.join
565
+ seq = SEQUENCE_ENCODING_TABLE[char]
517
566
  [seq.upcase, seq.downcase]
518
567
  end.flatten.join('|')})"
519
568
  end
520
569
 
521
- character_class = /[^#{character_class}]#{leave_re}/
522
- end
523
- if component.respond_to?(:force_encoding)
524
- # We can't perform regexps on invalid UTF sequences, but
525
- # here we need to, so switch to ASCII.
526
- component = component.dup
527
- component.force_encoding(Encoding::ASCII_8BIT)
570
+ character_class = if leave_re
571
+ /[^#{character_class}]#{leave_re}/
572
+ else
573
+ /[^#{character_class}]/
574
+ end
528
575
  end
576
+ # We can't perform regexps on invalid UTF sequences, but
577
+ # here we need to, so switch to ASCII.
578
+ component = component.dup
579
+ component.force_encoding(Encoding::ASCII_8BIT)
529
580
  unencoded = self.unencode_component(component, String, leave_encoded)
530
581
  begin
531
582
  encoded = self.encode_component(
@@ -536,9 +587,7 @@ module Addressable
536
587
  rescue ArgumentError
537
588
  encoded = self.encode_component(unencoded)
538
589
  end
539
- if encoded.respond_to?(:force_encoding)
540
- encoded.force_encoding(Encoding::UTF_8)
541
- end
590
+ encoded.force_encoding(Encoding::UTF_8)
542
591
  return encoded
543
592
  end
544
593
 
@@ -720,9 +769,9 @@ module Addressable
720
769
  ).gsub("%20", "+")
721
770
  ]
722
771
  end
723
- return (escaped_form_values.map do |(key, value)|
772
+ return escaped_form_values.map do |(key, value)|
724
773
  "#{key}=#{value}"
725
- end).join("&")
774
+ end.join("&")
726
775
  end
727
776
 
728
777
  ##
@@ -803,6 +852,7 @@ module Addressable
803
852
  self.query_values = options[:query_values] if options[:query_values]
804
853
  self.fragment = options[:fragment] if options[:fragment]
805
854
  end
855
+ self.to_s
806
856
  end
807
857
 
808
858
  ##
@@ -830,7 +880,7 @@ module Addressable
830
880
  #
831
881
  # @return [String] The scheme component.
832
882
  def scheme
833
- return instance_variable_defined?(:@scheme) ? @scheme : nil
883
+ return defined?(@scheme) ? @scheme : nil
834
884
  end
835
885
 
836
886
  ##
@@ -838,16 +888,20 @@ module Addressable
838
888
  #
839
889
  # @return [String] The scheme component, normalized.
840
890
  def normalized_scheme
841
- self.scheme && @normalized_scheme ||= (begin
891
+ return nil unless self.scheme
892
+ @normalized_scheme ||= begin
842
893
  if self.scheme =~ /^\s*ssh\+svn\s*$/i
843
- "svn+ssh"
894
+ "svn+ssh".dup
844
895
  else
845
896
  Addressable::URI.normalize_component(
846
897
  self.scheme.strip.downcase,
847
- Addressable::URI::CharacterClasses::SCHEME
898
+ Addressable::URI::NormalizeCharacterClasses::SCHEME
848
899
  )
849
900
  end
850
- end)
901
+ end
902
+ # All normalized values should be UTF-8
903
+ @normalized_scheme.force_encoding(Encoding::UTF_8) if @normalized_scheme
904
+ @normalized_scheme
851
905
  end
852
906
 
853
907
  ##
@@ -860,16 +914,15 @@ module Addressable
860
914
  elsif new_scheme
861
915
  new_scheme = new_scheme.to_str
862
916
  end
863
- if new_scheme && new_scheme !~ /[a-z][a-z0-9\.\+\-]*/i
864
- raise InvalidURIError, "Invalid scheme format."
917
+ if new_scheme && new_scheme !~ /\A[a-z][a-z0-9\.\+\-]*\z/i
918
+ raise InvalidURIError, "Invalid scheme format: '#{new_scheme}'"
865
919
  end
866
920
  @scheme = new_scheme
867
921
  @scheme = nil if @scheme.to_s.strip.empty?
868
922
 
869
- # Reset dependant values
870
- @normalized_scheme = nil
871
- @uri_string = nil
872
- @hash = nil
923
+ # Reset dependent values
924
+ remove_instance_variable(:@normalized_scheme) if defined?(@normalized_scheme)
925
+ remove_composite_values
873
926
 
874
927
  # Ensure we haven't created an invalid URI
875
928
  validate()
@@ -880,7 +933,7 @@ module Addressable
880
933
  #
881
934
  # @return [String] The user component.
882
935
  def user
883
- return instance_variable_defined?(:@user) ? @user : nil
936
+ return defined?(@user) ? @user : nil
884
937
  end
885
938
 
886
939
  ##
@@ -888,17 +941,22 @@ module Addressable
888
941
  #
889
942
  # @return [String] The user component, normalized.
890
943
  def normalized_user
891
- self.user && @normalized_user ||= (begin
944
+ return nil unless self.user
945
+ return @normalized_user if defined?(@normalized_user)
946
+ @normalized_user ||= begin
892
947
  if normalized_scheme =~ /https?/ && self.user.strip.empty? &&
893
948
  (!self.password || self.password.strip.empty?)
894
949
  nil
895
950
  else
896
951
  Addressable::URI.normalize_component(
897
952
  self.user.strip,
898
- Addressable::URI::CharacterClasses::UNRESERVED
953
+ Addressable::URI::NormalizeCharacterClasses::UNRESERVED
899
954
  )
900
955
  end
901
- end)
956
+ end
957
+ # All normalized values should be UTF-8
958
+ @normalized_user.force_encoding(Encoding::UTF_8) if @normalized_user
959
+ @normalized_user
902
960
  end
903
961
 
904
962
  ##
@@ -916,13 +974,12 @@ module Addressable
916
974
  @user = EMPTY_STR if @user.nil?
917
975
  end
918
976
 
919
- # Reset dependant values
920
- @userinfo = nil
921
- @normalized_userinfo = nil
922
- @authority = nil
923
- @normalized_user = nil
924
- @uri_string = nil
925
- @hash = nil
977
+ # Reset dependent values
978
+ remove_instance_variable(:@userinfo) if defined?(@userinfo)
979
+ remove_instance_variable(:@normalized_userinfo) if defined?(@normalized_userinfo)
980
+ remove_instance_variable(:@authority) if defined?(@authority)
981
+ remove_instance_variable(:@normalized_user) if defined?(@normalized_user)
982
+ remove_composite_values
926
983
 
927
984
  # Ensure we haven't created an invalid URI
928
985
  validate()
@@ -933,7 +990,7 @@ module Addressable
933
990
  #
934
991
  # @return [String] The password component.
935
992
  def password
936
- return instance_variable_defined?(:@password) ? @password : nil
993
+ return defined?(@password) ? @password : nil
937
994
  end
938
995
 
939
996
  ##
@@ -941,17 +998,24 @@ module Addressable
941
998
  #
942
999
  # @return [String] The password component, normalized.
943
1000
  def normalized_password
944
- self.password && @normalized_password ||= (begin
1001
+ return nil unless self.password
1002
+ return @normalized_password if defined?(@normalized_password)
1003
+ @normalized_password ||= begin
945
1004
  if self.normalized_scheme =~ /https?/ && self.password.strip.empty? &&
946
1005
  (!self.user || self.user.strip.empty?)
947
1006
  nil
948
1007
  else
949
1008
  Addressable::URI.normalize_component(
950
1009
  self.password.strip,
951
- Addressable::URI::CharacterClasses::UNRESERVED
1010
+ Addressable::URI::NormalizeCharacterClasses::UNRESERVED
952
1011
  )
953
1012
  end
954
- end)
1013
+ end
1014
+ # All normalized values should be UTF-8
1015
+ if @normalized_password
1016
+ @normalized_password.force_encoding(Encoding::UTF_8)
1017
+ end
1018
+ @normalized_password
955
1019
  end
956
1020
 
957
1021
  ##
@@ -971,13 +1035,12 @@ module Addressable
971
1035
  @user = EMPTY_STR if @user.nil?
972
1036
  end
973
1037
 
974
- # Reset dependant values
975
- @userinfo = nil
976
- @normalized_userinfo = nil
977
- @authority = nil
978
- @normalized_password = nil
979
- @uri_string = nil
980
- @hash = nil
1038
+ # Reset dependent values
1039
+ remove_instance_variable(:@userinfo) if defined?(@userinfo)
1040
+ remove_instance_variable(:@normalized_userinfo) if defined?(@normalized_userinfo)
1041
+ remove_instance_variable(:@authority) if defined?(@authority)
1042
+ remove_instance_variable(:@normalized_password) if defined?(@normalized_password)
1043
+ remove_composite_values
981
1044
 
982
1045
  # Ensure we haven't created an invalid URI
983
1046
  validate()
@@ -991,13 +1054,13 @@ module Addressable
991
1054
  def userinfo
992
1055
  current_user = self.user
993
1056
  current_password = self.password
994
- (current_user || current_password) && @userinfo ||= (begin
1057
+ (current_user || current_password) && @userinfo ||= begin
995
1058
  if current_user && current_password
996
1059
  "#{current_user}:#{current_password}"
997
1060
  elsif current_user && !current_password
998
1061
  "#{current_user}"
999
1062
  end
1000
- end)
1063
+ end
1001
1064
  end
1002
1065
 
1003
1066
  ##
@@ -1005,17 +1068,24 @@ module Addressable
1005
1068
  #
1006
1069
  # @return [String] The userinfo component, normalized.
1007
1070
  def normalized_userinfo
1008
- self.userinfo && @normalized_userinfo ||= (begin
1071
+ return nil unless self.userinfo
1072
+ return @normalized_userinfo if defined?(@normalized_userinfo)
1073
+ @normalized_userinfo ||= begin
1009
1074
  current_user = self.normalized_user
1010
1075
  current_password = self.normalized_password
1011
1076
  if !current_user && !current_password
1012
1077
  nil
1013
1078
  elsif current_user && current_password
1014
- "#{current_user}:#{current_password}"
1079
+ "#{current_user}:#{current_password}".dup
1015
1080
  elsif current_user && !current_password
1016
- "#{current_user}"
1081
+ "#{current_user}".dup
1017
1082
  end
1018
- end)
1083
+ end
1084
+ # All normalized values should be UTF-8
1085
+ if @normalized_userinfo
1086
+ @normalized_userinfo.force_encoding(Encoding::UTF_8)
1087
+ end
1088
+ @normalized_userinfo
1019
1089
  end
1020
1090
 
1021
1091
  ##
@@ -1039,10 +1109,9 @@ module Addressable
1039
1109
  self.password = new_password
1040
1110
  self.user = new_user
1041
1111
 
1042
- # Reset dependant values
1043
- @authority = nil
1044
- @uri_string = nil
1045
- @hash = nil
1112
+ # Reset dependent values
1113
+ remove_instance_variable(:@authority) if defined?(@authority)
1114
+ remove_composite_values
1046
1115
 
1047
1116
  # Ensure we haven't created an invalid URI
1048
1117
  validate()
@@ -1053,7 +1122,7 @@ module Addressable
1053
1122
  #
1054
1123
  # @return [String] The host component.
1055
1124
  def host
1056
- return instance_variable_defined?(:@host) ? @host : nil
1125
+ return defined?(@host) ? @host : nil
1057
1126
  end
1058
1127
 
1059
1128
  ##
@@ -1061,7 +1130,9 @@ module Addressable
1061
1130
  #
1062
1131
  # @return [String] The host component, normalized.
1063
1132
  def normalized_host
1064
- self.host && @normalized_host ||= (begin
1133
+ return nil unless self.host
1134
+
1135
+ @normalized_host ||= begin
1065
1136
  if !self.host.strip.empty?
1066
1137
  result = ::Addressable::IDNA.to_ascii(
1067
1138
  URI.unencode_component(self.host.strip.downcase)
@@ -1070,11 +1141,20 @@ module Addressable
1070
1141
  # Single trailing dots are unnecessary.
1071
1142
  result = result[0...-1]
1072
1143
  end
1144
+ result = Addressable::URI.normalize_component(
1145
+ result,
1146
+ NormalizeCharacterClasses::HOST
1147
+ )
1073
1148
  result
1074
1149
  else
1075
- EMPTY_STR
1150
+ EMPTY_STR.dup
1076
1151
  end
1077
- end)
1152
+ end
1153
+ # All normalized values should be UTF-8
1154
+ if @normalized_host && !@normalized_host.empty?
1155
+ @normalized_host.force_encoding(Encoding::UTF_8)
1156
+ end
1157
+ @normalized_host
1078
1158
  end
1079
1159
 
1080
1160
  ##
@@ -1087,19 +1167,10 @@ module Addressable
1087
1167
  end
1088
1168
  @host = new_host ? new_host.to_str : nil
1089
1169
 
1090
- unreserved = CharacterClasses::UNRESERVED
1091
- sub_delims = CharacterClasses::SUB_DELIMS
1092
- if @host != nil && (@host =~ /[<>{}\/\?\#\@]/ ||
1093
- (@host[/^\[(.*)\]$/, 1] != nil && @host[/^\[(.*)\]$/, 1] !~
1094
- Regexp.new("^[#{unreserved}#{sub_delims}:]*$")))
1095
- raise InvalidURIError, "Invalid character in host: '#{@host.to_s}'"
1096
- end
1097
-
1098
- # Reset dependant values
1099
- @authority = nil
1100
- @normalized_host = nil
1101
- @uri_string = nil
1102
- @hash = nil
1170
+ # Reset dependent values
1171
+ remove_instance_variable(:@authority) if defined?(@authority)
1172
+ remove_instance_variable(:@normalized_host) if defined?(@normalized_host)
1173
+ remove_composite_values
1103
1174
 
1104
1175
  # Ensure we haven't created an invalid URI
1105
1176
  validate()
@@ -1125,7 +1196,10 @@ module Addressable
1125
1196
  #
1126
1197
  # @param [String, #to_str] new_hostname The new hostname for this URI.
1127
1198
  def hostname=(new_hostname)
1128
- if new_hostname && !new_hostname.respond_to?(:to_str)
1199
+ if new_hostname &&
1200
+ (new_hostname.respond_to?(:ipv4?) || new_hostname.respond_to?(:ipv6?))
1201
+ new_hostname = new_hostname.to_s
1202
+ elsif new_hostname && !new_hostname.respond_to?(:to_str)
1129
1203
  raise TypeError, "Can't convert #{new_hostname.class} into String."
1130
1204
  end
1131
1205
  v = new_hostname ? new_hostname.to_str : nil
@@ -1133,14 +1207,41 @@ module Addressable
1133
1207
  self.host = v
1134
1208
  end
1135
1209
 
1210
+ ##
1211
+ # Returns the top-level domain for this host.
1212
+ #
1213
+ # @example
1214
+ # Addressable::URI.parse("http://www.example.co.uk").tld # => "co.uk"
1215
+ def tld
1216
+ PublicSuffix.parse(self.host, ignore_private: true).tld
1217
+ end
1218
+
1219
+ ##
1220
+ # Sets the top-level domain for this URI.
1221
+ #
1222
+ # @param [String, #to_str] new_tld The new top-level domain.
1223
+ def tld=(new_tld)
1224
+ replaced_tld = host.sub(/#{tld}\z/, new_tld)
1225
+ self.host = PublicSuffix::Domain.new(replaced_tld).to_s
1226
+ end
1227
+
1228
+ ##
1229
+ # Returns the public suffix domain for this host.
1230
+ #
1231
+ # @example
1232
+ # Addressable::URI.parse("http://www.example.co.uk").domain # => "example.co.uk"
1233
+ def domain
1234
+ PublicSuffix.domain(self.host, ignore_private: true)
1235
+ end
1236
+
1136
1237
  ##
1137
1238
  # The authority component for this URI.
1138
1239
  # Combines the user, password, host, and port components.
1139
1240
  #
1140
1241
  # @return [String] The authority component.
1141
1242
  def authority
1142
- self.host && @authority ||= (begin
1143
- authority = ""
1243
+ self.host && @authority ||= begin
1244
+ authority = String.new
1144
1245
  if self.userinfo != nil
1145
1246
  authority << "#{self.userinfo}@"
1146
1247
  end
@@ -1149,7 +1250,7 @@ module Addressable
1149
1250
  authority << ":#{self.port}"
1150
1251
  end
1151
1252
  authority
1152
- end)
1253
+ end
1153
1254
  end
1154
1255
 
1155
1256
  ##
@@ -1157,8 +1258,9 @@ module Addressable
1157
1258
  #
1158
1259
  # @return [String] The authority component, normalized.
1159
1260
  def normalized_authority
1160
- self.authority && @normalized_authority ||= (begin
1161
- authority = ""
1261
+ return nil unless self.authority
1262
+ @normalized_authority ||= begin
1263
+ authority = String.new
1162
1264
  if self.normalized_userinfo != nil
1163
1265
  authority << "#{self.normalized_userinfo}@"
1164
1266
  end
@@ -1167,7 +1269,12 @@ module Addressable
1167
1269
  authority << ":#{self.normalized_port}"
1168
1270
  end
1169
1271
  authority
1170
- end)
1272
+ end
1273
+ # All normalized values should be UTF-8
1274
+ if @normalized_authority
1275
+ @normalized_authority.force_encoding(Encoding::UTF_8)
1276
+ end
1277
+ @normalized_authority
1171
1278
  end
1172
1279
 
1173
1280
  ##
@@ -1185,9 +1292,9 @@ module Addressable
1185
1292
  new_user = new_userinfo.strip[/^([^:]*):?/, 1]
1186
1293
  new_password = new_userinfo.strip[/:(.*)$/, 1]
1187
1294
  end
1188
- new_host = new_authority.gsub(
1295
+ new_host = new_authority.sub(
1189
1296
  /^([^\[\]]*)@/, EMPTY_STR
1190
- ).gsub(
1297
+ ).sub(
1191
1298
  /:([^:@\[\]]*?)$/, EMPTY_STR
1192
1299
  )
1193
1300
  new_port =
@@ -1200,11 +1307,10 @@ module Addressable
1200
1307
  self.host = defined?(new_host) ? new_host : nil
1201
1308
  self.port = defined?(new_port) ? new_port : nil
1202
1309
 
1203
- # Reset dependant values
1204
- @userinfo = nil
1205
- @normalized_userinfo = nil
1206
- @uri_string = nil
1207
- @hash = nil
1310
+ # Reset dependent values
1311
+ remove_instance_variable(:@userinfo) if defined?(@userinfo)
1312
+ remove_instance_variable(:@normalized_userinfo) if defined?(@normalized_userinfo)
1313
+ remove_composite_values
1208
1314
 
1209
1315
  # Ensure we haven't created an invalid URI
1210
1316
  validate()
@@ -1216,18 +1322,55 @@ module Addressable
1216
1322
  #
1217
1323
  # @return [String] The serialized origin.
1218
1324
  def origin
1219
- return (if self.scheme && self.authority
1325
+ if self.scheme && self.authority
1220
1326
  if self.normalized_port
1221
- (
1222
- "#{self.normalized_scheme}://#{self.normalized_host}" +
1223
- ":#{self.normalized_port}"
1224
- )
1327
+ "#{self.normalized_scheme}://#{self.normalized_host}" +
1328
+ ":#{self.normalized_port}"
1225
1329
  else
1226
1330
  "#{self.normalized_scheme}://#{self.normalized_host}"
1227
1331
  end
1228
1332
  else
1229
1333
  "null"
1230
- end)
1334
+ end
1335
+ end
1336
+
1337
+ ##
1338
+ # Sets the origin for this URI, serialized to ASCII, as per
1339
+ # RFC 6454, section 6.2. This assignment will reset the `userinfo`
1340
+ # component.
1341
+ #
1342
+ # @param [String, #to_str] new_origin The new origin component.
1343
+ def origin=(new_origin)
1344
+ if new_origin
1345
+ if !new_origin.respond_to?(:to_str)
1346
+ raise TypeError, "Can't convert #{new_origin.class} into String."
1347
+ end
1348
+ new_origin = new_origin.to_str
1349
+ new_scheme = new_origin[/^([^:\/?#]+):\/\//, 1]
1350
+ unless new_scheme
1351
+ raise InvalidURIError, 'An origin cannot omit the scheme.'
1352
+ end
1353
+ new_host = new_origin[/:\/\/([^\/?#:]+)/, 1]
1354
+ unless new_host
1355
+ raise InvalidURIError, 'An origin cannot omit the host.'
1356
+ end
1357
+ new_port = new_origin[/:([^:@\[\]\/]*?)$/, 1]
1358
+ end
1359
+
1360
+ self.scheme = defined?(new_scheme) ? new_scheme : nil
1361
+ self.host = defined?(new_host) ? new_host : nil
1362
+ self.port = defined?(new_port) ? new_port : nil
1363
+ self.userinfo = nil
1364
+
1365
+ # Reset dependent values
1366
+ remove_instance_variable(:@userinfo) if defined?(@userinfo)
1367
+ remove_instance_variable(:@normalized_userinfo) if defined?(@normalized_userinfo)
1368
+ remove_instance_variable(:@authority) if defined?(@authority)
1369
+ remove_instance_variable(:@normalized_authority) if defined?(@normalized_authority)
1370
+ remove_composite_values
1371
+
1372
+ # Ensure we haven't created an invalid URI
1373
+ validate()
1231
1374
  end
1232
1375
 
1233
1376
  # Returns an array of known ip-based schemes. These schemes typically
@@ -1251,7 +1394,7 @@ module Addressable
1251
1394
  #
1252
1395
  # @return [Integer] The port component.
1253
1396
  def port
1254
- return instance_variable_defined?(:@port) ? @port : nil
1397
+ return defined?(@port) ? @port : nil
1255
1398
  end
1256
1399
 
1257
1400
  ##
@@ -1259,10 +1402,14 @@ module Addressable
1259
1402
  #
1260
1403
  # @return [Integer] The port component, normalized.
1261
1404
  def normalized_port
1262
- if URI.port_mapping[self.normalized_scheme] == self.port
1263
- nil
1264
- else
1265
- self.port
1405
+ return nil unless self.port
1406
+ return @normalized_port if defined?(@normalized_port)
1407
+ @normalized_port ||= begin
1408
+ if URI.port_mapping[self.normalized_scheme] == self.port
1409
+ nil
1410
+ else
1411
+ self.port
1412
+ end
1266
1413
  end
1267
1414
  end
1268
1415
 
@@ -1274,6 +1421,11 @@ module Addressable
1274
1421
  if new_port != nil && new_port.respond_to?(:to_str)
1275
1422
  new_port = Addressable::URI.unencode_component(new_port.to_str)
1276
1423
  end
1424
+
1425
+ if new_port.respond_to?(:valid_encoding?) && !new_port.valid_encoding?
1426
+ raise InvalidURIError, "Invalid encoding in port"
1427
+ end
1428
+
1277
1429
  if new_port != nil && !(new_port.to_s =~ /^\d+$/)
1278
1430
  raise InvalidURIError,
1279
1431
  "Invalid port number: #{new_port.inspect}"
@@ -1282,11 +1434,10 @@ module Addressable
1282
1434
  @port = new_port.to_s.to_i
1283
1435
  @port = nil if @port == 0
1284
1436
 
1285
- # Reset dependant values
1286
- @authority = nil
1287
- @normalized_port = nil
1288
- @uri_string = nil
1289
- @hash = nil
1437
+ # Reset dependent values
1438
+ remove_instance_variable(:@authority) if defined?(@authority)
1439
+ remove_instance_variable(:@normalized_port) if defined?(@normalized_port)
1440
+ remove_composite_values
1290
1441
 
1291
1442
  # Ensure we haven't created an invalid URI
1292
1443
  validate()
@@ -1326,12 +1477,12 @@ module Addressable
1326
1477
  #
1327
1478
  # @return [String] The components that identify a site.
1328
1479
  def site
1329
- (self.scheme || self.authority) && @site ||= (begin
1330
- site_string = ""
1480
+ (self.scheme || self.authority) && @site ||= begin
1481
+ site_string = "".dup
1331
1482
  site_string << "#{self.scheme}:" if self.scheme != nil
1332
1483
  site_string << "//#{self.authority}" if self.authority != nil
1333
1484
  site_string
1334
- end)
1485
+ end
1335
1486
  end
1336
1487
 
1337
1488
  ##
@@ -1344,8 +1495,9 @@ module Addressable
1344
1495
  #
1345
1496
  # @return [String] The normalized components that identify a site.
1346
1497
  def normalized_site
1347
- self.site && @normalized_site ||= (begin
1348
- site_string = ""
1498
+ return nil unless self.site
1499
+ @normalized_site ||= begin
1500
+ site_string = "".dup
1349
1501
  if self.normalized_scheme != nil
1350
1502
  site_string << "#{self.normalized_scheme}:"
1351
1503
  end
@@ -1353,7 +1505,10 @@ module Addressable
1353
1505
  site_string << "//#{self.normalized_authority}"
1354
1506
  end
1355
1507
  site_string
1356
- end)
1508
+ end
1509
+ # All normalized values should be UTF-8
1510
+ @normalized_site.force_encoding(Encoding::UTF_8) if @normalized_site
1511
+ @normalized_site
1357
1512
  end
1358
1513
 
1359
1514
  ##
@@ -1383,7 +1538,7 @@ module Addressable
1383
1538
  #
1384
1539
  # @return [String] The path component.
1385
1540
  def path
1386
- return instance_variable_defined?(:@path) ? @path : EMPTY_STR
1541
+ return defined?(@path) ? @path : EMPTY_STR
1387
1542
  end
1388
1543
 
1389
1544
  NORMPATH = /^(?!\/)[^\/:]*:.*$/
@@ -1392,7 +1547,7 @@ module Addressable
1392
1547
  #
1393
1548
  # @return [String] The path component, normalized.
1394
1549
  def normalized_path
1395
- @normalized_path ||= (begin
1550
+ @normalized_path ||= begin
1396
1551
  path = self.path.to_s
1397
1552
  if self.scheme == nil && path =~ NORMPATH
1398
1553
  # Relative paths with colons in the first segment are ambiguous.
@@ -1400,20 +1555,23 @@ module Addressable
1400
1555
  end
1401
1556
  # String#split(delimeter, -1) uses the more strict splitting behavior
1402
1557
  # found by default in Python.
1403
- result = (path.strip.split(SLASH, -1).map do |segment|
1558
+ result = path.strip.split(SLASH, -1).map do |segment|
1404
1559
  Addressable::URI.normalize_component(
1405
1560
  segment,
1406
- Addressable::URI::CharacterClasses::PCHAR
1561
+ Addressable::URI::NormalizeCharacterClasses::PCHAR
1407
1562
  )
1408
- end).join(SLASH)
1563
+ end.join(SLASH)
1409
1564
 
1410
1565
  result = URI.normalize_path(result)
1411
1566
  if result.empty? &&
1412
1567
  ["http", "https", "ftp", "tftp"].include?(self.normalized_scheme)
1413
- result = SLASH
1568
+ result = SLASH.dup
1414
1569
  end
1415
1570
  result
1416
- end)
1571
+ end
1572
+ # All normalized values should be UTF-8
1573
+ @normalized_path.force_encoding(Encoding::UTF_8) if @normalized_path
1574
+ @normalized_path
1417
1575
  end
1418
1576
 
1419
1577
  ##
@@ -1429,10 +1587,12 @@ module Addressable
1429
1587
  @path = "/#{@path}"
1430
1588
  end
1431
1589
 
1432
- # Reset dependant values
1433
- @normalized_path = nil
1434
- @uri_string = nil
1435
- @hash = nil
1590
+ # Reset dependent values
1591
+ remove_instance_variable(:@normalized_path) if defined?(@normalized_path)
1592
+ remove_composite_values
1593
+
1594
+ # Ensure we haven't created an invalid URI
1595
+ validate()
1436
1596
  end
1437
1597
 
1438
1598
  ##
@@ -1441,7 +1601,7 @@ module Addressable
1441
1601
  # @return [String] The path's basename.
1442
1602
  def basename
1443
1603
  # Path cannot be nil
1444
- return File.basename(self.path).gsub(/;[^\/]*$/, EMPTY_STR)
1604
+ return File.basename(self.path).sub(/;[^\/]*$/, EMPTY_STR)
1445
1605
  end
1446
1606
 
1447
1607
  ##
@@ -1459,7 +1619,7 @@ module Addressable
1459
1619
  #
1460
1620
  # @return [String] The query component.
1461
1621
  def query
1462
- return instance_variable_defined?(:@query) ? @query : nil
1622
+ return defined?(@query) ? @query : nil
1463
1623
  end
1464
1624
 
1465
1625
  ##
@@ -1467,15 +1627,27 @@ module Addressable
1467
1627
  #
1468
1628
  # @return [String] The query component, normalized.
1469
1629
  def normalized_query(*flags)
1470
- modified_query_class = Addressable::URI::CharacterClasses::QUERY.dup
1471
- # Make sure possible key-value pair delimiters are escaped.
1472
- modified_query_class.sub!("\\&", "").sub!("\\;", "")
1473
- pairs = (self.query || "").split("&", -1)
1474
- pairs.sort! if flags.include?(:sorted)
1475
- component = (pairs.map do |pair|
1476
- Addressable::URI.normalize_component(pair, modified_query_class, "+")
1477
- end).join("&")
1478
- component == "" ? nil : component
1630
+ return nil unless self.query
1631
+ return @normalized_query if defined?(@normalized_query)
1632
+ @normalized_query ||= begin
1633
+ modified_query_class = Addressable::URI::CharacterClasses::QUERY.dup
1634
+ # Make sure possible key-value pair delimiters are escaped.
1635
+ modified_query_class.sub!("\\&", "").sub!("\\;", "")
1636
+ pairs = (query || "").split("&", -1)
1637
+ pairs.delete_if(&:empty?).uniq! if flags.include?(:compacted)
1638
+ pairs.sort! if flags.include?(:sorted)
1639
+ component = pairs.map do |pair|
1640
+ Addressable::URI.normalize_component(
1641
+ pair,
1642
+ Addressable::URI::NormalizeCharacterClasses::QUERY,
1643
+ "+"
1644
+ )
1645
+ end.join("&")
1646
+ component == "" ? nil : component
1647
+ end
1648
+ # All normalized values should be UTF-8
1649
+ @normalized_query.force_encoding(Encoding::UTF_8) if @normalized_query
1650
+ @normalized_query
1479
1651
  end
1480
1652
 
1481
1653
  ##
@@ -1488,10 +1660,9 @@ module Addressable
1488
1660
  end
1489
1661
  @query = new_query ? new_query.to_str : nil
1490
1662
 
1491
- # Reset dependant values
1492
- @normalized_query = nil
1493
- @uri_string = nil
1494
- @hash = nil
1663
+ # Reset dependent values
1664
+ remove_instance_variable(:@normalized_query) if defined?(@normalized_query)
1665
+ remove_composite_values
1495
1666
  end
1496
1667
 
1497
1668
  ##
@@ -1500,7 +1671,8 @@ module Addressable
1500
1671
  # @param [Class] return_type The return type desired. Value must be either
1501
1672
  # `Hash` or `Array`.
1502
1673
  #
1503
- # @return [Hash, Array] The query string parsed as a Hash or Array object.
1674
+ # @return [Hash, Array, nil] The query string parsed as a Hash or Array
1675
+ # or nil if the query string is blank.
1504
1676
  #
1505
1677
  # @example
1506
1678
  # Addressable::URI.parse("?one=1&two=2&three=3").query_values
@@ -1509,26 +1681,32 @@ module Addressable
1509
1681
  # #=> [["one", "two"], ["one", "three"]]
1510
1682
  # Addressable::URI.parse("?one=two&one=three").query_values(Hash)
1511
1683
  # #=> {"one" => "three"}
1684
+ # Addressable::URI.parse("?").query_values
1685
+ # #=> {}
1686
+ # Addressable::URI.parse("").query_values
1687
+ # #=> nil
1512
1688
  def query_values(return_type=Hash)
1513
1689
  empty_accumulator = Array == return_type ? [] : {}
1514
1690
  if return_type != Hash && return_type != Array
1515
1691
  raise ArgumentError, "Invalid return type. Must be Hash or Array."
1516
1692
  end
1517
1693
  return nil if self.query == nil
1518
- split_query = (self.query.split("&").map do |pair|
1694
+ split_query = self.query.split("&").map do |pair|
1519
1695
  pair.split("=", 2) if pair && !pair.empty?
1520
- end).compact
1696
+ end.compact
1521
1697
  return split_query.inject(empty_accumulator.dup) do |accu, pair|
1522
1698
  # I'd rather use key/value identifiers instead of array lookups,
1523
1699
  # but in this case I really want to maintain the exact pair structure,
1524
1700
  # so it's best to make all changes in-place.
1525
1701
  pair[0] = URI.unencode_component(pair[0])
1526
1702
  if pair[1].respond_to?(:to_str)
1703
+ value = pair[1].to_str
1527
1704
  # I loathe the fact that I have to do this. Stupid HTML 4.01.
1528
1705
  # Treating '+' as a space was just an unbelievably bad idea.
1529
1706
  # There was nothing wrong with '%20'!
1530
1707
  # If it ain't broke, don't fix it!
1531
- pair[1] = URI.unencode_component(pair[1].to_str.gsub(/\+/, " "))
1708
+ value = value.tr("+", " ") if ["http", "https", nil].include?(scheme)
1709
+ pair[1] = URI.unencode_component(value)
1532
1710
  end
1533
1711
  if return_type == Hash
1534
1712
  accu[pair[0]] = pair[1]
@@ -1580,7 +1758,7 @@ module Addressable
1580
1758
  end
1581
1759
 
1582
1760
  # new_query_values have form [['key1', 'value1'], ['key2', 'value2']]
1583
- buffer = ""
1761
+ buffer = "".dup
1584
1762
  new_query_values.each do |key, value|
1585
1763
  encoded_key = URI.encode_component(
1586
1764
  key, CharacterClasses::UNRESERVED
@@ -1610,7 +1788,7 @@ module Addressable
1610
1788
  #
1611
1789
  # @return [String] The request URI required for an HTTP request.
1612
1790
  def request_uri
1613
- return nil if self.absolute? && self.scheme !~ /^https?$/
1791
+ return nil if self.absolute? && self.scheme !~ /^https?$/i
1614
1792
  return (
1615
1793
  (!self.path.empty? ? self.path : SLASH) +
1616
1794
  (self.query ? "?#{self.query}" : EMPTY_STR)
@@ -1625,21 +1803,20 @@ module Addressable
1625
1803
  if !new_request_uri.respond_to?(:to_str)
1626
1804
  raise TypeError, "Can't convert #{new_request_uri.class} into String."
1627
1805
  end
1628
- if self.absolute? && self.scheme !~ /^https?$/
1806
+ if self.absolute? && self.scheme !~ /^https?$/i
1629
1807
  raise InvalidURIError,
1630
1808
  "Cannot set an HTTP request URI for a non-HTTP URI."
1631
1809
  end
1632
1810
  new_request_uri = new_request_uri.to_str
1633
- path_component = new_request_uri[/^([^\?]*)\?(?:.*)$/, 1]
1811
+ path_component = new_request_uri[/^([^\?]*)\??(?:.*)$/, 1]
1634
1812
  query_component = new_request_uri[/^(?:[^\?]*)\?(.*)$/, 1]
1635
1813
  path_component = path_component.to_s
1636
1814
  path_component = (!path_component.empty? ? path_component : SLASH)
1637
1815
  self.path = path_component
1638
1816
  self.query = query_component
1639
1817
 
1640
- # Reset dependant values
1641
- @uri_string = nil
1642
- @hash = nil
1818
+ # Reset dependent values
1819
+ remove_composite_values
1643
1820
  end
1644
1821
 
1645
1822
  ##
@@ -1647,7 +1824,7 @@ module Addressable
1647
1824
  #
1648
1825
  # @return [String] The fragment component.
1649
1826
  def fragment
1650
- return instance_variable_defined?(:@fragment) ? @fragment : nil
1827
+ return defined?(@fragment) ? @fragment : nil
1651
1828
  end
1652
1829
 
1653
1830
  ##
@@ -1655,13 +1832,20 @@ module Addressable
1655
1832
  #
1656
1833
  # @return [String] The fragment component, normalized.
1657
1834
  def normalized_fragment
1658
- self.fragment && @normalized_fragment ||= (begin
1835
+ return nil unless self.fragment
1836
+ return @normalized_fragment if defined?(@normalized_fragment)
1837
+ @normalized_fragment ||= begin
1659
1838
  component = Addressable::URI.normalize_component(
1660
1839
  self.fragment,
1661
- Addressable::URI::CharacterClasses::FRAGMENT
1840
+ Addressable::URI::NormalizeCharacterClasses::FRAGMENT
1662
1841
  )
1663
1842
  component == "" ? nil : component
1664
- end)
1843
+ end
1844
+ # All normalized values should be UTF-8
1845
+ if @normalized_fragment
1846
+ @normalized_fragment.force_encoding(Encoding::UTF_8)
1847
+ end
1848
+ @normalized_fragment
1665
1849
  end
1666
1850
 
1667
1851
  ##
@@ -1674,10 +1858,9 @@ module Addressable
1674
1858
  end
1675
1859
  @fragment = new_fragment ? new_fragment.to_str : nil
1676
1860
 
1677
- # Reset dependant values
1678
- @normalized_fragment = nil
1679
- @uri_string = nil
1680
- @hash = nil
1861
+ # Reset dependent values
1862
+ remove_instance_variable(:@normalized_fragment) if defined?(@normalized_fragment)
1863
+ remove_composite_values
1681
1864
 
1682
1865
  # Ensure we haven't created an invalid URI
1683
1866
  validate()
@@ -1780,8 +1963,8 @@ module Addressable
1780
1963
  # Section 5.2.3 of RFC 3986
1781
1964
  #
1782
1965
  # Removes the right-most path segment from the base path.
1783
- if base_path =~ /\//
1784
- base_path.gsub!(/\/[^\/]+$/, SLASH)
1966
+ if base_path.include?(SLASH)
1967
+ base_path.sub!(/\/[^\/]+$/, SLASH)
1785
1968
  else
1786
1969
  base_path = EMPTY_STR
1787
1970
  end
@@ -2098,7 +2281,7 @@ module Addressable
2098
2281
  #
2099
2282
  # @return [Integer] A hash of the URI.
2100
2283
  def hash
2101
- return @hash ||= (self.to_s.hash * -1)
2284
+ @hash ||= self.to_s.hash * -1
2102
2285
  end
2103
2286
 
2104
2287
  ##
@@ -2181,18 +2364,16 @@ module Addressable
2181
2364
  raise InvalidURIError,
2182
2365
  "Cannot assemble URI string with ambiguous path: '#{self.path}'"
2183
2366
  end
2184
- @uri_string ||= (begin
2185
- uri_string = ""
2367
+ @uri_string ||= begin
2368
+ uri_string = String.new
2186
2369
  uri_string << "#{self.scheme}:" if self.scheme != nil
2187
2370
  uri_string << "//#{self.authority}" if self.authority != nil
2188
2371
  uri_string << self.path.to_s
2189
2372
  uri_string << "?#{self.query}" if self.query != nil
2190
2373
  uri_string << "##{self.fragment}" if self.fragment != nil
2191
- if uri_string.respond_to?(:force_encoding)
2192
- uri_string.force_encoding(Encoding::UTF_8)
2193
- end
2374
+ uri_string.force_encoding(Encoding::UTF_8)
2194
2375
  uri_string
2195
- end)
2376
+ end
2196
2377
  end
2197
2378
 
2198
2379
  ##
@@ -2232,16 +2413,16 @@ module Addressable
2232
2413
  #
2233
2414
  # @param [Proc] block
2234
2415
  # A set of operations to perform on a given URI.
2235
def defer_validation
  # Runs the given block with @validation_deferred set, then validates once.
  # Raises LocalJumpError when no block is given; returns nil.
  raise LocalJumpError, "No block given." unless block_given?
  @validation_deferred = true
  begin
    yield
  ensure
    # Always clear the flag, even when the block raises; otherwise this
    # object would stay marked as deferred after the error propagates.
    @validation_deferred = false
  end
  validate
  nil
end
2243
2424
 
2244
- private
2425
+ protected
2245
2426
  SELF_REF = '.'
2246
2427
  PARENT = '..'
2247
2428
 
@@ -2307,6 +2488,19 @@ module Addressable
2307
2488
  raise InvalidURIError,
2308
2489
  "Cannot have a relative path with an authority set: '#{self.to_s}'"
2309
2490
  end
2491
+ if self.path != nil && !self.path.empty? &&
2492
+ self.path[0..1] == SLASH + SLASH && self.authority == nil
2493
+ raise InvalidURIError,
2494
+ "Cannot have a path with two leading slashes " +
2495
+ "without an authority set: '#{self.to_s}'"
2496
+ end
2497
+ unreserved = CharacterClasses::UNRESERVED
2498
+ sub_delims = CharacterClasses::SUB_DELIMS
2499
+ if !self.host.nil? && (self.host =~ /[<>{}\/\\\?\#\@"[[:space:]]]/ ||
2500
+ (self.host[/^\[(.*)\]$/, 1] != nil && self.host[/^\[(.*)\]$/, 1] !~
2501
+ Regexp.new("^[#{unreserved}#{sub_delims}:]*$")))
2502
+ raise InvalidURIError, "Invalid character in host: '#{self.host.to_s}'"
2503
+ end
2310
2504
  return nil
2311
2505
  end
2312
2506
 
@@ -2318,9 +2512,11 @@ module Addressable
2318
2512
  #
2319
2513
  # @return [Addressable::URI] <code>self</code>.
2320
2514
  def replace_self(uri)
2321
- # Reset dependant values
2515
+ # Reset dependent values
2322
2516
  instance_variables.each do |var|
2323
- instance_variable_set(var, nil)
2517
+ if instance_variable_defined?(var) && var != :@validation_deferred
2518
+ remove_instance_variable(var)
2519
+ end
2324
2520
  end
2325
2521
 
2326
2522
  @scheme = uri.scheme
@@ -2335,7 +2531,7 @@ module Addressable
2335
2531
  end
2336
2532
 
2337
2533
  ##
2338
- # Splits path string with "/"(slash).
2534
+ # Splits path string with "/" (slash).
2339
2535
  # It is considered that there is empty string after last slash when
2340
2536
  # path ends with slash.
2341
2537
  #
@@ -2347,5 +2543,14 @@ module Addressable
2347
2543
  splitted << EMPTY_STR if path.end_with? SLASH
2348
2544
  splitted
2349
2545
  end
2546
+
2547
+ ##
2548
+ # Resets composite values for the entire URI
2549
+ #
2550
+ # @api private
2551
def remove_composite_values
  # The memoized values are removed outright (not set to nil), so a later
  # defined? check reports them as absent.
  remove_instance_variable(:@uri_string) if defined?(@uri_string)
  remove_instance_variable(:@hash) if defined?(@hash)
end
2350
2555
  end
2351
2556
  end