addressable 2.4.0 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # encoding:utf-8
2
4
  #--
3
- # Copyright (C) 2006-2015 Bob Aman
5
+ # Copyright (C) Bob Aman
4
6
  #
5
7
  # Licensed under the Apache License, Version 2.0 (the "License");
6
8
  # you may not use this file except in compliance with the License.
@@ -18,6 +20,7 @@
18
20
 
19
21
  require "addressable/version"
20
22
  require "addressable/idna"
23
+ require "public_suffix"
21
24
 
22
25
  ##
23
26
  # Addressable is a library for processing links and URIs.
@@ -44,13 +47,22 @@ module Addressable
44
47
  UNRESERVED = ALPHA + DIGIT + "\\-\\.\\_\\~"
45
48
  PCHAR = UNRESERVED + SUB_DELIMS + "\\:\\@"
46
49
  SCHEME = ALPHA + DIGIT + "\\-\\+\\."
47
- HOST = ALPHA + DIGIT + "\\-\\.\\[\\:\\]"
48
- AUTHORITY = PCHAR
50
+ HOST = UNRESERVED + SUB_DELIMS + "\\[\\:\\]"
51
+ AUTHORITY = PCHAR + "\\[\\:\\]"
49
52
  PATH = PCHAR + "\\/"
50
53
  QUERY = PCHAR + "\\/\\?"
51
54
  FRAGMENT = PCHAR + "\\/\\?"
52
55
  end
53
56
 
57
+ module NormalizeCharacterClasses
58
+ HOST = /[^#{CharacterClasses::HOST}]/
59
+ UNRESERVED = /[^#{CharacterClasses::UNRESERVED}]/
60
+ PCHAR = /[^#{CharacterClasses::PCHAR}]/
61
+ SCHEME = /[^#{CharacterClasses::SCHEME}]/
62
+ FRAGMENT = /[^#{CharacterClasses::FRAGMENT}]/
63
+ QUERY = %r{[^a-zA-Z0-9\-\.\_\~\!\$\'\(\)\*\+\,\=\:\@\/\?%]|%(?!2B|2b)}
64
+ end
65
+
54
66
  SLASH = '/'
55
67
  EMPTY_STR = ''
56
68
 
@@ -70,7 +82,7 @@ module Addressable
70
82
  "wais" => 210,
71
83
  "ldap" => 389,
72
84
  "prospero" => 1525
73
- }
85
+ }.freeze
74
86
 
75
87
  ##
76
88
  # Returns a URI object based on the parsed string.
@@ -121,9 +133,9 @@ module Addressable
121
133
  user = userinfo.strip[/^([^:]*):?/, 1]
122
134
  password = userinfo.strip[/:(.*)$/, 1]
123
135
  end
124
- host = authority.gsub(
136
+ host = authority.sub(
125
137
  /^([^\[\]]*)@/, EMPTY_STR
126
- ).gsub(
138
+ ).sub(
127
139
  /:([^:@\[\]]*?)$/, EMPTY_STR
128
140
  )
129
141
  port = authority[/:([^:@\[\]]*?)$/, 1]
@@ -176,33 +188,50 @@ module Addressable
176
188
  raise TypeError, "Can't convert #{uri.class} into String."
177
189
  end
178
190
  # Otherwise, convert to a String
179
- uri = uri.to_str.dup
191
+ uri = uri.to_str.dup.strip
180
192
  hints = {
181
193
  :scheme => "http"
182
194
  }.merge(hints)
183
195
  case uri
184
- when /^http:\/+/
185
- uri.gsub!(/^http:\/+/, "http://")
186
- when /^https:\/+/
187
- uri.gsub!(/^https:\/+/, "https://")
188
- when /^feed:\/+http:\/+/
189
- uri.gsub!(/^feed:\/+http:\/+/, "feed:http://")
190
- when /^feed:\/+/
191
- uri.gsub!(/^feed:\/+/, "feed://")
192
- when /^file:\/+/
193
- uri.gsub!(/^file:\/+/, "file:///")
196
+ when /^http:\//i
197
+ uri.sub!(/^http:\/+/i, "http://")
198
+ when /^https:\//i
199
+ uri.sub!(/^https:\/+/i, "https://")
200
+ when /^feed:\/+http:\//i
201
+ uri.sub!(/^feed:\/+http:\/+/i, "feed:http://")
202
+ when /^feed:\//i
203
+ uri.sub!(/^feed:\/+/i, "feed://")
204
+ when %r[^file:/{4}]i
205
+ uri.sub!(%r[^file:/+]i, "file:////")
206
+ when %r[^file://localhost/]i
207
+ uri.sub!(%r[^file://localhost/+]i, "file:///")
208
+ when %r[^file:/+]i
209
+ uri.sub!(%r[^file:/+]i, "file:///")
194
210
  when /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
195
- uri.gsub!(/^/, hints[:scheme] + "://")
211
+ uri.sub!(/^/, hints[:scheme] + "://")
212
+ when /\A\d+\..*:\d+\z/
213
+ uri = "#{hints[:scheme]}://#{uri}"
214
+ end
215
+ match = uri.match(URIREGEX)
216
+ fragments = match.captures
217
+ authority = fragments[3]
218
+ if authority && authority.length > 0
219
+ new_authority = authority.tr("\\", "/").gsub(" ", "%20")
220
+ # NOTE: We want offset 4, not 3!
221
+ offset = match.offset(4)
222
+ uri = uri.dup
223
+ uri[offset[0]...offset[1]] = new_authority
196
224
  end
197
225
  parsed = self.parse(uri)
198
226
  if parsed.scheme =~ /^[^\/?#\.]+\.[^\/?#]+$/
199
227
  parsed = self.parse(hints[:scheme] + "://" + uri)
200
228
  end
201
229
  if parsed.path.include?(".")
202
- new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
203
- if new_host
230
+ if parsed.path[/\b@\b/]
231
+ parsed.scheme = "mailto" unless parsed.scheme
232
+ elsif new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
204
233
  parsed.defer_validation do
205
- new_path = parsed.path.gsub(
234
+ new_path = parsed.path.sub(
206
235
  Regexp.new("^" + Regexp.escape(new_host)), EMPTY_STR)
207
236
  parsed.host = new_host
208
237
  parsed.path = new_path
@@ -253,24 +282,24 @@ module Addressable
253
282
  # Otherwise, convert to a String
254
283
  path = path.to_str.strip
255
284
 
256
- path.gsub!(/^file:\/?\/?/, EMPTY_STR) if path =~ /^file:\/?\/?/
285
+ path.sub!(/^file:\/?\/?/, EMPTY_STR) if path =~ /^file:\/?\/?/
257
286
  path = SLASH + path if path =~ /^([a-zA-Z])[\|:]/
258
287
  uri = self.parse(path)
259
288
 
260
289
  if uri.scheme == nil
261
290
  # Adjust windows-style uris
262
- uri.path.gsub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
291
+ uri.path.sub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
263
292
  "/#{$1.downcase}:/"
264
293
  end
265
- uri.path.gsub!(/\\/, SLASH)
294
+ uri.path.tr!("\\", SLASH)
266
295
  if File.exist?(uri.path) &&
267
296
  File.stat(uri.path).directory?
268
- uri.path.gsub!(/\/$/, EMPTY_STR)
297
+ uri.path.chomp!(SLASH)
269
298
  uri.path = uri.path + '/'
270
299
  end
271
300
 
272
301
  # If the path is absolute, set the scheme and host.
273
- if uri.path =~ /^\//
302
+ if uri.path.start_with?(SLASH)
274
303
  uri.scheme = "file"
275
304
  uri.host = EMPTY_STR
276
305
  end
@@ -307,6 +336,21 @@ module Addressable
307
336
  return result
308
337
  end
309
338
 
339
+ ##
340
+ # Tables used to optimize encoding operations in `self.encode_component`
341
+ # and `self.normalize_component`
342
+ SEQUENCE_ENCODING_TABLE = Hash.new do |hash, sequence|
343
+ hash[sequence] = sequence.unpack("C*").map do |c|
344
+ format("%02x", c)
345
+ end.join
346
+ end
347
+
348
+ SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE = Hash.new do |hash, sequence|
349
+ hash[sequence] = sequence.unpack("C*").map do |c|
350
+ format("%%%02X", c)
351
+ end.join
352
+ end
353
+
310
354
  ##
311
355
  # Percent encodes a URI component.
312
356
  #
@@ -367,26 +411,26 @@ module Addressable
367
411
  if character_class.kind_of?(String)
368
412
  character_class = /[^#{character_class}]/
369
413
  end
370
- if component.respond_to?(:force_encoding)
371
- # We can't perform regexps on invalid UTF sequences, but
372
- # here we need to, so switch to ASCII.
373
- component = component.dup
374
- component.force_encoding(Encoding::ASCII_8BIT)
375
- end
414
+ # We can't perform regexps on invalid UTF sequences, but
415
+ # here we need to, so switch to ASCII.
416
+ component = component.dup
417
+ component.force_encoding(Encoding::ASCII_8BIT)
376
418
  # Avoiding gsub! because there are edge cases with frozen strings
377
419
  component = component.gsub(character_class) do |sequence|
378
- (sequence.unpack('C*').map { |c| "%" + ("%02x" % c).upcase }).join
420
+ SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE[sequence]
379
421
  end
380
422
  if upcase_encoded.length > 0
381
- component = component.gsub(/%(#{upcase_encoded.chars.map do |char|
382
- char.unpack('C*').map { |c| '%02x' % c }.join
383
- end.join('|')})/i) { |s| s.upcase }
423
+ upcase_encoded_chars = upcase_encoded.chars.map do |char|
424
+ SEQUENCE_ENCODING_TABLE[char]
425
+ end
426
+ component = component.gsub(/%(#{upcase_encoded_chars.join('|')})/,
427
+ &:upcase)
384
428
  end
385
429
  return component
386
430
  end
387
431
 
388
432
  class << self
389
- alias_method :encode_component, :encode_component
433
+ alias_method :escape_component, :encode_component
390
434
  end
391
435
 
392
436
  ##
@@ -427,14 +471,18 @@ module Addressable
427
471
  end
428
472
  uri = uri.dup
429
473
  # Seriously, only use UTF-8. I'm really not kidding!
430
- uri.force_encoding("utf-8") if uri.respond_to?(:force_encoding)
431
- leave_encoded.force_encoding("utf-8") if leave_encoded.respond_to?(:force_encoding)
474
+ uri.force_encoding("utf-8")
475
+
476
+ unless leave_encoded.empty?
477
+ leave_encoded = leave_encoded.dup.force_encoding("utf-8")
478
+ end
479
+
432
480
  result = uri.gsub(/%[0-9a-f]{2}/iu) do |sequence|
433
481
  c = sequence[1..3].to_i(16).chr
434
- c.force_encoding("utf-8") if c.respond_to?(:force_encoding)
482
+ c.force_encoding("utf-8")
435
483
  leave_encoded.include?(c) ? sequence : c
436
484
  end
437
- result.force_encoding("utf-8") if result.respond_to?(:force_encoding)
485
+ result.force_encoding("utf-8")
438
486
  if return_type == String
439
487
  return result
440
488
  elsif return_type == ::Addressable::URI
@@ -514,19 +562,21 @@ module Addressable
514
562
  character_class = "#{character_class}%" unless character_class.include?('%')
515
563
 
516
564
  "|%(?!#{leave_encoded.chars.map do |char|
517
- seq = char.unpack('C*').map { |c| '%02x' % c }.join
565
+ seq = SEQUENCE_ENCODING_TABLE[char]
518
566
  [seq.upcase, seq.downcase]
519
567
  end.flatten.join('|')})"
520
568
  end
521
569
 
522
- character_class = /[^#{character_class}]#{leave_re}/
523
- end
524
- if component.respond_to?(:force_encoding)
525
- # We can't perform regexps on invalid UTF sequences, but
526
- # here we need to, so switch to ASCII.
527
- component = component.dup
528
- component.force_encoding(Encoding::ASCII_8BIT)
570
+ character_class = if leave_re
571
+ /[^#{character_class}]#{leave_re}/
572
+ else
573
+ /[^#{character_class}]/
574
+ end
529
575
  end
576
+ # We can't perform regexps on invalid UTF sequences, but
577
+ # here we need to, so switch to ASCII.
578
+ component = component.dup
579
+ component.force_encoding(Encoding::ASCII_8BIT)
530
580
  unencoded = self.unencode_component(component, String, leave_encoded)
531
581
  begin
532
582
  encoded = self.encode_component(
@@ -537,9 +587,7 @@ module Addressable
537
587
  rescue ArgumentError
538
588
  encoded = self.encode_component(unencoded)
539
589
  end
540
- if encoded.respond_to?(:force_encoding)
541
- encoded.force_encoding(Encoding::UTF_8)
542
- end
590
+ encoded.force_encoding(Encoding::UTF_8)
543
591
  return encoded
544
592
  end
545
593
 
@@ -843,14 +891,17 @@ module Addressable
843
891
  return nil unless self.scheme
844
892
  @normalized_scheme ||= begin
845
893
  if self.scheme =~ /^\s*ssh\+svn\s*$/i
846
- "svn+ssh"
894
+ "svn+ssh".dup
847
895
  else
848
896
  Addressable::URI.normalize_component(
849
897
  self.scheme.strip.downcase,
850
- Addressable::URI::CharacterClasses::SCHEME
898
+ Addressable::URI::NormalizeCharacterClasses::SCHEME
851
899
  )
852
900
  end
853
901
  end
902
+ # All normalized values should be UTF-8
903
+ @normalized_scheme.force_encoding(Encoding::UTF_8) if @normalized_scheme
904
+ @normalized_scheme
854
905
  end
855
906
 
856
907
  ##
@@ -864,7 +915,7 @@ module Addressable
864
915
  new_scheme = new_scheme.to_str
865
916
  end
866
917
  if new_scheme && new_scheme !~ /\A[a-z][a-z0-9\.\+\-]*\z/i
867
- raise InvalidURIError, "Invalid scheme format: #{new_scheme}"
918
+ raise InvalidURIError, "Invalid scheme format: '#{new_scheme}'"
868
919
  end
869
920
  @scheme = new_scheme
870
921
  @scheme = nil if @scheme.to_s.strip.empty?
@@ -899,10 +950,13 @@ module Addressable
899
950
  else
900
951
  Addressable::URI.normalize_component(
901
952
  self.user.strip,
902
- Addressable::URI::CharacterClasses::UNRESERVED
953
+ Addressable::URI::NormalizeCharacterClasses::UNRESERVED
903
954
  )
904
955
  end
905
956
  end
957
+ # All normalized values should be UTF-8
958
+ @normalized_user.force_encoding(Encoding::UTF_8) if @normalized_user
959
+ @normalized_user
906
960
  end
907
961
 
908
962
  ##
@@ -953,10 +1007,15 @@ module Addressable
953
1007
  else
954
1008
  Addressable::URI.normalize_component(
955
1009
  self.password.strip,
956
- Addressable::URI::CharacterClasses::UNRESERVED
1010
+ Addressable::URI::NormalizeCharacterClasses::UNRESERVED
957
1011
  )
958
1012
  end
959
1013
  end
1014
+ # All normalized values should be UTF-8
1015
+ if @normalized_password
1016
+ @normalized_password.force_encoding(Encoding::UTF_8)
1017
+ end
1018
+ @normalized_password
960
1019
  end
961
1020
 
962
1021
  ##
@@ -1017,11 +1076,16 @@ module Addressable
1017
1076
  if !current_user && !current_password
1018
1077
  nil
1019
1078
  elsif current_user && current_password
1020
- "#{current_user}:#{current_password}"
1079
+ "#{current_user}:#{current_password}".dup
1021
1080
  elsif current_user && !current_password
1022
- "#{current_user}"
1081
+ "#{current_user}".dup
1023
1082
  end
1024
1083
  end
1084
+ # All normalized values should be UTF-8
1085
+ if @normalized_userinfo
1086
+ @normalized_userinfo.force_encoding(Encoding::UTF_8)
1087
+ end
1088
+ @normalized_userinfo
1025
1089
  end
1026
1090
 
1027
1091
  ##
@@ -1067,6 +1131,7 @@ module Addressable
1067
1131
  # @return [String] The host component, normalized.
1068
1132
  def normalized_host
1069
1133
  return nil unless self.host
1134
+
1070
1135
  @normalized_host ||= begin
1071
1136
  if !self.host.strip.empty?
1072
1137
  result = ::Addressable::IDNA.to_ascii(
@@ -1078,12 +1143,18 @@ module Addressable
1078
1143
  end
1079
1144
  result = Addressable::URI.normalize_component(
1080
1145
  result,
1081
- CharacterClasses::HOST)
1146
+ NormalizeCharacterClasses::HOST
1147
+ )
1082
1148
  result
1083
1149
  else
1084
- EMPTY_STR
1150
+ EMPTY_STR.dup
1085
1151
  end
1086
1152
  end
1153
+ # All normalized values should be UTF-8
1154
+ if @normalized_host && !@normalized_host.empty?
1155
+ @normalized_host.force_encoding(Encoding::UTF_8)
1156
+ end
1157
+ @normalized_host
1087
1158
  end
1088
1159
 
1089
1160
  ##
@@ -1096,14 +1167,6 @@ module Addressable
1096
1167
  end
1097
1168
  @host = new_host ? new_host.to_str : nil
1098
1169
 
1099
- unreserved = CharacterClasses::UNRESERVED
1100
- sub_delims = CharacterClasses::SUB_DELIMS
1101
- if !@host.nil? && (@host =~ /[<>{}\/\?\#\@"[[:space:]]]/ ||
1102
- (@host[/^\[(.*)\]$/, 1] != nil && @host[/^\[(.*)\]$/, 1] !~
1103
- Regexp.new("^[#{unreserved}#{sub_delims}:]*$")))
1104
- raise InvalidURIError, "Invalid character in host: '#{@host.to_s}'"
1105
- end
1106
-
1107
1170
  # Reset dependent values
1108
1171
  remove_instance_variable(:@authority) if defined?(@authority)
1109
1172
  remove_instance_variable(:@normalized_host) if defined?(@normalized_host)
@@ -1144,6 +1207,33 @@ module Addressable
1144
1207
  self.host = v
1145
1208
  end
1146
1209
 
1210
+ ##
1211
+ # Returns the top-level domain for this host.
1212
+ #
1213
+ # @example
1214
+ # Addressable::URI.parse("http://www.example.co.uk").tld # => "co.uk"
1215
+ def tld
1216
+ PublicSuffix.parse(self.host, ignore_private: true).tld
1217
+ end
1218
+
1219
+ ##
1220
+ # Sets the top-level domain for this URI.
1221
+ #
1222
+ # @param [String, #to_str] new_tld The new top-level domain.
1223
+ def tld=(new_tld)
1224
+ replaced_tld = host.sub(/#{tld}\z/, new_tld)
1225
+ self.host = PublicSuffix::Domain.new(replaced_tld).to_s
1226
+ end
1227
+
1228
+ ##
1229
+ # Returns the public suffix domain for this host.
1230
+ #
1231
+ # @example
1232
+ # Addressable::URI.parse("http://www.example.co.uk").domain # => "example.co.uk"
1233
+ def domain
1234
+ PublicSuffix.domain(self.host, ignore_private: true)
1235
+ end
1236
+
1147
1237
  ##
1148
1238
  # The authority component for this URI.
1149
1239
  # Combines the user, password, host, and port components.
@@ -1151,7 +1241,7 @@ module Addressable
1151
1241
  # @return [String] The authority component.
1152
1242
  def authority
1153
1243
  self.host && @authority ||= begin
1154
- authority = ""
1244
+ authority = String.new
1155
1245
  if self.userinfo != nil
1156
1246
  authority << "#{self.userinfo}@"
1157
1247
  end
@@ -1170,7 +1260,7 @@ module Addressable
1170
1260
  def normalized_authority
1171
1261
  return nil unless self.authority
1172
1262
  @normalized_authority ||= begin
1173
- authority = ""
1263
+ authority = String.new
1174
1264
  if self.normalized_userinfo != nil
1175
1265
  authority << "#{self.normalized_userinfo}@"
1176
1266
  end
@@ -1180,6 +1270,11 @@ module Addressable
1180
1270
  end
1181
1271
  authority
1182
1272
  end
1273
+ # All normalized values should be UTF-8
1274
+ if @normalized_authority
1275
+ @normalized_authority.force_encoding(Encoding::UTF_8)
1276
+ end
1277
+ @normalized_authority
1183
1278
  end
1184
1279
 
1185
1280
  ##
@@ -1197,9 +1292,9 @@ module Addressable
1197
1292
  new_user = new_userinfo.strip[/^([^:]*):?/, 1]
1198
1293
  new_password = new_userinfo.strip[/:(.*)$/, 1]
1199
1294
  end
1200
- new_host = new_authority.gsub(
1295
+ new_host = new_authority.sub(
1201
1296
  /^([^\[\]]*)@/, EMPTY_STR
1202
- ).gsub(
1297
+ ).sub(
1203
1298
  /:([^:@\[\]]*?)$/, EMPTY_STR
1204
1299
  )
1205
1300
  new_port =
@@ -1383,7 +1478,7 @@ module Addressable
1383
1478
  # @return [String] The components that identify a site.
1384
1479
  def site
1385
1480
  (self.scheme || self.authority) && @site ||= begin
1386
- site_string = ""
1481
+ site_string = "".dup
1387
1482
  site_string << "#{self.scheme}:" if self.scheme != nil
1388
1483
  site_string << "//#{self.authority}" if self.authority != nil
1389
1484
  site_string
@@ -1402,7 +1497,7 @@ module Addressable
1402
1497
  def normalized_site
1403
1498
  return nil unless self.site
1404
1499
  @normalized_site ||= begin
1405
- site_string = ""
1500
+ site_string = "".dup
1406
1501
  if self.normalized_scheme != nil
1407
1502
  site_string << "#{self.normalized_scheme}:"
1408
1503
  end
@@ -1411,6 +1506,9 @@ module Addressable
1411
1506
  end
1412
1507
  site_string
1413
1508
  end
1509
+ # All normalized values should be UTF-8
1510
+ @normalized_site.force_encoding(Encoding::UTF_8) if @normalized_site
1511
+ @normalized_site
1414
1512
  end
1415
1513
 
1416
1514
  ##
@@ -1460,17 +1558,20 @@ module Addressable
1460
1558
  result = path.strip.split(SLASH, -1).map do |segment|
1461
1559
  Addressable::URI.normalize_component(
1462
1560
  segment,
1463
- Addressable::URI::CharacterClasses::PCHAR
1561
+ Addressable::URI::NormalizeCharacterClasses::PCHAR
1464
1562
  )
1465
1563
  end.join(SLASH)
1466
1564
 
1467
1565
  result = URI.normalize_path(result)
1468
1566
  if result.empty? &&
1469
1567
  ["http", "https", "ftp", "tftp"].include?(self.normalized_scheme)
1470
- result = SLASH
1568
+ result = SLASH.dup
1471
1569
  end
1472
1570
  result
1473
1571
  end
1572
+ # All normalized values should be UTF-8
1573
+ @normalized_path.force_encoding(Encoding::UTF_8) if @normalized_path
1574
+ @normalized_path
1474
1575
  end
1475
1576
 
1476
1577
  ##
@@ -1489,6 +1590,9 @@ module Addressable
1489
1590
  # Reset dependent values
1490
1591
  remove_instance_variable(:@normalized_path) if defined?(@normalized_path)
1491
1592
  remove_composite_values
1593
+
1594
+ # Ensure we haven't created an invalid URI
1595
+ validate()
1492
1596
  end
1493
1597
 
1494
1598
  ##
@@ -1497,7 +1601,7 @@ module Addressable
1497
1601
  # @return [String] The path's basename.
1498
1602
  def basename
1499
1603
  # Path cannot be nil
1500
- return File.basename(self.path).gsub(/;[^\/]*$/, EMPTY_STR)
1604
+ return File.basename(self.path).sub(/;[^\/]*$/, EMPTY_STR)
1501
1605
  end
1502
1606
 
1503
1607
  ##
@@ -1529,13 +1633,21 @@ module Addressable
1529
1633
  modified_query_class = Addressable::URI::CharacterClasses::QUERY.dup
1530
1634
  # Make sure possible key-value pair delimiters are escaped.
1531
1635
  modified_query_class.sub!("\\&", "").sub!("\\;", "")
1532
- pairs = (self.query || "").split("&", -1)
1636
+ pairs = (query || "").split("&", -1)
1637
+ pairs.delete_if(&:empty?).uniq! if flags.include?(:compacted)
1533
1638
  pairs.sort! if flags.include?(:sorted)
1534
1639
  component = pairs.map do |pair|
1535
- Addressable::URI.normalize_component(pair, modified_query_class, "+")
1640
+ Addressable::URI.normalize_component(
1641
+ pair,
1642
+ Addressable::URI::NormalizeCharacterClasses::QUERY,
1643
+ "+"
1644
+ )
1536
1645
  end.join("&")
1537
1646
  component == "" ? nil : component
1538
1647
  end
1648
+ # All normalized values should be UTF-8
1649
+ @normalized_query.force_encoding(Encoding::UTF_8) if @normalized_query
1650
+ @normalized_query
1539
1651
  end
1540
1652
 
1541
1653
  ##
@@ -1588,11 +1700,13 @@ module Addressable
1588
1700
  # so it's best to make all changes in-place.
1589
1701
  pair[0] = URI.unencode_component(pair[0])
1590
1702
  if pair[1].respond_to?(:to_str)
1703
+ value = pair[1].to_str
1591
1704
  # I loathe the fact that I have to do this. Stupid HTML 4.01.
1592
1705
  # Treating '+' as a space was just an unbelievably bad idea.
1593
1706
  # There was nothing wrong with '%20'!
1594
1707
  # If it ain't broke, don't fix it!
1595
- pair[1] = URI.unencode_component(pair[1].to_str.gsub(/\+/, " "))
1708
+ value = value.tr("+", " ") if ["http", "https", nil].include?(scheme)
1709
+ pair[1] = URI.unencode_component(value)
1596
1710
  end
1597
1711
  if return_type == Hash
1598
1712
  accu[pair[0]] = pair[1]
@@ -1644,7 +1758,7 @@ module Addressable
1644
1758
  end
1645
1759
 
1646
1760
  # new_query_values have form [['key1', 'value1'], ['key2', 'value2']]
1647
- buffer = ""
1761
+ buffer = "".dup
1648
1762
  new_query_values.each do |key, value|
1649
1763
  encoded_key = URI.encode_component(
1650
1764
  key, CharacterClasses::UNRESERVED
@@ -1674,7 +1788,7 @@ module Addressable
1674
1788
  #
1675
1789
  # @return [String] The request URI required for an HTTP request.
1676
1790
  def request_uri
1677
- return nil if self.absolute? && self.scheme !~ /^https?$/
1791
+ return nil if self.absolute? && self.scheme !~ /^https?$/i
1678
1792
  return (
1679
1793
  (!self.path.empty? ? self.path : SLASH) +
1680
1794
  (self.query ? "?#{self.query}" : EMPTY_STR)
@@ -1689,12 +1803,12 @@ module Addressable
1689
1803
  if !new_request_uri.respond_to?(:to_str)
1690
1804
  raise TypeError, "Can't convert #{new_request_uri.class} into String."
1691
1805
  end
1692
- if self.absolute? && self.scheme !~ /^https?$/
1806
+ if self.absolute? && self.scheme !~ /^https?$/i
1693
1807
  raise InvalidURIError,
1694
1808
  "Cannot set an HTTP request URI for a non-HTTP URI."
1695
1809
  end
1696
1810
  new_request_uri = new_request_uri.to_str
1697
- path_component = new_request_uri[/^([^\?]*)\?(?:.*)$/, 1]
1811
+ path_component = new_request_uri[/^([^\?]*)\??(?:.*)$/, 1]
1698
1812
  query_component = new_request_uri[/^(?:[^\?]*)\?(.*)$/, 1]
1699
1813
  path_component = path_component.to_s
1700
1814
  path_component = (!path_component.empty? ? path_component : SLASH)
@@ -1723,10 +1837,15 @@ module Addressable
1723
1837
  @normalized_fragment ||= begin
1724
1838
  component = Addressable::URI.normalize_component(
1725
1839
  self.fragment,
1726
- Addressable::URI::CharacterClasses::FRAGMENT
1840
+ Addressable::URI::NormalizeCharacterClasses::FRAGMENT
1727
1841
  )
1728
1842
  component == "" ? nil : component
1729
1843
  end
1844
+ # All normalized values should be UTF-8
1845
+ if @normalized_fragment
1846
+ @normalized_fragment.force_encoding(Encoding::UTF_8)
1847
+ end
1848
+ @normalized_fragment
1730
1849
  end
1731
1850
 
1732
1851
  ##
@@ -1844,8 +1963,8 @@ module Addressable
1844
1963
  # Section 5.2.3 of RFC 3986
1845
1964
  #
1846
1965
  # Removes the right-most path segment from the base path.
1847
- if base_path =~ /\//
1848
- base_path.gsub!(/\/[^\/]+$/, SLASH)
1966
+ if base_path.include?(SLASH)
1967
+ base_path.sub!(/\/[^\/]+$/, SLASH)
1849
1968
  else
1850
1969
  base_path = EMPTY_STR
1851
1970
  end
@@ -2246,15 +2365,13 @@ module Addressable
2246
2365
  "Cannot assemble URI string with ambiguous path: '#{self.path}'"
2247
2366
  end
2248
2367
  @uri_string ||= begin
2249
- uri_string = ""
2368
+ uri_string = String.new
2250
2369
  uri_string << "#{self.scheme}:" if self.scheme != nil
2251
2370
  uri_string << "//#{self.authority}" if self.authority != nil
2252
2371
  uri_string << self.path.to_s
2253
2372
  uri_string << "?#{self.query}" if self.query != nil
2254
2373
  uri_string << "##{self.fragment}" if self.fragment != nil
2255
- if uri_string.respond_to?(:force_encoding)
2256
- uri_string.force_encoding(Encoding::UTF_8)
2257
- end
2374
+ uri_string.force_encoding(Encoding::UTF_8)
2258
2375
  uri_string
2259
2376
  end
2260
2377
  end
@@ -2296,10 +2413,10 @@ module Addressable
2296
2413
  #
2297
2414
  # @param [Proc] block
2298
2415
  # A set of operations to perform on a given URI.
2299
- def defer_validation(&block)
2300
- raise LocalJumpError, "No block given." unless block
2416
+ def defer_validation
2417
+ raise LocalJumpError, "No block given." unless block_given?
2301
2418
  @validation_deferred = true
2302
- block.call()
2419
+ yield
2303
2420
  @validation_deferred = false
2304
2421
  validate
2305
2422
  return nil
@@ -2371,6 +2488,19 @@ module Addressable
2371
2488
  raise InvalidURIError,
2372
2489
  "Cannot have a relative path with an authority set: '#{self.to_s}'"
2373
2490
  end
2491
+ if self.path != nil && !self.path.empty? &&
2492
+ self.path[0..1] == SLASH + SLASH && self.authority == nil
2493
+ raise InvalidURIError,
2494
+ "Cannot have a path with two leading slashes " +
2495
+ "without an authority set: '#{self.to_s}'"
2496
+ end
2497
+ unreserved = CharacterClasses::UNRESERVED
2498
+ sub_delims = CharacterClasses::SUB_DELIMS
2499
+ if !self.host.nil? && (self.host =~ /[<>{}\/\\\?\#\@"[[:space:]]]/ ||
2500
+ (self.host[/^\[(.*)\]$/, 1] != nil && self.host[/^\[(.*)\]$/, 1] !~
2501
+ Regexp.new("^[#{unreserved}#{sub_delims}:]*$")))
2502
+ raise InvalidURIError, "Invalid character in host: '#{self.host.to_s}'"
2503
+ end
2374
2504
  return nil
2375
2505
  end
2376
2506
 
@@ -2384,7 +2514,9 @@ module Addressable
2384
2514
  def replace_self(uri)
2385
2515
  # Reset dependent values
2386
2516
  instance_variables.each do |var|
2387
- remove_instance_variable(var) if instance_variable_defined?(var)
2517
+ if instance_variable_defined?(var) && var != :@validation_deferred
2518
+ remove_instance_variable(var)
2519
+ end
2388
2520
  end
2389
2521
 
2390
2522
  @scheme = uri.scheme