addressable 2.4.0 → 2.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # encoding:utf-8
2
4
  #--
3
- # Copyright (C) 2006-2015 Bob Aman
5
+ # Copyright (C) Bob Aman
4
6
  #
5
7
  # Licensed under the Apache License, Version 2.0 (the "License");
6
8
  # you may not use this file except in compliance with the License.
@@ -18,6 +20,7 @@
18
20
 
19
21
  require "addressable/version"
20
22
  require "addressable/idna"
23
+ require "public_suffix"
21
24
 
22
25
  ##
23
26
  # Addressable is a library for processing links and URIs.
@@ -44,13 +47,22 @@ module Addressable
44
47
  UNRESERVED = ALPHA + DIGIT + "\\-\\.\\_\\~"
45
48
  PCHAR = UNRESERVED + SUB_DELIMS + "\\:\\@"
46
49
  SCHEME = ALPHA + DIGIT + "\\-\\+\\."
47
- HOST = ALPHA + DIGIT + "\\-\\.\\[\\:\\]"
48
- AUTHORITY = PCHAR
50
+ HOST = UNRESERVED + SUB_DELIMS + "\\[\\:\\]"
51
+ AUTHORITY = PCHAR + "\\[\\:\\]"
49
52
  PATH = PCHAR + "\\/"
50
53
  QUERY = PCHAR + "\\/\\?"
51
54
  FRAGMENT = PCHAR + "\\/\\?"
52
55
  end
53
56
 
57
+ module NormalizeCharacterClasses
58
+ HOST = /[^#{CharacterClasses::HOST}]/
59
+ UNRESERVED = /[^#{CharacterClasses::UNRESERVED}]/
60
+ PCHAR = /[^#{CharacterClasses::PCHAR}]/
61
+ SCHEME = /[^#{CharacterClasses::SCHEME}]/
62
+ FRAGMENT = /[^#{CharacterClasses::FRAGMENT}]/
63
+ QUERY = %r{[^a-zA-Z0-9\-\.\_\~\!\$\'\(\)\*\+\,\=\:\@\/\?%]|%(?!2B|2b)}
64
+ end
65
+
54
66
  SLASH = '/'
55
67
  EMPTY_STR = ''
56
68
 
@@ -70,7 +82,7 @@ module Addressable
70
82
  "wais" => 210,
71
83
  "ldap" => 389,
72
84
  "prospero" => 1525
73
- }
85
+ }.freeze
74
86
 
75
87
  ##
76
88
  # Returns a URI object based on the parsed string.
@@ -121,9 +133,9 @@ module Addressable
121
133
  user = userinfo.strip[/^([^:]*):?/, 1]
122
134
  password = userinfo.strip[/:(.*)$/, 1]
123
135
  end
124
- host = authority.gsub(
136
+ host = authority.sub(
125
137
  /^([^\[\]]*)@/, EMPTY_STR
126
- ).gsub(
138
+ ).sub(
127
139
  /:([^:@\[\]]*?)$/, EMPTY_STR
128
140
  )
129
141
  port = authority[/:([^:@\[\]]*?)$/, 1]
@@ -176,33 +188,50 @@ module Addressable
176
188
  raise TypeError, "Can't convert #{uri.class} into String."
177
189
  end
178
190
  # Otherwise, convert to a String
179
- uri = uri.to_str.dup
191
+ uri = uri.to_str.dup.strip
180
192
  hints = {
181
193
  :scheme => "http"
182
194
  }.merge(hints)
183
195
  case uri
184
- when /^http:\/+/
185
- uri.gsub!(/^http:\/+/, "http://")
186
- when /^https:\/+/
187
- uri.gsub!(/^https:\/+/, "https://")
188
- when /^feed:\/+http:\/+/
189
- uri.gsub!(/^feed:\/+http:\/+/, "feed:http://")
190
- when /^feed:\/+/
191
- uri.gsub!(/^feed:\/+/, "feed://")
192
- when /^file:\/+/
193
- uri.gsub!(/^file:\/+/, "file:///")
196
+ when /^http:\//i
197
+ uri.sub!(/^http:\/+/i, "http://")
198
+ when /^https:\//i
199
+ uri.sub!(/^https:\/+/i, "https://")
200
+ when /^feed:\/+http:\//i
201
+ uri.sub!(/^feed:\/+http:\/+/i, "feed:http://")
202
+ when /^feed:\//i
203
+ uri.sub!(/^feed:\/+/i, "feed://")
204
+ when %r[^file:/{4}]i
205
+ uri.sub!(%r[^file:/+]i, "file:////")
206
+ when %r[^file://localhost/]i
207
+ uri.sub!(%r[^file://localhost/+]i, "file:///")
208
+ when %r[^file:/+]i
209
+ uri.sub!(%r[^file:/+]i, "file:///")
194
210
  when /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
195
- uri.gsub!(/^/, hints[:scheme] + "://")
211
+ uri.sub!(/^/, hints[:scheme] + "://")
212
+ when /\A\d+\..*:\d+\z/
213
+ uri = "#{hints[:scheme]}://#{uri}"
214
+ end
215
+ match = uri.match(URIREGEX)
216
+ fragments = match.captures
217
+ authority = fragments[3]
218
+ if authority && authority.length > 0
219
+ new_authority = authority.tr("\\", "/").gsub(" ", "%20")
220
+ # NOTE: We want offset 4, not 3!
221
+ offset = match.offset(4)
222
+ uri = uri.dup
223
+ uri[offset[0]...offset[1]] = new_authority
196
224
  end
197
225
  parsed = self.parse(uri)
198
226
  if parsed.scheme =~ /^[^\/?#\.]+\.[^\/?#]+$/
199
227
  parsed = self.parse(hints[:scheme] + "://" + uri)
200
228
  end
201
229
  if parsed.path.include?(".")
202
- new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
203
- if new_host
230
+ if parsed.path[/\b@\b/]
231
+ parsed.scheme = "mailto" unless parsed.scheme
232
+ elsif new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
204
233
  parsed.defer_validation do
205
- new_path = parsed.path.gsub(
234
+ new_path = parsed.path.sub(
206
235
  Regexp.new("^" + Regexp.escape(new_host)), EMPTY_STR)
207
236
  parsed.host = new_host
208
237
  parsed.path = new_path
@@ -253,24 +282,24 @@ module Addressable
253
282
  # Otherwise, convert to a String
254
283
  path = path.to_str.strip
255
284
 
256
- path.gsub!(/^file:\/?\/?/, EMPTY_STR) if path =~ /^file:\/?\/?/
285
+ path.sub!(/^file:\/?\/?/, EMPTY_STR) if path =~ /^file:\/?\/?/
257
286
  path = SLASH + path if path =~ /^([a-zA-Z])[\|:]/
258
287
  uri = self.parse(path)
259
288
 
260
289
  if uri.scheme == nil
261
290
  # Adjust windows-style uris
262
- uri.path.gsub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
291
+ uri.path.sub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
263
292
  "/#{$1.downcase}:/"
264
293
  end
265
- uri.path.gsub!(/\\/, SLASH)
294
+ uri.path.tr!("\\", SLASH)
266
295
  if File.exist?(uri.path) &&
267
296
  File.stat(uri.path).directory?
268
- uri.path.gsub!(/\/$/, EMPTY_STR)
297
+ uri.path.chomp!(SLASH)
269
298
  uri.path = uri.path + '/'
270
299
  end
271
300
 
272
301
  # If the path is absolute, set the scheme and host.
273
- if uri.path =~ /^\//
302
+ if uri.path.start_with?(SLASH)
274
303
  uri.scheme = "file"
275
304
  uri.host = EMPTY_STR
276
305
  end
@@ -307,6 +336,21 @@ module Addressable
307
336
  return result
308
337
  end
309
338
 
339
+ ##
340
+ # Tables used to optimize encoding operations in `self.encode_component`
341
+ # and `self.normalize_component`
342
+ SEQUENCE_ENCODING_TABLE = Hash.new do |hash, sequence|
343
+ hash[sequence] = sequence.unpack("C*").map do |c|
344
+ format("%02x", c)
345
+ end.join
346
+ end
347
+
348
+ SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE = Hash.new do |hash, sequence|
349
+ hash[sequence] = sequence.unpack("C*").map do |c|
350
+ format("%%%02X", c)
351
+ end.join
352
+ end
353
+
310
354
  ##
311
355
  # Percent encodes a URI component.
312
356
  #
@@ -367,26 +411,26 @@ module Addressable
367
411
  if character_class.kind_of?(String)
368
412
  character_class = /[^#{character_class}]/
369
413
  end
370
- if component.respond_to?(:force_encoding)
371
- # We can't perform regexps on invalid UTF sequences, but
372
- # here we need to, so switch to ASCII.
373
- component = component.dup
374
- component.force_encoding(Encoding::ASCII_8BIT)
375
- end
414
+ # We can't perform regexps on invalid UTF sequences, but
415
+ # here we need to, so switch to ASCII.
416
+ component = component.dup
417
+ component.force_encoding(Encoding::ASCII_8BIT)
376
418
  # Avoiding gsub! because there are edge cases with frozen strings
377
419
  component = component.gsub(character_class) do |sequence|
378
- (sequence.unpack('C*').map { |c| "%" + ("%02x" % c).upcase }).join
420
+ SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE[sequence]
379
421
  end
380
422
  if upcase_encoded.length > 0
381
- component = component.gsub(/%(#{upcase_encoded.chars.map do |char|
382
- char.unpack('C*').map { |c| '%02x' % c }.join
383
- end.join('|')})/i) { |s| s.upcase }
423
+ upcase_encoded_chars = upcase_encoded.chars.map do |char|
424
+ SEQUENCE_ENCODING_TABLE[char]
425
+ end
426
+ component = component.gsub(/%(#{upcase_encoded_chars.join('|')})/,
427
+ &:upcase)
384
428
  end
385
429
  return component
386
430
  end
387
431
 
388
432
  class << self
389
- alias_method :encode_component, :encode_component
433
+ alias_method :escape_component, :encode_component
390
434
  end
391
435
 
392
436
  ##
@@ -427,14 +471,18 @@ module Addressable
427
471
  end
428
472
  uri = uri.dup
429
473
  # Seriously, only use UTF-8. I'm really not kidding!
430
- uri.force_encoding("utf-8") if uri.respond_to?(:force_encoding)
431
- leave_encoded.force_encoding("utf-8") if leave_encoded.respond_to?(:force_encoding)
474
+ uri.force_encoding("utf-8")
475
+
476
+ unless leave_encoded.empty?
477
+ leave_encoded = leave_encoded.dup.force_encoding("utf-8")
478
+ end
479
+
432
480
  result = uri.gsub(/%[0-9a-f]{2}/iu) do |sequence|
433
481
  c = sequence[1..3].to_i(16).chr
434
- c.force_encoding("utf-8") if c.respond_to?(:force_encoding)
482
+ c.force_encoding("utf-8")
435
483
  leave_encoded.include?(c) ? sequence : c
436
484
  end
437
- result.force_encoding("utf-8") if result.respond_to?(:force_encoding)
485
+ result.force_encoding("utf-8")
438
486
  if return_type == String
439
487
  return result
440
488
  elsif return_type == ::Addressable::URI
@@ -514,19 +562,21 @@ module Addressable
514
562
  character_class = "#{character_class}%" unless character_class.include?('%')
515
563
 
516
564
  "|%(?!#{leave_encoded.chars.map do |char|
517
- seq = char.unpack('C*').map { |c| '%02x' % c }.join
565
+ seq = SEQUENCE_ENCODING_TABLE[char]
518
566
  [seq.upcase, seq.downcase]
519
567
  end.flatten.join('|')})"
520
568
  end
521
569
 
522
- character_class = /[^#{character_class}]#{leave_re}/
523
- end
524
- if component.respond_to?(:force_encoding)
525
- # We can't perform regexps on invalid UTF sequences, but
526
- # here we need to, so switch to ASCII.
527
- component = component.dup
528
- component.force_encoding(Encoding::ASCII_8BIT)
570
+ character_class = if leave_re
571
+ /[^#{character_class}]#{leave_re}/
572
+ else
573
+ /[^#{character_class}]/
574
+ end
529
575
  end
576
+ # We can't perform regexps on invalid UTF sequences, but
577
+ # here we need to, so switch to ASCII.
578
+ component = component.dup
579
+ component.force_encoding(Encoding::ASCII_8BIT)
530
580
  unencoded = self.unencode_component(component, String, leave_encoded)
531
581
  begin
532
582
  encoded = self.encode_component(
@@ -537,9 +587,7 @@ module Addressable
537
587
  rescue ArgumentError
538
588
  encoded = self.encode_component(unencoded)
539
589
  end
540
- if encoded.respond_to?(:force_encoding)
541
- encoded.force_encoding(Encoding::UTF_8)
542
- end
590
+ encoded.force_encoding(Encoding::UTF_8)
543
591
  return encoded
544
592
  end
545
593
 
@@ -843,14 +891,17 @@ module Addressable
843
891
  return nil unless self.scheme
844
892
  @normalized_scheme ||= begin
845
893
  if self.scheme =~ /^\s*ssh\+svn\s*$/i
846
- "svn+ssh"
894
+ "svn+ssh".dup
847
895
  else
848
896
  Addressable::URI.normalize_component(
849
897
  self.scheme.strip.downcase,
850
- Addressable::URI::CharacterClasses::SCHEME
898
+ Addressable::URI::NormalizeCharacterClasses::SCHEME
851
899
  )
852
900
  end
853
901
  end
902
+ # All normalized values should be UTF-8
903
+ @normalized_scheme.force_encoding(Encoding::UTF_8) if @normalized_scheme
904
+ @normalized_scheme
854
905
  end
855
906
 
856
907
  ##
@@ -864,7 +915,7 @@ module Addressable
864
915
  new_scheme = new_scheme.to_str
865
916
  end
866
917
  if new_scheme && new_scheme !~ /\A[a-z][a-z0-9\.\+\-]*\z/i
867
- raise InvalidURIError, "Invalid scheme format: #{new_scheme}"
918
+ raise InvalidURIError, "Invalid scheme format: '#{new_scheme}'"
868
919
  end
869
920
  @scheme = new_scheme
870
921
  @scheme = nil if @scheme.to_s.strip.empty?
@@ -899,10 +950,13 @@ module Addressable
899
950
  else
900
951
  Addressable::URI.normalize_component(
901
952
  self.user.strip,
902
- Addressable::URI::CharacterClasses::UNRESERVED
953
+ Addressable::URI::NormalizeCharacterClasses::UNRESERVED
903
954
  )
904
955
  end
905
956
  end
957
+ # All normalized values should be UTF-8
958
+ @normalized_user.force_encoding(Encoding::UTF_8) if @normalized_user
959
+ @normalized_user
906
960
  end
907
961
 
908
962
  ##
@@ -953,10 +1007,15 @@ module Addressable
953
1007
  else
954
1008
  Addressable::URI.normalize_component(
955
1009
  self.password.strip,
956
- Addressable::URI::CharacterClasses::UNRESERVED
1010
+ Addressable::URI::NormalizeCharacterClasses::UNRESERVED
957
1011
  )
958
1012
  end
959
1013
  end
1014
+ # All normalized values should be UTF-8
1015
+ if @normalized_password
1016
+ @normalized_password.force_encoding(Encoding::UTF_8)
1017
+ end
1018
+ @normalized_password
960
1019
  end
961
1020
 
962
1021
  ##
@@ -1017,11 +1076,16 @@ module Addressable
1017
1076
  if !current_user && !current_password
1018
1077
  nil
1019
1078
  elsif current_user && current_password
1020
- "#{current_user}:#{current_password}"
1079
+ "#{current_user}:#{current_password}".dup
1021
1080
  elsif current_user && !current_password
1022
- "#{current_user}"
1081
+ "#{current_user}".dup
1023
1082
  end
1024
1083
  end
1084
+ # All normalized values should be UTF-8
1085
+ if @normalized_userinfo
1086
+ @normalized_userinfo.force_encoding(Encoding::UTF_8)
1087
+ end
1088
+ @normalized_userinfo
1025
1089
  end
1026
1090
 
1027
1091
  ##
@@ -1067,6 +1131,7 @@ module Addressable
1067
1131
  # @return [String] The host component, normalized.
1068
1132
  def normalized_host
1069
1133
  return nil unless self.host
1134
+
1070
1135
  @normalized_host ||= begin
1071
1136
  if !self.host.strip.empty?
1072
1137
  result = ::Addressable::IDNA.to_ascii(
@@ -1078,12 +1143,18 @@ module Addressable
1078
1143
  end
1079
1144
  result = Addressable::URI.normalize_component(
1080
1145
  result,
1081
- CharacterClasses::HOST)
1146
+ NormalizeCharacterClasses::HOST
1147
+ )
1082
1148
  result
1083
1149
  else
1084
- EMPTY_STR
1150
+ EMPTY_STR.dup
1085
1151
  end
1086
1152
  end
1153
+ # All normalized values should be UTF-8
1154
+ if @normalized_host && !@normalized_host.empty?
1155
+ @normalized_host.force_encoding(Encoding::UTF_8)
1156
+ end
1157
+ @normalized_host
1087
1158
  end
1088
1159
 
1089
1160
  ##
@@ -1096,14 +1167,6 @@ module Addressable
1096
1167
  end
1097
1168
  @host = new_host ? new_host.to_str : nil
1098
1169
 
1099
- unreserved = CharacterClasses::UNRESERVED
1100
- sub_delims = CharacterClasses::SUB_DELIMS
1101
- if !@host.nil? && (@host =~ /[<>{}\/\?\#\@"[[:space:]]]/ ||
1102
- (@host[/^\[(.*)\]$/, 1] != nil && @host[/^\[(.*)\]$/, 1] !~
1103
- Regexp.new("^[#{unreserved}#{sub_delims}:]*$")))
1104
- raise InvalidURIError, "Invalid character in host: '#{@host.to_s}'"
1105
- end
1106
-
1107
1170
  # Reset dependent values
1108
1171
  remove_instance_variable(:@authority) if defined?(@authority)
1109
1172
  remove_instance_variable(:@normalized_host) if defined?(@normalized_host)
@@ -1144,6 +1207,33 @@ module Addressable
1144
1207
  self.host = v
1145
1208
  end
1146
1209
 
1210
+ ##
1211
+ # Returns the top-level domain for this host.
1212
+ #
1213
+ # @example
1214
+ # Addressable::URI.parse("http://www.example.co.uk").tld # => "co.uk"
1215
+ def tld
1216
+ PublicSuffix.parse(self.host, ignore_private: true).tld
1217
+ end
1218
+
1219
+ ##
1220
+ # Sets the top-level domain for this URI.
1221
+ #
1222
+ # @param [String, #to_str] new_tld The new top-level domain.
1223
+ def tld=(new_tld)
1224
+ replaced_tld = host.sub(/#{tld}\z/, new_tld)
1225
+ self.host = PublicSuffix::Domain.new(replaced_tld).to_s
1226
+ end
1227
+
1228
+ ##
1229
+ # Returns the public suffix domain for this host.
1230
+ #
1231
+ # @example
1232
+ # Addressable::URI.parse("http://www.example.co.uk").domain # => "example.co.uk"
1233
+ def domain
1234
+ PublicSuffix.domain(self.host, ignore_private: true)
1235
+ end
1236
+
1147
1237
  ##
1148
1238
  # The authority component for this URI.
1149
1239
  # Combines the user, password, host, and port components.
@@ -1151,7 +1241,7 @@ module Addressable
1151
1241
  # @return [String] The authority component.
1152
1242
  def authority
1153
1243
  self.host && @authority ||= begin
1154
- authority = ""
1244
+ authority = String.new
1155
1245
  if self.userinfo != nil
1156
1246
  authority << "#{self.userinfo}@"
1157
1247
  end
@@ -1170,7 +1260,7 @@ module Addressable
1170
1260
  def normalized_authority
1171
1261
  return nil unless self.authority
1172
1262
  @normalized_authority ||= begin
1173
- authority = ""
1263
+ authority = String.new
1174
1264
  if self.normalized_userinfo != nil
1175
1265
  authority << "#{self.normalized_userinfo}@"
1176
1266
  end
@@ -1180,6 +1270,11 @@ module Addressable
1180
1270
  end
1181
1271
  authority
1182
1272
  end
1273
+ # All normalized values should be UTF-8
1274
+ if @normalized_authority
1275
+ @normalized_authority.force_encoding(Encoding::UTF_8)
1276
+ end
1277
+ @normalized_authority
1183
1278
  end
1184
1279
 
1185
1280
  ##
@@ -1197,9 +1292,9 @@ module Addressable
1197
1292
  new_user = new_userinfo.strip[/^([^:]*):?/, 1]
1198
1293
  new_password = new_userinfo.strip[/:(.*)$/, 1]
1199
1294
  end
1200
- new_host = new_authority.gsub(
1295
+ new_host = new_authority.sub(
1201
1296
  /^([^\[\]]*)@/, EMPTY_STR
1202
- ).gsub(
1297
+ ).sub(
1203
1298
  /:([^:@\[\]]*?)$/, EMPTY_STR
1204
1299
  )
1205
1300
  new_port =
@@ -1383,7 +1478,7 @@ module Addressable
1383
1478
  # @return [String] The components that identify a site.
1384
1479
  def site
1385
1480
  (self.scheme || self.authority) && @site ||= begin
1386
- site_string = ""
1481
+ site_string = "".dup
1387
1482
  site_string << "#{self.scheme}:" if self.scheme != nil
1388
1483
  site_string << "//#{self.authority}" if self.authority != nil
1389
1484
  site_string
@@ -1402,7 +1497,7 @@ module Addressable
1402
1497
  def normalized_site
1403
1498
  return nil unless self.site
1404
1499
  @normalized_site ||= begin
1405
- site_string = ""
1500
+ site_string = "".dup
1406
1501
  if self.normalized_scheme != nil
1407
1502
  site_string << "#{self.normalized_scheme}:"
1408
1503
  end
@@ -1411,6 +1506,9 @@ module Addressable
1411
1506
  end
1412
1507
  site_string
1413
1508
  end
1509
+ # All normalized values should be UTF-8
1510
+ @normalized_site.force_encoding(Encoding::UTF_8) if @normalized_site
1511
+ @normalized_site
1414
1512
  end
1415
1513
 
1416
1514
  ##
@@ -1460,17 +1558,20 @@ module Addressable
1460
1558
  result = path.strip.split(SLASH, -1).map do |segment|
1461
1559
  Addressable::URI.normalize_component(
1462
1560
  segment,
1463
- Addressable::URI::CharacterClasses::PCHAR
1561
+ Addressable::URI::NormalizeCharacterClasses::PCHAR
1464
1562
  )
1465
1563
  end.join(SLASH)
1466
1564
 
1467
1565
  result = URI.normalize_path(result)
1468
1566
  if result.empty? &&
1469
1567
  ["http", "https", "ftp", "tftp"].include?(self.normalized_scheme)
1470
- result = SLASH
1568
+ result = SLASH.dup
1471
1569
  end
1472
1570
  result
1473
1571
  end
1572
+ # All normalized values should be UTF-8
1573
+ @normalized_path.force_encoding(Encoding::UTF_8) if @normalized_path
1574
+ @normalized_path
1474
1575
  end
1475
1576
 
1476
1577
  ##
@@ -1489,6 +1590,9 @@ module Addressable
1489
1590
  # Reset dependent values
1490
1591
  remove_instance_variable(:@normalized_path) if defined?(@normalized_path)
1491
1592
  remove_composite_values
1593
+
1594
+ # Ensure we haven't created an invalid URI
1595
+ validate()
1492
1596
  end
1493
1597
 
1494
1598
  ##
@@ -1497,7 +1601,7 @@ module Addressable
1497
1601
  # @return [String] The path's basename.
1498
1602
  def basename
1499
1603
  # Path cannot be nil
1500
- return File.basename(self.path).gsub(/;[^\/]*$/, EMPTY_STR)
1604
+ return File.basename(self.path).sub(/;[^\/]*$/, EMPTY_STR)
1501
1605
  end
1502
1606
 
1503
1607
  ##
@@ -1529,13 +1633,21 @@ module Addressable
1529
1633
  modified_query_class = Addressable::URI::CharacterClasses::QUERY.dup
1530
1634
  # Make sure possible key-value pair delimiters are escaped.
1531
1635
  modified_query_class.sub!("\\&", "").sub!("\\;", "")
1532
- pairs = (self.query || "").split("&", -1)
1636
+ pairs = (query || "").split("&", -1)
1637
+ pairs.delete_if(&:empty?).uniq! if flags.include?(:compacted)
1533
1638
  pairs.sort! if flags.include?(:sorted)
1534
1639
  component = pairs.map do |pair|
1535
- Addressable::URI.normalize_component(pair, modified_query_class, "+")
1640
+ Addressable::URI.normalize_component(
1641
+ pair,
1642
+ Addressable::URI::NormalizeCharacterClasses::QUERY,
1643
+ "+"
1644
+ )
1536
1645
  end.join("&")
1537
1646
  component == "" ? nil : component
1538
1647
  end
1648
+ # All normalized values should be UTF-8
1649
+ @normalized_query.force_encoding(Encoding::UTF_8) if @normalized_query
1650
+ @normalized_query
1539
1651
  end
1540
1652
 
1541
1653
  ##
@@ -1588,11 +1700,13 @@ module Addressable
1588
1700
  # so it's best to make all changes in-place.
1589
1701
  pair[0] = URI.unencode_component(pair[0])
1590
1702
  if pair[1].respond_to?(:to_str)
1703
+ value = pair[1].to_str
1591
1704
  # I loathe the fact that I have to do this. Stupid HTML 4.01.
1592
1705
  # Treating '+' as a space was just an unbelievably bad idea.
1593
1706
  # There was nothing wrong with '%20'!
1594
1707
  # If it ain't broke, don't fix it!
1595
- pair[1] = URI.unencode_component(pair[1].to_str.gsub(/\+/, " "))
1708
+ value = value.tr("+", " ") if ["http", "https", nil].include?(scheme)
1709
+ pair[1] = URI.unencode_component(value)
1596
1710
  end
1597
1711
  if return_type == Hash
1598
1712
  accu[pair[0]] = pair[1]
@@ -1644,7 +1758,7 @@ module Addressable
1644
1758
  end
1645
1759
 
1646
1760
  # new_query_values have form [['key1', 'value1'], ['key2', 'value2']]
1647
- buffer = ""
1761
+ buffer = "".dup
1648
1762
  new_query_values.each do |key, value|
1649
1763
  encoded_key = URI.encode_component(
1650
1764
  key, CharacterClasses::UNRESERVED
@@ -1674,7 +1788,7 @@ module Addressable
1674
1788
  #
1675
1789
  # @return [String] The request URI required for an HTTP request.
1676
1790
  def request_uri
1677
- return nil if self.absolute? && self.scheme !~ /^https?$/
1791
+ return nil if self.absolute? && self.scheme !~ /^https?$/i
1678
1792
  return (
1679
1793
  (!self.path.empty? ? self.path : SLASH) +
1680
1794
  (self.query ? "?#{self.query}" : EMPTY_STR)
@@ -1689,12 +1803,12 @@ module Addressable
1689
1803
  if !new_request_uri.respond_to?(:to_str)
1690
1804
  raise TypeError, "Can't convert #{new_request_uri.class} into String."
1691
1805
  end
1692
- if self.absolute? && self.scheme !~ /^https?$/
1806
+ if self.absolute? && self.scheme !~ /^https?$/i
1693
1807
  raise InvalidURIError,
1694
1808
  "Cannot set an HTTP request URI for a non-HTTP URI."
1695
1809
  end
1696
1810
  new_request_uri = new_request_uri.to_str
1697
- path_component = new_request_uri[/^([^\?]*)\?(?:.*)$/, 1]
1811
+ path_component = new_request_uri[/^([^\?]*)\??(?:.*)$/, 1]
1698
1812
  query_component = new_request_uri[/^(?:[^\?]*)\?(.*)$/, 1]
1699
1813
  path_component = path_component.to_s
1700
1814
  path_component = (!path_component.empty? ? path_component : SLASH)
@@ -1723,10 +1837,15 @@ module Addressable
1723
1837
  @normalized_fragment ||= begin
1724
1838
  component = Addressable::URI.normalize_component(
1725
1839
  self.fragment,
1726
- Addressable::URI::CharacterClasses::FRAGMENT
1840
+ Addressable::URI::NormalizeCharacterClasses::FRAGMENT
1727
1841
  )
1728
1842
  component == "" ? nil : component
1729
1843
  end
1844
+ # All normalized values should be UTF-8
1845
+ if @normalized_fragment
1846
+ @normalized_fragment.force_encoding(Encoding::UTF_8)
1847
+ end
1848
+ @normalized_fragment
1730
1849
  end
1731
1850
 
1732
1851
  ##
@@ -1844,8 +1963,8 @@ module Addressable
1844
1963
  # Section 5.2.3 of RFC 3986
1845
1964
  #
1846
1965
  # Removes the right-most path segment from the base path.
1847
- if base_path =~ /\//
1848
- base_path.gsub!(/\/[^\/]+$/, SLASH)
1966
+ if base_path.include?(SLASH)
1967
+ base_path.sub!(/\/[^\/]+$/, SLASH)
1849
1968
  else
1850
1969
  base_path = EMPTY_STR
1851
1970
  end
@@ -2246,15 +2365,13 @@ module Addressable
2246
2365
  "Cannot assemble URI string with ambiguous path: '#{self.path}'"
2247
2366
  end
2248
2367
  @uri_string ||= begin
2249
- uri_string = ""
2368
+ uri_string = String.new
2250
2369
  uri_string << "#{self.scheme}:" if self.scheme != nil
2251
2370
  uri_string << "//#{self.authority}" if self.authority != nil
2252
2371
  uri_string << self.path.to_s
2253
2372
  uri_string << "?#{self.query}" if self.query != nil
2254
2373
  uri_string << "##{self.fragment}" if self.fragment != nil
2255
- if uri_string.respond_to?(:force_encoding)
2256
- uri_string.force_encoding(Encoding::UTF_8)
2257
- end
2374
+ uri_string.force_encoding(Encoding::UTF_8)
2258
2375
  uri_string
2259
2376
  end
2260
2377
  end
@@ -2296,10 +2413,10 @@ module Addressable
2296
2413
  #
2297
2414
  # @param [Proc] block
2298
2415
  # A set of operations to perform on a given URI.
2299
- def defer_validation(&block)
2300
- raise LocalJumpError, "No block given." unless block
2416
+ def defer_validation
2417
+ raise LocalJumpError, "No block given." unless block_given?
2301
2418
  @validation_deferred = true
2302
- block.call()
2419
+ yield
2303
2420
  @validation_deferred = false
2304
2421
  validate
2305
2422
  return nil
@@ -2371,6 +2488,19 @@ module Addressable
2371
2488
  raise InvalidURIError,
2372
2489
  "Cannot have a relative path with an authority set: '#{self.to_s}'"
2373
2490
  end
2491
+ if self.path != nil && !self.path.empty? &&
2492
+ self.path[0..1] == SLASH + SLASH && self.authority == nil
2493
+ raise InvalidURIError,
2494
+ "Cannot have a path with two leading slashes " +
2495
+ "without an authority set: '#{self.to_s}'"
2496
+ end
2497
+ unreserved = CharacterClasses::UNRESERVED
2498
+ sub_delims = CharacterClasses::SUB_DELIMS
2499
+ if !self.host.nil? && (self.host =~ /[<>{}\/\\\?\#\@"[[:space:]]]/ ||
2500
+ (self.host[/^\[(.*)\]$/, 1] != nil && self.host[/^\[(.*)\]$/, 1] !~
2501
+ Regexp.new("^[#{unreserved}#{sub_delims}:]*$")))
2502
+ raise InvalidURIError, "Invalid character in host: '#{self.host.to_s}'"
2503
+ end
2374
2504
  return nil
2375
2505
  end
2376
2506
 
@@ -2384,7 +2514,9 @@ module Addressable
2384
2514
  def replace_self(uri)
2385
2515
  # Reset dependent values
2386
2516
  instance_variables.each do |var|
2387
- remove_instance_variable(var) if instance_variable_defined?(var)
2517
+ if instance_variable_defined?(var) && var != :@validation_deferred
2518
+ remove_instance_variable(var)
2519
+ end
2388
2520
  end
2389
2521
 
2390
2522
  @scheme = uri.scheme