addressable 2.4.0 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +54 -0
- data/Gemfile +13 -12
- data/README.md +31 -15
- data/Rakefile +5 -3
- data/addressable.gemspec +23 -20
- data/lib/addressable/idna/native.rb +11 -5
- data/lib/addressable/idna/pure.rb +61 -55
- data/lib/addressable/idna.rb +3 -1
- data/lib/addressable/template.rb +64 -84
- data/lib/addressable/uri.rb +228 -96
- data/lib/addressable/version.rb +4 -2
- data/lib/addressable.rb +2 -0
- data/spec/addressable/idna_spec.rb +35 -3
- data/spec/addressable/net_http_compat_spec.rb +3 -1
- data/spec/addressable/security_spec.rb +3 -1
- data/spec/addressable/template_spec.rb +77 -3
- data/spec/addressable/uri_spec.rb +663 -203
- data/spec/spec_helper.rb +12 -0
- data/tasks/clobber.rake +2 -0
- data/tasks/gem.rake +9 -14
- data/tasks/git.rake +2 -0
- data/tasks/metrics.rake +2 -0
- data/tasks/profile.rake +72 -0
- data/tasks/rspec.rake +3 -1
- data/tasks/yard.rake +2 -0
- metadata +36 -11
- data/spec/addressable/rack_mount_compat_spec.rb +0 -104
data/lib/addressable/uri.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# encoding:utf-8
|
2
4
|
#--
|
3
|
-
# Copyright (C)
|
5
|
+
# Copyright (C) Bob Aman
|
4
6
|
#
|
5
7
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
8
|
# you may not use this file except in compliance with the License.
|
@@ -18,6 +20,7 @@
|
|
18
20
|
|
19
21
|
require "addressable/version"
|
20
22
|
require "addressable/idna"
|
23
|
+
require "public_suffix"
|
21
24
|
|
22
25
|
##
|
23
26
|
# Addressable is a library for processing links and URIs.
|
@@ -44,13 +47,22 @@ module Addressable
|
|
44
47
|
UNRESERVED = ALPHA + DIGIT + "\\-\\.\\_\\~"
|
45
48
|
PCHAR = UNRESERVED + SUB_DELIMS + "\\:\\@"
|
46
49
|
SCHEME = ALPHA + DIGIT + "\\-\\+\\."
|
47
|
-
HOST =
|
48
|
-
AUTHORITY = PCHAR
|
50
|
+
HOST = UNRESERVED + SUB_DELIMS + "\\[\\:\\]"
|
51
|
+
AUTHORITY = PCHAR + "\\[\\:\\]"
|
49
52
|
PATH = PCHAR + "\\/"
|
50
53
|
QUERY = PCHAR + "\\/\\?"
|
51
54
|
FRAGMENT = PCHAR + "\\/\\?"
|
52
55
|
end
|
53
56
|
|
57
|
+
module NormalizeCharacterClasses
|
58
|
+
HOST = /[^#{CharacterClasses::HOST}]/
|
59
|
+
UNRESERVED = /[^#{CharacterClasses::UNRESERVED}]/
|
60
|
+
PCHAR = /[^#{CharacterClasses::PCHAR}]/
|
61
|
+
SCHEME = /[^#{CharacterClasses::SCHEME}]/
|
62
|
+
FRAGMENT = /[^#{CharacterClasses::FRAGMENT}]/
|
63
|
+
QUERY = %r{[^a-zA-Z0-9\-\.\_\~\!\$\'\(\)\*\+\,\=\:\@\/\?%]|%(?!2B|2b)}
|
64
|
+
end
|
65
|
+
|
54
66
|
SLASH = '/'
|
55
67
|
EMPTY_STR = ''
|
56
68
|
|
@@ -70,7 +82,7 @@ module Addressable
|
|
70
82
|
"wais" => 210,
|
71
83
|
"ldap" => 389,
|
72
84
|
"prospero" => 1525
|
73
|
-
}
|
85
|
+
}.freeze
|
74
86
|
|
75
87
|
##
|
76
88
|
# Returns a URI object based on the parsed string.
|
@@ -121,9 +133,9 @@ module Addressable
|
|
121
133
|
user = userinfo.strip[/^([^:]*):?/, 1]
|
122
134
|
password = userinfo.strip[/:(.*)$/, 1]
|
123
135
|
end
|
124
|
-
host = authority.
|
136
|
+
host = authority.sub(
|
125
137
|
/^([^\[\]]*)@/, EMPTY_STR
|
126
|
-
).
|
138
|
+
).sub(
|
127
139
|
/:([^:@\[\]]*?)$/, EMPTY_STR
|
128
140
|
)
|
129
141
|
port = authority[/:([^:@\[\]]*?)$/, 1]
|
@@ -176,33 +188,50 @@ module Addressable
|
|
176
188
|
raise TypeError, "Can't convert #{uri.class} into String."
|
177
189
|
end
|
178
190
|
# Otherwise, convert to a String
|
179
|
-
uri = uri.to_str.dup
|
191
|
+
uri = uri.to_str.dup.strip
|
180
192
|
hints = {
|
181
193
|
:scheme => "http"
|
182
194
|
}.merge(hints)
|
183
195
|
case uri
|
184
|
-
when /^http
|
185
|
-
uri.
|
186
|
-
when /^https
|
187
|
-
uri.
|
188
|
-
when /^feed:\/+http
|
189
|
-
uri.
|
190
|
-
when /^feed
|
191
|
-
uri.
|
192
|
-
when
|
193
|
-
uri.
|
196
|
+
when /^http:\//i
|
197
|
+
uri.sub!(/^http:\/+/i, "http://")
|
198
|
+
when /^https:\//i
|
199
|
+
uri.sub!(/^https:\/+/i, "https://")
|
200
|
+
when /^feed:\/+http:\//i
|
201
|
+
uri.sub!(/^feed:\/+http:\/+/i, "feed:http://")
|
202
|
+
when /^feed:\//i
|
203
|
+
uri.sub!(/^feed:\/+/i, "feed://")
|
204
|
+
when %r[^file:/{4}]i
|
205
|
+
uri.sub!(%r[^file:/+]i, "file:////")
|
206
|
+
when %r[^file://localhost/]i
|
207
|
+
uri.sub!(%r[^file://localhost/+]i, "file:///")
|
208
|
+
when %r[^file:/+]i
|
209
|
+
uri.sub!(%r[^file:/+]i, "file:///")
|
194
210
|
when /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
|
195
|
-
uri.
|
211
|
+
uri.sub!(/^/, hints[:scheme] + "://")
|
212
|
+
when /\A\d+\..*:\d+\z/
|
213
|
+
uri = "#{hints[:scheme]}://#{uri}"
|
214
|
+
end
|
215
|
+
match = uri.match(URIREGEX)
|
216
|
+
fragments = match.captures
|
217
|
+
authority = fragments[3]
|
218
|
+
if authority && authority.length > 0
|
219
|
+
new_authority = authority.tr("\\", "/").gsub(" ", "%20")
|
220
|
+
# NOTE: We want offset 4, not 3!
|
221
|
+
offset = match.offset(4)
|
222
|
+
uri = uri.dup
|
223
|
+
uri[offset[0]...offset[1]] = new_authority
|
196
224
|
end
|
197
225
|
parsed = self.parse(uri)
|
198
226
|
if parsed.scheme =~ /^[^\/?#\.]+\.[^\/?#]+$/
|
199
227
|
parsed = self.parse(hints[:scheme] + "://" + uri)
|
200
228
|
end
|
201
229
|
if parsed.path.include?(".")
|
202
|
-
|
203
|
-
|
230
|
+
if parsed.path[/\b@\b/]
|
231
|
+
parsed.scheme = "mailto" unless parsed.scheme
|
232
|
+
elsif new_host = parsed.path[/^([^\/]+\.[^\/]*)/, 1]
|
204
233
|
parsed.defer_validation do
|
205
|
-
new_path = parsed.path.
|
234
|
+
new_path = parsed.path.sub(
|
206
235
|
Regexp.new("^" + Regexp.escape(new_host)), EMPTY_STR)
|
207
236
|
parsed.host = new_host
|
208
237
|
parsed.path = new_path
|
@@ -253,24 +282,24 @@ module Addressable
|
|
253
282
|
# Otherwise, convert to a String
|
254
283
|
path = path.to_str.strip
|
255
284
|
|
256
|
-
path.
|
285
|
+
path.sub!(/^file:\/?\/?/, EMPTY_STR) if path =~ /^file:\/?\/?/
|
257
286
|
path = SLASH + path if path =~ /^([a-zA-Z])[\|:]/
|
258
287
|
uri = self.parse(path)
|
259
288
|
|
260
289
|
if uri.scheme == nil
|
261
290
|
# Adjust windows-style uris
|
262
|
-
uri.path.
|
291
|
+
uri.path.sub!(/^\/?([a-zA-Z])[\|:][\\\/]/) do
|
263
292
|
"/#{$1.downcase}:/"
|
264
293
|
end
|
265
|
-
uri.path.
|
294
|
+
uri.path.tr!("\\", SLASH)
|
266
295
|
if File.exist?(uri.path) &&
|
267
296
|
File.stat(uri.path).directory?
|
268
|
-
uri.path.
|
297
|
+
uri.path.chomp!(SLASH)
|
269
298
|
uri.path = uri.path + '/'
|
270
299
|
end
|
271
300
|
|
272
301
|
# If the path is absolute, set the scheme and host.
|
273
|
-
if uri.path
|
302
|
+
if uri.path.start_with?(SLASH)
|
274
303
|
uri.scheme = "file"
|
275
304
|
uri.host = EMPTY_STR
|
276
305
|
end
|
@@ -307,6 +336,21 @@ module Addressable
|
|
307
336
|
return result
|
308
337
|
end
|
309
338
|
|
339
|
+
##
|
340
|
+
# Tables used to optimize encoding operations in `self.encode_component`
|
341
|
+
# and `self.normalize_component`
|
342
|
+
SEQUENCE_ENCODING_TABLE = Hash.new do |hash, sequence|
|
343
|
+
hash[sequence] = sequence.unpack("C*").map do |c|
|
344
|
+
format("%02x", c)
|
345
|
+
end.join
|
346
|
+
end
|
347
|
+
|
348
|
+
SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE = Hash.new do |hash, sequence|
|
349
|
+
hash[sequence] = sequence.unpack("C*").map do |c|
|
350
|
+
format("%%%02X", c)
|
351
|
+
end.join
|
352
|
+
end
|
353
|
+
|
310
354
|
##
|
311
355
|
# Percent encodes a URI component.
|
312
356
|
#
|
@@ -367,26 +411,26 @@ module Addressable
|
|
367
411
|
if character_class.kind_of?(String)
|
368
412
|
character_class = /[^#{character_class}]/
|
369
413
|
end
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
component.force_encoding(Encoding::ASCII_8BIT)
|
375
|
-
end
|
414
|
+
# We can't perform regexps on invalid UTF sequences, but
|
415
|
+
# here we need to, so switch to ASCII.
|
416
|
+
component = component.dup
|
417
|
+
component.force_encoding(Encoding::ASCII_8BIT)
|
376
418
|
# Avoiding gsub! because there are edge cases with frozen strings
|
377
419
|
component = component.gsub(character_class) do |sequence|
|
378
|
-
|
420
|
+
SEQUENCE_UPCASED_PERCENT_ENCODING_TABLE[sequence]
|
379
421
|
end
|
380
422
|
if upcase_encoded.length > 0
|
381
|
-
|
382
|
-
char
|
383
|
-
end
|
423
|
+
upcase_encoded_chars = upcase_encoded.chars.map do |char|
|
424
|
+
SEQUENCE_ENCODING_TABLE[char]
|
425
|
+
end
|
426
|
+
component = component.gsub(/%(#{upcase_encoded_chars.join('|')})/,
|
427
|
+
&:upcase)
|
384
428
|
end
|
385
429
|
return component
|
386
430
|
end
|
387
431
|
|
388
432
|
class << self
|
389
|
-
alias_method :
|
433
|
+
alias_method :escape_component, :encode_component
|
390
434
|
end
|
391
435
|
|
392
436
|
##
|
@@ -427,14 +471,18 @@ module Addressable
|
|
427
471
|
end
|
428
472
|
uri = uri.dup
|
429
473
|
# Seriously, only use UTF-8. I'm really not kidding!
|
430
|
-
uri.force_encoding("utf-8")
|
431
|
-
|
474
|
+
uri.force_encoding("utf-8")
|
475
|
+
|
476
|
+
unless leave_encoded.empty?
|
477
|
+
leave_encoded = leave_encoded.dup.force_encoding("utf-8")
|
478
|
+
end
|
479
|
+
|
432
480
|
result = uri.gsub(/%[0-9a-f]{2}/iu) do |sequence|
|
433
481
|
c = sequence[1..3].to_i(16).chr
|
434
|
-
c.force_encoding("utf-8")
|
482
|
+
c.force_encoding("utf-8")
|
435
483
|
leave_encoded.include?(c) ? sequence : c
|
436
484
|
end
|
437
|
-
result.force_encoding("utf-8")
|
485
|
+
result.force_encoding("utf-8")
|
438
486
|
if return_type == String
|
439
487
|
return result
|
440
488
|
elsif return_type == ::Addressable::URI
|
@@ -514,19 +562,21 @@ module Addressable
|
|
514
562
|
character_class = "#{character_class}%" unless character_class.include?('%')
|
515
563
|
|
516
564
|
"|%(?!#{leave_encoded.chars.map do |char|
|
517
|
-
seq = char
|
565
|
+
seq = SEQUENCE_ENCODING_TABLE[char]
|
518
566
|
[seq.upcase, seq.downcase]
|
519
567
|
end.flatten.join('|')})"
|
520
568
|
end
|
521
569
|
|
522
|
-
character_class =
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
component = component.dup
|
528
|
-
component.force_encoding(Encoding::ASCII_8BIT)
|
570
|
+
character_class = if leave_re
|
571
|
+
/[^#{character_class}]#{leave_re}/
|
572
|
+
else
|
573
|
+
/[^#{character_class}]/
|
574
|
+
end
|
529
575
|
end
|
576
|
+
# We can't perform regexps on invalid UTF sequences, but
|
577
|
+
# here we need to, so switch to ASCII.
|
578
|
+
component = component.dup
|
579
|
+
component.force_encoding(Encoding::ASCII_8BIT)
|
530
580
|
unencoded = self.unencode_component(component, String, leave_encoded)
|
531
581
|
begin
|
532
582
|
encoded = self.encode_component(
|
@@ -537,9 +587,7 @@ module Addressable
|
|
537
587
|
rescue ArgumentError
|
538
588
|
encoded = self.encode_component(unencoded)
|
539
589
|
end
|
540
|
-
|
541
|
-
encoded.force_encoding(Encoding::UTF_8)
|
542
|
-
end
|
590
|
+
encoded.force_encoding(Encoding::UTF_8)
|
543
591
|
return encoded
|
544
592
|
end
|
545
593
|
|
@@ -843,14 +891,17 @@ module Addressable
|
|
843
891
|
return nil unless self.scheme
|
844
892
|
@normalized_scheme ||= begin
|
845
893
|
if self.scheme =~ /^\s*ssh\+svn\s*$/i
|
846
|
-
"svn+ssh"
|
894
|
+
"svn+ssh".dup
|
847
895
|
else
|
848
896
|
Addressable::URI.normalize_component(
|
849
897
|
self.scheme.strip.downcase,
|
850
|
-
Addressable::URI::
|
898
|
+
Addressable::URI::NormalizeCharacterClasses::SCHEME
|
851
899
|
)
|
852
900
|
end
|
853
901
|
end
|
902
|
+
# All normalized values should be UTF-8
|
903
|
+
@normalized_scheme.force_encoding(Encoding::UTF_8) if @normalized_scheme
|
904
|
+
@normalized_scheme
|
854
905
|
end
|
855
906
|
|
856
907
|
##
|
@@ -864,7 +915,7 @@ module Addressable
|
|
864
915
|
new_scheme = new_scheme.to_str
|
865
916
|
end
|
866
917
|
if new_scheme && new_scheme !~ /\A[a-z][a-z0-9\.\+\-]*\z/i
|
867
|
-
raise InvalidURIError, "Invalid scheme format: #{new_scheme}"
|
918
|
+
raise InvalidURIError, "Invalid scheme format: '#{new_scheme}'"
|
868
919
|
end
|
869
920
|
@scheme = new_scheme
|
870
921
|
@scheme = nil if @scheme.to_s.strip.empty?
|
@@ -899,10 +950,13 @@ module Addressable
|
|
899
950
|
else
|
900
951
|
Addressable::URI.normalize_component(
|
901
952
|
self.user.strip,
|
902
|
-
Addressable::URI::
|
953
|
+
Addressable::URI::NormalizeCharacterClasses::UNRESERVED
|
903
954
|
)
|
904
955
|
end
|
905
956
|
end
|
957
|
+
# All normalized values should be UTF-8
|
958
|
+
@normalized_user.force_encoding(Encoding::UTF_8) if @normalized_user
|
959
|
+
@normalized_user
|
906
960
|
end
|
907
961
|
|
908
962
|
##
|
@@ -953,10 +1007,15 @@ module Addressable
|
|
953
1007
|
else
|
954
1008
|
Addressable::URI.normalize_component(
|
955
1009
|
self.password.strip,
|
956
|
-
Addressable::URI::
|
1010
|
+
Addressable::URI::NormalizeCharacterClasses::UNRESERVED
|
957
1011
|
)
|
958
1012
|
end
|
959
1013
|
end
|
1014
|
+
# All normalized values should be UTF-8
|
1015
|
+
if @normalized_password
|
1016
|
+
@normalized_password.force_encoding(Encoding::UTF_8)
|
1017
|
+
end
|
1018
|
+
@normalized_password
|
960
1019
|
end
|
961
1020
|
|
962
1021
|
##
|
@@ -1017,11 +1076,16 @@ module Addressable
|
|
1017
1076
|
if !current_user && !current_password
|
1018
1077
|
nil
|
1019
1078
|
elsif current_user && current_password
|
1020
|
-
"#{current_user}:#{current_password}"
|
1079
|
+
"#{current_user}:#{current_password}".dup
|
1021
1080
|
elsif current_user && !current_password
|
1022
|
-
"#{current_user}"
|
1081
|
+
"#{current_user}".dup
|
1023
1082
|
end
|
1024
1083
|
end
|
1084
|
+
# All normalized values should be UTF-8
|
1085
|
+
if @normalized_userinfo
|
1086
|
+
@normalized_userinfo.force_encoding(Encoding::UTF_8)
|
1087
|
+
end
|
1088
|
+
@normalized_userinfo
|
1025
1089
|
end
|
1026
1090
|
|
1027
1091
|
##
|
@@ -1067,6 +1131,7 @@ module Addressable
|
|
1067
1131
|
# @return [String] The host component, normalized.
|
1068
1132
|
def normalized_host
|
1069
1133
|
return nil unless self.host
|
1134
|
+
|
1070
1135
|
@normalized_host ||= begin
|
1071
1136
|
if !self.host.strip.empty?
|
1072
1137
|
result = ::Addressable::IDNA.to_ascii(
|
@@ -1078,12 +1143,18 @@ module Addressable
|
|
1078
1143
|
end
|
1079
1144
|
result = Addressable::URI.normalize_component(
|
1080
1145
|
result,
|
1081
|
-
|
1146
|
+
NormalizeCharacterClasses::HOST
|
1147
|
+
)
|
1082
1148
|
result
|
1083
1149
|
else
|
1084
|
-
EMPTY_STR
|
1150
|
+
EMPTY_STR.dup
|
1085
1151
|
end
|
1086
1152
|
end
|
1153
|
+
# All normalized values should be UTF-8
|
1154
|
+
if @normalized_host && !@normalized_host.empty?
|
1155
|
+
@normalized_host.force_encoding(Encoding::UTF_8)
|
1156
|
+
end
|
1157
|
+
@normalized_host
|
1087
1158
|
end
|
1088
1159
|
|
1089
1160
|
##
|
@@ -1096,14 +1167,6 @@ module Addressable
|
|
1096
1167
|
end
|
1097
1168
|
@host = new_host ? new_host.to_str : nil
|
1098
1169
|
|
1099
|
-
unreserved = CharacterClasses::UNRESERVED
|
1100
|
-
sub_delims = CharacterClasses::SUB_DELIMS
|
1101
|
-
if !@host.nil? && (@host =~ /[<>{}\/\?\#\@"[[:space:]]]/ ||
|
1102
|
-
(@host[/^\[(.*)\]$/, 1] != nil && @host[/^\[(.*)\]$/, 1] !~
|
1103
|
-
Regexp.new("^[#{unreserved}#{sub_delims}:]*$")))
|
1104
|
-
raise InvalidURIError, "Invalid character in host: '#{@host.to_s}'"
|
1105
|
-
end
|
1106
|
-
|
1107
1170
|
# Reset dependent values
|
1108
1171
|
remove_instance_variable(:@authority) if defined?(@authority)
|
1109
1172
|
remove_instance_variable(:@normalized_host) if defined?(@normalized_host)
|
@@ -1144,6 +1207,33 @@ module Addressable
|
|
1144
1207
|
self.host = v
|
1145
1208
|
end
|
1146
1209
|
|
1210
|
+
##
|
1211
|
+
# Returns the top-level domain for this host.
|
1212
|
+
#
|
1213
|
+
# @example
|
1214
|
+
# Addressable::URI.parse("http://www.example.co.uk").tld # => "co.uk"
|
1215
|
+
def tld
|
1216
|
+
PublicSuffix.parse(self.host, ignore_private: true).tld
|
1217
|
+
end
|
1218
|
+
|
1219
|
+
##
|
1220
|
+
# Sets the top-level domain for this URI.
|
1221
|
+
#
|
1222
|
+
# @param [String, #to_str] new_tld The new top-level domain.
|
1223
|
+
def tld=(new_tld)
|
1224
|
+
replaced_tld = host.sub(/#{tld}\z/, new_tld)
|
1225
|
+
self.host = PublicSuffix::Domain.new(replaced_tld).to_s
|
1226
|
+
end
|
1227
|
+
|
1228
|
+
##
|
1229
|
+
# Returns the public suffix domain for this host.
|
1230
|
+
#
|
1231
|
+
# @example
|
1232
|
+
# Addressable::URI.parse("http://www.example.co.uk").domain # => "example.co.uk"
|
1233
|
+
def domain
|
1234
|
+
PublicSuffix.domain(self.host, ignore_private: true)
|
1235
|
+
end
|
1236
|
+
|
1147
1237
|
##
|
1148
1238
|
# The authority component for this URI.
|
1149
1239
|
# Combines the user, password, host, and port components.
|
@@ -1151,7 +1241,7 @@ module Addressable
|
|
1151
1241
|
# @return [String] The authority component.
|
1152
1242
|
def authority
|
1153
1243
|
self.host && @authority ||= begin
|
1154
|
-
authority =
|
1244
|
+
authority = String.new
|
1155
1245
|
if self.userinfo != nil
|
1156
1246
|
authority << "#{self.userinfo}@"
|
1157
1247
|
end
|
@@ -1170,7 +1260,7 @@ module Addressable
|
|
1170
1260
|
def normalized_authority
|
1171
1261
|
return nil unless self.authority
|
1172
1262
|
@normalized_authority ||= begin
|
1173
|
-
authority =
|
1263
|
+
authority = String.new
|
1174
1264
|
if self.normalized_userinfo != nil
|
1175
1265
|
authority << "#{self.normalized_userinfo}@"
|
1176
1266
|
end
|
@@ -1180,6 +1270,11 @@ module Addressable
|
|
1180
1270
|
end
|
1181
1271
|
authority
|
1182
1272
|
end
|
1273
|
+
# All normalized values should be UTF-8
|
1274
|
+
if @normalized_authority
|
1275
|
+
@normalized_authority.force_encoding(Encoding::UTF_8)
|
1276
|
+
end
|
1277
|
+
@normalized_authority
|
1183
1278
|
end
|
1184
1279
|
|
1185
1280
|
##
|
@@ -1197,9 +1292,9 @@ module Addressable
|
|
1197
1292
|
new_user = new_userinfo.strip[/^([^:]*):?/, 1]
|
1198
1293
|
new_password = new_userinfo.strip[/:(.*)$/, 1]
|
1199
1294
|
end
|
1200
|
-
new_host = new_authority.
|
1295
|
+
new_host = new_authority.sub(
|
1201
1296
|
/^([^\[\]]*)@/, EMPTY_STR
|
1202
|
-
).
|
1297
|
+
).sub(
|
1203
1298
|
/:([^:@\[\]]*?)$/, EMPTY_STR
|
1204
1299
|
)
|
1205
1300
|
new_port =
|
@@ -1383,7 +1478,7 @@ module Addressable
|
|
1383
1478
|
# @return [String] The components that identify a site.
|
1384
1479
|
def site
|
1385
1480
|
(self.scheme || self.authority) && @site ||= begin
|
1386
|
-
site_string = ""
|
1481
|
+
site_string = "".dup
|
1387
1482
|
site_string << "#{self.scheme}:" if self.scheme != nil
|
1388
1483
|
site_string << "//#{self.authority}" if self.authority != nil
|
1389
1484
|
site_string
|
@@ -1402,7 +1497,7 @@ module Addressable
|
|
1402
1497
|
def normalized_site
|
1403
1498
|
return nil unless self.site
|
1404
1499
|
@normalized_site ||= begin
|
1405
|
-
site_string = ""
|
1500
|
+
site_string = "".dup
|
1406
1501
|
if self.normalized_scheme != nil
|
1407
1502
|
site_string << "#{self.normalized_scheme}:"
|
1408
1503
|
end
|
@@ -1411,6 +1506,9 @@ module Addressable
|
|
1411
1506
|
end
|
1412
1507
|
site_string
|
1413
1508
|
end
|
1509
|
+
# All normalized values should be UTF-8
|
1510
|
+
@normalized_site.force_encoding(Encoding::UTF_8) if @normalized_site
|
1511
|
+
@normalized_site
|
1414
1512
|
end
|
1415
1513
|
|
1416
1514
|
##
|
@@ -1460,17 +1558,20 @@ module Addressable
|
|
1460
1558
|
result = path.strip.split(SLASH, -1).map do |segment|
|
1461
1559
|
Addressable::URI.normalize_component(
|
1462
1560
|
segment,
|
1463
|
-
Addressable::URI::
|
1561
|
+
Addressable::URI::NormalizeCharacterClasses::PCHAR
|
1464
1562
|
)
|
1465
1563
|
end.join(SLASH)
|
1466
1564
|
|
1467
1565
|
result = URI.normalize_path(result)
|
1468
1566
|
if result.empty? &&
|
1469
1567
|
["http", "https", "ftp", "tftp"].include?(self.normalized_scheme)
|
1470
|
-
result = SLASH
|
1568
|
+
result = SLASH.dup
|
1471
1569
|
end
|
1472
1570
|
result
|
1473
1571
|
end
|
1572
|
+
# All normalized values should be UTF-8
|
1573
|
+
@normalized_path.force_encoding(Encoding::UTF_8) if @normalized_path
|
1574
|
+
@normalized_path
|
1474
1575
|
end
|
1475
1576
|
|
1476
1577
|
##
|
@@ -1489,6 +1590,9 @@ module Addressable
|
|
1489
1590
|
# Reset dependent values
|
1490
1591
|
remove_instance_variable(:@normalized_path) if defined?(@normalized_path)
|
1491
1592
|
remove_composite_values
|
1593
|
+
|
1594
|
+
# Ensure we haven't created an invalid URI
|
1595
|
+
validate()
|
1492
1596
|
end
|
1493
1597
|
|
1494
1598
|
##
|
@@ -1497,7 +1601,7 @@ module Addressable
|
|
1497
1601
|
# @return [String] The path's basename.
|
1498
1602
|
def basename
|
1499
1603
|
# Path cannot be nil
|
1500
|
-
return File.basename(self.path).
|
1604
|
+
return File.basename(self.path).sub(/;[^\/]*$/, EMPTY_STR)
|
1501
1605
|
end
|
1502
1606
|
|
1503
1607
|
##
|
@@ -1529,13 +1633,21 @@ module Addressable
|
|
1529
1633
|
modified_query_class = Addressable::URI::CharacterClasses::QUERY.dup
|
1530
1634
|
# Make sure possible key-value pair delimiters are escaped.
|
1531
1635
|
modified_query_class.sub!("\\&", "").sub!("\\;", "")
|
1532
|
-
pairs = (
|
1636
|
+
pairs = (query || "").split("&", -1)
|
1637
|
+
pairs.delete_if(&:empty?).uniq! if flags.include?(:compacted)
|
1533
1638
|
pairs.sort! if flags.include?(:sorted)
|
1534
1639
|
component = pairs.map do |pair|
|
1535
|
-
Addressable::URI.normalize_component(
|
1640
|
+
Addressable::URI.normalize_component(
|
1641
|
+
pair,
|
1642
|
+
Addressable::URI::NormalizeCharacterClasses::QUERY,
|
1643
|
+
"+"
|
1644
|
+
)
|
1536
1645
|
end.join("&")
|
1537
1646
|
component == "" ? nil : component
|
1538
1647
|
end
|
1648
|
+
# All normalized values should be UTF-8
|
1649
|
+
@normalized_query.force_encoding(Encoding::UTF_8) if @normalized_query
|
1650
|
+
@normalized_query
|
1539
1651
|
end
|
1540
1652
|
|
1541
1653
|
##
|
@@ -1588,11 +1700,13 @@ module Addressable
|
|
1588
1700
|
# so it's best to make all changes in-place.
|
1589
1701
|
pair[0] = URI.unencode_component(pair[0])
|
1590
1702
|
if pair[1].respond_to?(:to_str)
|
1703
|
+
value = pair[1].to_str
|
1591
1704
|
# I loathe the fact that I have to do this. Stupid HTML 4.01.
|
1592
1705
|
# Treating '+' as a space was just an unbelievably bad idea.
|
1593
1706
|
# There was nothing wrong with '%20'!
|
1594
1707
|
# If it ain't broke, don't fix it!
|
1595
|
-
|
1708
|
+
value = value.tr("+", " ") if ["http", "https", nil].include?(scheme)
|
1709
|
+
pair[1] = URI.unencode_component(value)
|
1596
1710
|
end
|
1597
1711
|
if return_type == Hash
|
1598
1712
|
accu[pair[0]] = pair[1]
|
@@ -1644,7 +1758,7 @@ module Addressable
|
|
1644
1758
|
end
|
1645
1759
|
|
1646
1760
|
# new_query_values have form [['key1', 'value1'], ['key2', 'value2']]
|
1647
|
-
buffer = ""
|
1761
|
+
buffer = "".dup
|
1648
1762
|
new_query_values.each do |key, value|
|
1649
1763
|
encoded_key = URI.encode_component(
|
1650
1764
|
key, CharacterClasses::UNRESERVED
|
@@ -1674,7 +1788,7 @@ module Addressable
|
|
1674
1788
|
#
|
1675
1789
|
# @return [String] The request URI required for an HTTP request.
|
1676
1790
|
def request_uri
|
1677
|
-
return nil if self.absolute? && self.scheme !~ /^https?$/
|
1791
|
+
return nil if self.absolute? && self.scheme !~ /^https?$/i
|
1678
1792
|
return (
|
1679
1793
|
(!self.path.empty? ? self.path : SLASH) +
|
1680
1794
|
(self.query ? "?#{self.query}" : EMPTY_STR)
|
@@ -1689,12 +1803,12 @@ module Addressable
|
|
1689
1803
|
if !new_request_uri.respond_to?(:to_str)
|
1690
1804
|
raise TypeError, "Can't convert #{new_request_uri.class} into String."
|
1691
1805
|
end
|
1692
|
-
if self.absolute? && self.scheme !~ /^https?$/
|
1806
|
+
if self.absolute? && self.scheme !~ /^https?$/i
|
1693
1807
|
raise InvalidURIError,
|
1694
1808
|
"Cannot set an HTTP request URI for a non-HTTP URI."
|
1695
1809
|
end
|
1696
1810
|
new_request_uri = new_request_uri.to_str
|
1697
|
-
path_component = new_request_uri[/^([^\?]*)
|
1811
|
+
path_component = new_request_uri[/^([^\?]*)\??(?:.*)$/, 1]
|
1698
1812
|
query_component = new_request_uri[/^(?:[^\?]*)\?(.*)$/, 1]
|
1699
1813
|
path_component = path_component.to_s
|
1700
1814
|
path_component = (!path_component.empty? ? path_component : SLASH)
|
@@ -1723,10 +1837,15 @@ module Addressable
|
|
1723
1837
|
@normalized_fragment ||= begin
|
1724
1838
|
component = Addressable::URI.normalize_component(
|
1725
1839
|
self.fragment,
|
1726
|
-
Addressable::URI::
|
1840
|
+
Addressable::URI::NormalizeCharacterClasses::FRAGMENT
|
1727
1841
|
)
|
1728
1842
|
component == "" ? nil : component
|
1729
1843
|
end
|
1844
|
+
# All normalized values should be UTF-8
|
1845
|
+
if @normalized_fragment
|
1846
|
+
@normalized_fragment.force_encoding(Encoding::UTF_8)
|
1847
|
+
end
|
1848
|
+
@normalized_fragment
|
1730
1849
|
end
|
1731
1850
|
|
1732
1851
|
##
|
@@ -1844,8 +1963,8 @@ module Addressable
|
|
1844
1963
|
# Section 5.2.3 of RFC 3986
|
1845
1964
|
#
|
1846
1965
|
# Removes the right-most path segment from the base path.
|
1847
|
-
if base_path
|
1848
|
-
base_path.
|
1966
|
+
if base_path.include?(SLASH)
|
1967
|
+
base_path.sub!(/\/[^\/]+$/, SLASH)
|
1849
1968
|
else
|
1850
1969
|
base_path = EMPTY_STR
|
1851
1970
|
end
|
@@ -2246,15 +2365,13 @@ module Addressable
|
|
2246
2365
|
"Cannot assemble URI string with ambiguous path: '#{self.path}'"
|
2247
2366
|
end
|
2248
2367
|
@uri_string ||= begin
|
2249
|
-
uri_string =
|
2368
|
+
uri_string = String.new
|
2250
2369
|
uri_string << "#{self.scheme}:" if self.scheme != nil
|
2251
2370
|
uri_string << "//#{self.authority}" if self.authority != nil
|
2252
2371
|
uri_string << self.path.to_s
|
2253
2372
|
uri_string << "?#{self.query}" if self.query != nil
|
2254
2373
|
uri_string << "##{self.fragment}" if self.fragment != nil
|
2255
|
-
|
2256
|
-
uri_string.force_encoding(Encoding::UTF_8)
|
2257
|
-
end
|
2374
|
+
uri_string.force_encoding(Encoding::UTF_8)
|
2258
2375
|
uri_string
|
2259
2376
|
end
|
2260
2377
|
end
|
@@ -2296,10 +2413,10 @@ module Addressable
|
|
2296
2413
|
#
|
2297
2414
|
# @param [Proc] block
|
2298
2415
|
# A set of operations to perform on a given URI.
|
2299
|
-
def defer_validation
|
2300
|
-
raise LocalJumpError, "No block given." unless
|
2416
|
+
def defer_validation
|
2417
|
+
raise LocalJumpError, "No block given." unless block_given?
|
2301
2418
|
@validation_deferred = true
|
2302
|
-
|
2419
|
+
yield
|
2303
2420
|
@validation_deferred = false
|
2304
2421
|
validate
|
2305
2422
|
return nil
|
@@ -2371,6 +2488,19 @@ module Addressable
|
|
2371
2488
|
raise InvalidURIError,
|
2372
2489
|
"Cannot have a relative path with an authority set: '#{self.to_s}'"
|
2373
2490
|
end
|
2491
|
+
if self.path != nil && !self.path.empty? &&
|
2492
|
+
self.path[0..1] == SLASH + SLASH && self.authority == nil
|
2493
|
+
raise InvalidURIError,
|
2494
|
+
"Cannot have a path with two leading slashes " +
|
2495
|
+
"without an authority set: '#{self.to_s}'"
|
2496
|
+
end
|
2497
|
+
unreserved = CharacterClasses::UNRESERVED
|
2498
|
+
sub_delims = CharacterClasses::SUB_DELIMS
|
2499
|
+
if !self.host.nil? && (self.host =~ /[<>{}\/\\\?\#\@"[[:space:]]]/ ||
|
2500
|
+
(self.host[/^\[(.*)\]$/, 1] != nil && self.host[/^\[(.*)\]$/, 1] !~
|
2501
|
+
Regexp.new("^[#{unreserved}#{sub_delims}:]*$")))
|
2502
|
+
raise InvalidURIError, "Invalid character in host: '#{self.host.to_s}'"
|
2503
|
+
end
|
2374
2504
|
return nil
|
2375
2505
|
end
|
2376
2506
|
|
@@ -2384,7 +2514,9 @@ module Addressable
|
|
2384
2514
|
def replace_self(uri)
|
2385
2515
|
# Reset dependent values
|
2386
2516
|
instance_variables.each do |var|
|
2387
|
-
|
2517
|
+
if instance_variable_defined?(var) && var != :@validation_deferred
|
2518
|
+
remove_instance_variable(var)
|
2519
|
+
end
|
2388
2520
|
end
|
2389
2521
|
|
2390
2522
|
@scheme = uri.scheme
|