uri-idna 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -2
- data/lib/uri/idna/base_processing.rb +12 -6
- data/lib/uri/idna/data/bidi_classes.rb +1973 -0
- data/lib/uri/idna/data/codepoint_classes.rb +1226 -0
- data/lib/uri/idna/data/joining_types.rb +839 -0
- data/lib/uri/idna/data/leading_combiners.rb +321 -0
- data/lib/uri/idna/data/scripts.rb +108 -0
- data/lib/uri/idna/data/unicode_version.rb +10 -0
- data/lib/uri/idna/data/uts46.rb +8459 -8179
- data/lib/uri/idna/data/virama_combining_classes.rb +67 -0
- data/lib/uri/idna/idna2008/processing.rb +13 -28
- data/lib/uri/idna/punycode.rb +11 -9
- data/lib/uri/idna/uts46/mapping.rb +39 -37
- data/lib/uri/idna/uts46/processing.rb +14 -15
- data/lib/uri/idna/validation/bidi.rb +34 -52
- data/lib/uri/idna/validation/contextj.rb +62 -0
- data/lib/uri/idna/validation/contexto.rb +61 -0
- data/lib/uri/idna/validation/idna_permitted.rb +30 -0
- data/lib/uri/idna/validation/label.rb +1 -14
- data/lib/uri/idna/validation/leading_combining.rb +23 -0
- data/lib/uri/idna/version.rb +1 -1
- metadata +15 -7
- data/lib/uri/idna/data/idna.rb +0 -4697
- data/lib/uri/idna/intranges.rb +0 -57
- data/lib/uri/idna/validation/codepoint.rb +0 -128
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This file is automatically generated by bin/generate
|
4
|
+
# Unicode version 15.1.0
|
5
|
+
|
6
|
+
module URI
|
7
|
+
module IDNA
|
8
|
+
VIRAMA_COMBINING_CLASSES = "["\
|
9
|
+
"\u094D"\
|
10
|
+
"\u09CD"\
|
11
|
+
"\u0A4D"\
|
12
|
+
"\u0ACD"\
|
13
|
+
"\u0B4D"\
|
14
|
+
"\u0BCD"\
|
15
|
+
"\u0C4D"\
|
16
|
+
"\u0CCD"\
|
17
|
+
"\u0D3B\u0D3C"\
|
18
|
+
"\u0D4D"\
|
19
|
+
"\u0DCA"\
|
20
|
+
"\u0E3A"\
|
21
|
+
"\u0EBA"\
|
22
|
+
"\u0F84"\
|
23
|
+
"\u1039\u103A"\
|
24
|
+
"\u1714\u1715"\
|
25
|
+
"\u1734"\
|
26
|
+
"\u17D2"\
|
27
|
+
"\u1A60"\
|
28
|
+
"\u1B44"\
|
29
|
+
"\u1BAA\u1BAB"\
|
30
|
+
"\u1BF2\u1BF3"\
|
31
|
+
"\u2D7F"\
|
32
|
+
"\uA806"\
|
33
|
+
"\uA82C"\
|
34
|
+
"\uA8C4"\
|
35
|
+
"\uA953"\
|
36
|
+
"\uA9C0"\
|
37
|
+
"\uAAF6"\
|
38
|
+
"\uABED"\
|
39
|
+
"\u{10A3F}"\
|
40
|
+
"\u{11046}"\
|
41
|
+
"\u{11070}"\
|
42
|
+
"\u{1107F}"\
|
43
|
+
"\u{110B9}"\
|
44
|
+
"\u{11133}\u{11134}"\
|
45
|
+
"\u{111C0}"\
|
46
|
+
"\u{11235}"\
|
47
|
+
"\u{112EA}"\
|
48
|
+
"\u{1134D}"\
|
49
|
+
"\u{11442}"\
|
50
|
+
"\u{114C2}"\
|
51
|
+
"\u{115BF}"\
|
52
|
+
"\u{1163F}"\
|
53
|
+
"\u{116B6}"\
|
54
|
+
"\u{1172B}"\
|
55
|
+
"\u{11839}"\
|
56
|
+
"\u{1193D}\u{1193E}"\
|
57
|
+
"\u{119E0}"\
|
58
|
+
"\u{11A34}"\
|
59
|
+
"\u{11A47}"\
|
60
|
+
"\u{11A99}"\
|
61
|
+
"\u{11C3F}"\
|
62
|
+
"\u{11D44}\u{11D45}"\
|
63
|
+
"\u{11D97}"\
|
64
|
+
"\u{11F41}\u{11F42}"\
|
65
|
+
"]"
|
66
|
+
end
|
67
|
+
end
|
@@ -1,17 +1,21 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "options"
|
4
|
+
require_relative "../validation/contextj"
|
5
|
+
require_relative "../validation/contexto"
|
6
|
+
require_relative "../validation/idna_permitted"
|
7
|
+
require_relative "../validation/leading_combining"
|
4
8
|
|
5
9
|
module URI
|
6
10
|
module IDNA
|
7
11
|
module IDNA2008
|
8
12
|
class Processing < BaseProcessing
|
9
|
-
|
10
|
-
|
11
|
-
def options_class
|
13
|
+
def self.options_class
|
12
14
|
Options
|
13
15
|
end
|
14
16
|
|
17
|
+
private
|
18
|
+
|
15
19
|
def validate(label)
|
16
20
|
return if label.empty?
|
17
21
|
|
@@ -21,35 +25,16 @@ module URI
|
|
21
25
|
else
|
22
26
|
Validation::Label.check_ace_prefix(label)
|
23
27
|
end
|
24
|
-
Validation::
|
25
|
-
|
26
|
-
label
|
27
|
-
|
28
|
-
next if Validation::Codepoint.check_contextj(label, cp, pos)
|
29
|
-
rescue InvalidCodepointContextError => e
|
30
|
-
next unless options.check_joiners?
|
31
|
-
|
32
|
-
raise e
|
33
|
-
end
|
34
|
-
|
35
|
-
begin
|
36
|
-
next if Validation::Codepoint.check_contexto(label, cp, pos)
|
37
|
-
rescue InvalidCodepointContextError => e
|
38
|
-
next unless options.check_others?
|
39
|
-
|
40
|
-
raise e
|
41
|
-
end
|
42
|
-
|
43
|
-
Validation::Codepoint.check_idna_validity(label, cp, pos)
|
44
|
-
end
|
28
|
+
Validation::LeadingCombining.call(label) if options.leading_combining?
|
29
|
+
Validation::ContextJ.call(label) if options.check_joiners?
|
30
|
+
Validation::ContextO.call(label) if options.check_others?
|
31
|
+
Validation::IDNAPermitted.call(label)
|
45
32
|
Validation::Bidi.call(label) if check_bidi?
|
46
33
|
end
|
47
34
|
|
48
35
|
def punycode_decode(label)
|
49
36
|
return label unless label.start_with?(ACE_PREFIX)
|
50
37
|
|
51
|
-
raise Error, "A-label must not end with a hyphen" if label[-1] == "-"
|
52
|
-
|
53
38
|
super
|
54
39
|
end
|
55
40
|
end
|
@@ -95,9 +80,9 @@ module URI
|
|
95
80
|
|
96
81
|
Validation::Label.check_length(a_ulabel) if options.verify_dns_length?
|
97
82
|
|
98
|
-
if alabel && ulabel &&
|
83
|
+
if alabel && ulabel && a_ulabel != alabel
|
99
84
|
raise Error,
|
100
|
-
"Provided alabel #{alabel.inspect} doesn't match de-punycoded ulabel #{
|
85
|
+
"Provided alabel #{alabel.inspect} doesn't match de-punycoded ulabel #{a_ulabel.inspect}"
|
101
86
|
end
|
102
87
|
|
103
88
|
a_ulabel
|
data/lib/uri/idna/punycode.rb
CHANGED
@@ -13,6 +13,7 @@ module URI
|
|
13
13
|
DAMP = 700
|
14
14
|
INITIAL_BIAS = 72
|
15
15
|
INITIAL_N = 0x80
|
16
|
+
ADAPT_THRESHOLD = ((BASE - TMIN) * TMAX) / 2
|
16
17
|
|
17
18
|
DELIMITER = 0x2D
|
18
19
|
MAXINT = 0x7FFFFFFF
|
@@ -30,7 +31,9 @@ module URI
|
|
30
31
|
end
|
31
32
|
|
32
33
|
def encode_digit(d)
|
33
|
-
d + 22
|
34
|
+
return d + 22 if d >= 26
|
35
|
+
|
36
|
+
d + 97
|
34
37
|
end
|
35
38
|
|
36
39
|
def adapt(delta, num_points, first_time)
|
@@ -38,7 +41,7 @@ module URI
|
|
38
41
|
delta += (delta / num_points)
|
39
42
|
|
40
43
|
k = 0
|
41
|
-
while delta >
|
44
|
+
while delta > ADAPT_THRESHOLD
|
42
45
|
delta /= BASE - TMIN
|
43
46
|
k += BASE
|
44
47
|
end
|
@@ -47,18 +50,17 @@ module URI
|
|
47
50
|
|
48
51
|
def encode(input)
|
49
52
|
input = input.codepoints
|
50
|
-
output = []
|
51
53
|
|
52
54
|
n = INITIAL_N
|
53
55
|
delta = 0
|
54
56
|
bias = INITIAL_BIAS
|
55
57
|
|
56
|
-
input.
|
58
|
+
output = input.select { |cp| cp < 0x80 }
|
57
59
|
h = b = output.length
|
58
60
|
|
59
61
|
output << DELIMITER if b > 0
|
60
|
-
|
61
|
-
while h <
|
62
|
+
input_length = input.length
|
63
|
+
while h < input_length
|
62
64
|
m = MAXINT
|
63
65
|
input.each do |cp|
|
64
66
|
m = cp if cp >= n && cp < m
|
@@ -116,15 +118,15 @@ module URI
|
|
116
118
|
|
117
119
|
b = input.rindex(DELIMITER) || 0
|
118
120
|
|
119
|
-
0.
|
120
|
-
cp = input[idx]
|
121
|
+
input[0, b].each do |cp|
|
121
122
|
raise PunycodeError, "Invalid input" unless cp < 0x80
|
122
123
|
|
123
124
|
output << cp
|
124
125
|
end
|
125
126
|
|
126
127
|
inc = b > 0 ? b + 1 : 0
|
127
|
-
|
128
|
+
input_length = input.length
|
129
|
+
while inc < input_length
|
128
130
|
old_i = i
|
129
131
|
w = 1
|
130
132
|
k = BASE
|
@@ -8,52 +8,54 @@ module URI
|
|
8
8
|
# https://www.unicode.org/reports/tr46/#IDNA_Mapping_Table
|
9
9
|
module Mapping
|
10
10
|
class << self
|
11
|
+
STATUS_D_REGEX = Regexp.new(REGEX_D_STRING, Regexp::EXTENDED).freeze
|
12
|
+
REGEX_STD3_M_REGEX = Regexp.new(REGEX_STD3_M_STRING, Regexp::EXTENDED).freeze
|
13
|
+
|
14
|
+
MAP_REGEX = Regexp.new("#{REGEX_M_STRING}|#{REGEX_I_STRING}").freeze
|
15
|
+
REGEX_NOT_V = Regexp.new("[^#{REGEX_V_STRING}]").freeze
|
16
|
+
REGEX_NOT_VD = Regexp.new("[^#{REGEX_V_STRING}|#{REGEX_D_STRING}]").freeze
|
17
|
+
REGEX_NOT_V3 = Regexp.new("[^#{REGEX_V_STRING}|#{REGEX_STD3_M_STRING}|#{REGEX_STD3_V_STRING}]").freeze
|
18
|
+
REGEX_NOT_VD3 = Regexp.new(
|
19
|
+
"[^#{REGEX_V_STRING}|#{REGEX_D_STRING}|#{REGEX_STD3_M_STRING}|#{REGEX_STD3_V_STRING}]",
|
20
|
+
).freeze
|
21
|
+
|
11
22
|
def call(domain_name, transitional_processing: false, use_std3_ascii_rules: true)
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
output += if transitional_processing && codepoint == 7838
|
20
|
-
[115, 115]
|
21
|
-
else
|
22
|
-
replacement
|
23
|
-
end
|
24
|
-
when "D" # deviation
|
25
|
-
if transitional_processing
|
26
|
-
output += replacement
|
27
|
-
else
|
28
|
-
output << codepoint
|
29
|
-
end
|
30
|
-
when "3" # disallowed_STD3_valid, disallowed_STD3_mapped
|
31
|
-
if use_std3_ascii_rules || !replacement
|
32
|
-
output << codepoint
|
33
|
-
else
|
34
|
-
output += replacement
|
35
|
-
end
|
36
|
-
when "I" # ignored
|
37
|
-
next
|
23
|
+
return domain_name.downcase if domain_name.ascii_only?
|
24
|
+
|
25
|
+
output = domain_name.gsub(MAP_REGEX) do |match|
|
26
|
+
if transitional_processing && match == "\u1E9E"
|
27
|
+
"ss"
|
28
|
+
else
|
29
|
+
REPLACEMENTS[match]
|
38
30
|
end
|
39
31
|
end
|
40
|
-
output.
|
32
|
+
output.gsub!(STATUS_D_REGEX, REPLACEMENTS) if transitional_processing
|
33
|
+
output.gsub!(REGEX_STD3_M_REGEX, REPLACEMENTS) unless use_std3_ascii_rules
|
34
|
+
|
35
|
+
output.ascii_only? ? output : output.unicode_normalize!(:nfc)
|
41
36
|
end
|
42
37
|
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
38
|
+
def validate_label_status(label, transitional_processing:, use_std3_ascii_rules:)
|
39
|
+
regex =
|
40
|
+
if transitional_processing && use_std3_ascii_rules
|
41
|
+
REGEX_NOT_V
|
42
|
+
elsif transitional_processing
|
43
|
+
REGEX_NOT_V3
|
44
|
+
elsif use_std3_ascii_rules
|
45
|
+
REGEX_NOT_VD
|
46
|
+
else
|
47
|
+
REGEX_NOT_VD3
|
48
|
+
end
|
49
|
+
|
50
|
+
return unless (pos = label.index(regex))
|
48
51
|
|
49
|
-
raise InvalidCodepointError,
|
52
|
+
raise InvalidCodepointError, error_message(label, pos)
|
50
53
|
end
|
51
54
|
|
52
|
-
|
53
|
-
return UTS46_DATA[codepoint] if codepoint < 256
|
55
|
+
private
|
54
56
|
|
55
|
-
|
56
|
-
|
57
|
+
def error_message(label, pos)
|
58
|
+
format("Codepoint U+%04X at position %d of %p not allowed in UTS46", label[pos].ord, pos + 1, label)
|
57
59
|
end
|
58
60
|
end
|
59
61
|
end
|
@@ -2,12 +2,18 @@
|
|
2
2
|
|
3
3
|
require_relative "mapping"
|
4
4
|
require_relative "options"
|
5
|
+
require_relative "../validation/contextj"
|
6
|
+
require_relative "../validation/leading_combining"
|
5
7
|
|
6
8
|
module URI
|
7
9
|
module IDNA
|
8
10
|
module UTS46
|
9
11
|
# https://www.unicode.org/reports/tr46/#Processing
|
10
12
|
class Processing < BaseProcessing
|
13
|
+
def self.options_class
|
14
|
+
Options
|
15
|
+
end
|
16
|
+
|
11
17
|
def initialize(domain_name, **options)
|
12
18
|
super
|
13
19
|
@domain_name = Mapping.call(
|
@@ -40,10 +46,6 @@ module URI
|
|
40
46
|
|
41
47
|
private
|
42
48
|
|
43
|
-
def options_class
|
44
|
-
Options
|
45
|
-
end
|
46
|
-
|
47
49
|
# https://www.unicode.org/reports/tr46/#Validity_Criteria
|
48
50
|
def validate(label, transitional_processing: options.transitional_processing?)
|
49
51
|
return if label.empty?
|
@@ -56,16 +58,13 @@ module URI
|
|
56
58
|
Validation::Label.check_ace_prefix(label)
|
57
59
|
end
|
58
60
|
Validation::Label.check_dot(label)
|
59
|
-
Validation::
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
Validation::Codepoint.check_contextj(label, cp, pos) if options.check_joiners?
|
68
|
-
end
|
61
|
+
Validation::LeadingCombining.call(label)
|
62
|
+
Mapping.validate_label_status(
|
63
|
+
label,
|
64
|
+
transitional_processing: transitional_processing,
|
65
|
+
use_std3_ascii_rules: options.use_std3_ascii_rules?,
|
66
|
+
)
|
67
|
+
Validation::ContextJ.call(label) if options.check_joiners?
|
69
68
|
Validation::Bidi.call(label) if check_bidi?
|
70
69
|
end
|
71
70
|
end
|
@@ -76,7 +75,7 @@ module URI
|
|
76
75
|
|
77
76
|
# https://www.unicode.org/reports/tr46/#ToASCII
|
78
77
|
class ToASCII < Processing
|
79
|
-
def options_class
|
78
|
+
def self.options_class
|
80
79
|
ToASCIIOptions
|
81
80
|
end
|
82
81
|
|
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require_relative "../data/bidi_classes"
|
4
|
+
|
3
5
|
module URI
|
4
6
|
module IDNA
|
5
7
|
module Validation
|
@@ -8,55 +10,46 @@ module URI
|
|
8
10
|
# https://datatracker.ietf.org/doc/html/rfc5893#section-2
|
9
11
|
module Bidi
|
10
12
|
class << self
|
13
|
+
BIDI_R1_RTL = Regexp.new(BIDI_CLASSES["RTL"]).freeze
|
14
|
+
BIDI_R1_LTR = Regexp.new(BIDI_CLASSES["L"]).freeze
|
15
|
+
BIDI_R2 = Regexp.new("#{BIDI_CLASSES['L']}|#{BIDI_CLASSES['UNUSED']}").freeze
|
16
|
+
BIDI_R3 = Regexp.new(
|
17
|
+
"(?:#{"#{BIDI_CLASSES['RTL']}|#{BIDI_CLASSES['AN']}|#{BIDI_CLASSES['EN']}"})#{BIDI_CLASSES['NSM']}*\\z",
|
18
|
+
).freeze
|
19
|
+
BIDI_R4_EN = Regexp.new(BIDI_CLASSES["EN"]).freeze
|
20
|
+
BIDI_R4_AN = Regexp.new(BIDI_CLASSES["AN"]).freeze
|
21
|
+
BIDI_R5 = Regexp.new("#{BIDI_CLASSES['RTL']}|#{BIDI_CLASSES['AN']}").freeze
|
22
|
+
BIDI_R6 = Regexp.new("(?:#{"#{BIDI_CLASSES['L']}|#{BIDI_CLASSES['EN']}"})#{BIDI_CLASSES['NSM']}*\\z").freeze
|
23
|
+
|
11
24
|
def call(label)
|
12
25
|
# Bidi rule 1
|
13
|
-
if
|
14
|
-
rtl = true
|
15
|
-
elsif bidi_class(label[0].ord, "L")
|
26
|
+
if BIDI_R1_LTR.match?(label[0])
|
16
27
|
rtl = false
|
28
|
+
elsif BIDI_R1_RTL.match?(label[0])
|
29
|
+
rtl = true
|
17
30
|
else
|
18
31
|
raise BidiError, "First codepoint in label #{label} must be directionality L, R or AL"
|
19
32
|
end
|
20
33
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
# Bidi rule 2
|
26
|
-
if bidi_class(cp, "L") || bidi_class(cp, "UNUSED")
|
27
|
-
raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a right-to-left label"
|
28
|
-
end
|
29
|
-
|
30
|
-
# Bidi rule 3
|
31
|
-
direction = bidi_class(cp, "RTL") || bidi_class(cp, "EN") || bidi_class(cp, "AN")
|
32
|
-
if direction
|
33
|
-
valid_ending = true
|
34
|
-
elsif !bidi_class(cp, "NSM")
|
35
|
-
valid_ending = false
|
36
|
-
end
|
37
|
-
# Bidi rule 4
|
38
|
-
if %w[EN AN].include?(direction)
|
39
|
-
number_type ||= direction
|
40
|
-
raise BidiError, "Can not mix numeral types in a right-to-left label" if number_type != direction
|
41
|
-
end
|
42
|
-
else
|
43
|
-
# Bidi rule 5
|
44
|
-
if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
|
45
|
-
raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a left-to-right label"
|
46
|
-
end
|
47
|
-
|
48
|
-
# Bidi rule 6
|
49
|
-
if bidi_class(cp, "L") || bidi_class(cp, "EN")
|
50
|
-
valid_ending = true
|
51
|
-
elsif !bidi_class(cp, "NSM")
|
52
|
-
valid_ending = false
|
53
|
-
end
|
34
|
+
if rtl
|
35
|
+
# Bidi rule 2
|
36
|
+
if (pos = label.index(BIDI_R2))
|
37
|
+
raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a right-to-left label"
|
54
38
|
end
|
39
|
+
# Bidi rule 3
|
40
|
+
raise BidiError, "Label ends with illegal codepoint directionality" unless label.match?(BIDI_R3)
|
41
|
+
# Bidi rule 4
|
42
|
+
if label.match?(BIDI_R4_EN) && label.match?(BIDI_R4_AN)
|
43
|
+
raise BidiError, "Can not mix numeral types in a right-to-left label"
|
44
|
+
end
|
45
|
+
else
|
46
|
+
# Bidi rule 5
|
47
|
+
if (pos = label.index(BIDI_R5))
|
48
|
+
raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a left-to-right label"
|
49
|
+
end
|
50
|
+
# Bidi rule 6
|
51
|
+
raise BidiError, "Label ends with illegal codepoint directionality" unless label.match?(BIDI_R6)
|
55
52
|
end
|
56
|
-
|
57
|
-
raise BidiError, "Label ends with illegal codepoint directionality" unless valid_ending
|
58
|
-
|
59
|
-
true
|
60
53
|
end
|
61
54
|
|
62
55
|
# https://www.rfc-editor.org/rfc/rfc5891.html#section-4.2.3.4
|
@@ -71,22 +64,11 @@ module URI
|
|
71
64
|
end
|
72
65
|
next if label.ascii_only?
|
73
66
|
|
74
|
-
label.
|
75
|
-
next if cp < 256
|
76
|
-
return true if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
|
77
|
-
end
|
67
|
+
return true if label.match?(BIDI_R5)
|
78
68
|
end
|
79
69
|
|
80
70
|
false
|
81
71
|
end
|
82
|
-
|
83
|
-
private
|
84
|
-
|
85
|
-
def bidi_class(codepoint, bidi_class)
|
86
|
-
return bidi_class if Intranges.contain?(codepoint, BIDI_CLASSES[bidi_class])
|
87
|
-
|
88
|
-
false
|
89
|
-
end
|
90
72
|
end
|
91
73
|
end
|
92
74
|
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../data/codepoint_classes"
|
4
|
+
require_relative "../data/joining_types"
|
5
|
+
require_relative "../data/virama_combining_classes"
|
6
|
+
|
7
|
+
module URI
|
8
|
+
module IDNA
|
9
|
+
module Validation
|
10
|
+
# https://datatracker.ietf.org/doc/html/rfc5892
|
11
|
+
module ContextJ
|
12
|
+
class << self
|
13
|
+
CONTEXTJ_REGEX = Regexp.new("[#{CODEPOINT_CLASSES['CONTEXTJ']}]").freeze
|
14
|
+
|
15
|
+
def call(label)
|
16
|
+
return if label.ascii_only?
|
17
|
+
|
18
|
+
offset = 0
|
19
|
+
while (pos = label.index(CONTEXTJ_REGEX, offset))
|
20
|
+
raise InvalidCodepointContextError, error_message(label, pos) unless valid_contextj?(label, pos)
|
21
|
+
|
22
|
+
offset = pos + 1
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def valid_contextj?(label, pos)
|
29
|
+
case label[pos]
|
30
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
|
31
|
+
when "\u200c"
|
32
|
+
return true if pos > 0 && VIRAMA_COMBINING_CLASSES.match?(label[pos - 1])
|
33
|
+
|
34
|
+
ok = false
|
35
|
+
(pos - 1).downto(0) do |i|
|
36
|
+
joining_type = JOINING_TYPES[label[i]]
|
37
|
+
if [0x4c, 0x44].include?(joining_type)
|
38
|
+
ok = true
|
39
|
+
break
|
40
|
+
end
|
41
|
+
end
|
42
|
+
return false unless ok
|
43
|
+
|
44
|
+
(pos + 1).upto(label.length - 1) do |i|
|
45
|
+
joining_type = JOINING_TYPES[label[i]]
|
46
|
+
return true if [0x52, 0x44].include?(joining_type)
|
47
|
+
end
|
48
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
|
49
|
+
when "\u200d"
|
50
|
+
return VIRAMA_COMBINING_CLASSES.match?(label[pos - 1]) if pos > 0
|
51
|
+
end
|
52
|
+
false
|
53
|
+
end
|
54
|
+
|
55
|
+
def error_message(label, pos)
|
56
|
+
format("ContextJ codepoint U+%04X at position %d of %p not allowed", label[pos].ord, pos + 1, label)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../data/codepoint_classes"
|
4
|
+
require_relative "../data/scripts"
|
5
|
+
|
6
|
+
module URI
|
7
|
+
module IDNA
|
8
|
+
module Validation
|
9
|
+
# https://datatracker.ietf.org/doc/html/rfc5892
|
10
|
+
module ContextO
|
11
|
+
class << self
|
12
|
+
CONTEXTO_REGEX = Regexp.new("[#{CODEPOINT_CLASSES['CONTEXTO']}]").freeze
|
13
|
+
CONTEXTO_A4_REGEX = Regexp.new(SCRIPTS["Greek"])
|
14
|
+
CONTEXTO_A5_REGEX = Regexp.new(SCRIPTS["Hebrew"])
|
15
|
+
CONTEXTO_A7_REGEX = Regexp.new("#{SCRIPTS['Hiragana']}|#{SCRIPTS['Katakana']}|#{SCRIPTS['Han']}").freeze
|
16
|
+
CONTEXTO_A8_REGEX = /[\u06F0-\u06F9]/.freeze
|
17
|
+
CONTEXTO_A9_REGEX = /[\u0660-\u0669]/.freeze
|
18
|
+
|
19
|
+
def call(label)
|
20
|
+
offset = 0
|
21
|
+
while (pos = label.index(CONTEXTO_REGEX, offset))
|
22
|
+
raise InvalidCodepointContextError, error_message(label, pos) unless valid_contexto?(label, pos)
|
23
|
+
|
24
|
+
offset = pos + 1
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def valid_contexto?(label, pos)
|
31
|
+
case label[pos]
|
32
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
|
33
|
+
when "\u00b7"
|
34
|
+
pos > 0 && pos < label.length - 1 ? (label[pos - 1] == "\u006c" && label[pos + 1] == "\u006c") : false
|
35
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
|
36
|
+
when "\u0375"
|
37
|
+
pos < label.length - 1 ? CONTEXTO_A4_REGEX.match?(label[pos + 1]) : false
|
38
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
|
39
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
|
40
|
+
when "\u05f3", "\u05f4"
|
41
|
+
pos > 0 ? CONTEXTO_A5_REGEX.match?(label[pos - 1]) : false
|
42
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
|
43
|
+
when "\u30fb"
|
44
|
+
CONTEXTO_A7_REGEX.match?(label)
|
45
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
|
46
|
+
when "\u0660".."\u0669"
|
47
|
+
!CONTEXTO_A8_REGEX.match?(label)
|
48
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
|
49
|
+
when "\u06f0".."\u06f9"
|
50
|
+
!CONTEXTO_A9_REGEX.match?(label)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def error_message(label, pos)
|
55
|
+
format("ContextO codepoint U+%04X at position %d of %p not allowed", label[pos].ord, pos + 1, label)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../data/codepoint_classes"
|
4
|
+
|
5
|
+
module URI
|
6
|
+
module IDNA
|
7
|
+
module Validation
|
8
|
+
module IDNAPermitted
|
9
|
+
class << self
|
10
|
+
IDNA_REGEX = Regexp.new(
|
11
|
+
"[^(#{CODEPOINT_CLASSES['PVALID']}|#{CODEPOINT_CLASSES['CONTEXTJ']}|#{CODEPOINT_CLASSES['CONTEXTO']})]",
|
12
|
+
).freeze
|
13
|
+
|
14
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
|
15
|
+
def call(label)
|
16
|
+
return unless (pos = label.index(IDNA_REGEX))
|
17
|
+
|
18
|
+
raise InvalidCodepointError, error_message(label, pos)
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def error_message(label, pos)
|
24
|
+
format("Codepoint U+%04X at position %d of %p not allowed in IDNA2008", label[pos].ord, pos + 1, label)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -1,8 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative "../intranges"
|
4
|
-
require_relative "../data/idna"
|
5
|
-
|
6
3
|
module URI
|
7
4
|
module IDNA
|
8
5
|
module Validation
|
@@ -11,7 +8,7 @@ module URI
|
|
11
8
|
# 4.1. Input to IDNA Registration
|
12
9
|
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.1
|
13
10
|
def check_nfc(label)
|
14
|
-
return if label.unicode_normalized?(:nfc)
|
11
|
+
return if label.ascii_only? || label.unicode_normalized?(:nfc)
|
15
12
|
|
16
13
|
raise Error, "Label must be in Unicode Normalization Form NFC"
|
17
14
|
end
|
@@ -38,16 +35,6 @@ module URI
|
|
38
35
|
raise Error, "Label must not begin with `xn--`"
|
39
36
|
end
|
40
37
|
|
41
|
-
# 4.2.3.2. Leading Combining Marks
|
42
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
|
43
|
-
def check_leading_combining(label)
|
44
|
-
cp = label[0].ord
|
45
|
-
return if cp < 256
|
46
|
-
return unless Intranges.contain?(cp, INITIAL_COMBINERS)
|
47
|
-
|
48
|
-
raise Error, "Label begins with an illegal combining character"
|
49
|
-
end
|
50
|
-
|
51
38
|
def check_dot(label)
|
52
39
|
raise Error, "Label must not contain a U+002E ( . ) FULL STOP" if label.include?(".")
|
53
40
|
end
|