uri-idna 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -2
- data/lib/uri/idna/base_processing.rb +12 -6
- data/lib/uri/idna/data/bidi_classes.rb +1973 -0
- data/lib/uri/idna/data/codepoint_classes.rb +1226 -0
- data/lib/uri/idna/data/joining_types.rb +839 -0
- data/lib/uri/idna/data/leading_combiners.rb +321 -0
- data/lib/uri/idna/data/scripts.rb +108 -0
- data/lib/uri/idna/data/unicode_version.rb +10 -0
- data/lib/uri/idna/data/uts46.rb +8459 -8179
- data/lib/uri/idna/data/virama_combining_classes.rb +67 -0
- data/lib/uri/idna/idna2008/processing.rb +13 -28
- data/lib/uri/idna/punycode.rb +11 -9
- data/lib/uri/idna/uts46/mapping.rb +39 -37
- data/lib/uri/idna/uts46/processing.rb +14 -15
- data/lib/uri/idna/validation/bidi.rb +34 -52
- data/lib/uri/idna/validation/contextj.rb +62 -0
- data/lib/uri/idna/validation/contexto.rb +61 -0
- data/lib/uri/idna/validation/idna_permitted.rb +30 -0
- data/lib/uri/idna/validation/label.rb +1 -14
- data/lib/uri/idna/validation/leading_combining.rb +23 -0
- data/lib/uri/idna/version.rb +1 -1
- metadata +15 -7
- data/lib/uri/idna/data/idna.rb +0 -4697
- data/lib/uri/idna/intranges.rb +0 -57
- data/lib/uri/idna/validation/codepoint.rb +0 -128
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This file is automatically generated by bin/generate
|
4
|
+
# Unicode version 15.1.0
|
5
|
+
|
6
|
+
module URI
|
7
|
+
module IDNA
|
8
|
+
VIRAMA_COMBINING_CLASSES = "["\
|
9
|
+
"\u094D"\
|
10
|
+
"\u09CD"\
|
11
|
+
"\u0A4D"\
|
12
|
+
"\u0ACD"\
|
13
|
+
"\u0B4D"\
|
14
|
+
"\u0BCD"\
|
15
|
+
"\u0C4D"\
|
16
|
+
"\u0CCD"\
|
17
|
+
"\u0D3B\u0D3C"\
|
18
|
+
"\u0D4D"\
|
19
|
+
"\u0DCA"\
|
20
|
+
"\u0E3A"\
|
21
|
+
"\u0EBA"\
|
22
|
+
"\u0F84"\
|
23
|
+
"\u1039\u103A"\
|
24
|
+
"\u1714\u1715"\
|
25
|
+
"\u1734"\
|
26
|
+
"\u17D2"\
|
27
|
+
"\u1A60"\
|
28
|
+
"\u1B44"\
|
29
|
+
"\u1BAA\u1BAB"\
|
30
|
+
"\u1BF2\u1BF3"\
|
31
|
+
"\u2D7F"\
|
32
|
+
"\uA806"\
|
33
|
+
"\uA82C"\
|
34
|
+
"\uA8C4"\
|
35
|
+
"\uA953"\
|
36
|
+
"\uA9C0"\
|
37
|
+
"\uAAF6"\
|
38
|
+
"\uABED"\
|
39
|
+
"\u{10A3F}"\
|
40
|
+
"\u{11046}"\
|
41
|
+
"\u{11070}"\
|
42
|
+
"\u{1107F}"\
|
43
|
+
"\u{110B9}"\
|
44
|
+
"\u{11133}\u{11134}"\
|
45
|
+
"\u{111C0}"\
|
46
|
+
"\u{11235}"\
|
47
|
+
"\u{112EA}"\
|
48
|
+
"\u{1134D}"\
|
49
|
+
"\u{11442}"\
|
50
|
+
"\u{114C2}"\
|
51
|
+
"\u{115BF}"\
|
52
|
+
"\u{1163F}"\
|
53
|
+
"\u{116B6}"\
|
54
|
+
"\u{1172B}"\
|
55
|
+
"\u{11839}"\
|
56
|
+
"\u{1193D}\u{1193E}"\
|
57
|
+
"\u{119E0}"\
|
58
|
+
"\u{11A34}"\
|
59
|
+
"\u{11A47}"\
|
60
|
+
"\u{11A99}"\
|
61
|
+
"\u{11C3F}"\
|
62
|
+
"\u{11D44}\u{11D45}"\
|
63
|
+
"\u{11D97}"\
|
64
|
+
"\u{11F41}\u{11F42}"\
|
65
|
+
"]"
|
66
|
+
end
|
67
|
+
end
|
@@ -1,17 +1,21 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "options"
|
4
|
+
require_relative "../validation/contextj"
|
5
|
+
require_relative "../validation/contexto"
|
6
|
+
require_relative "../validation/idna_permitted"
|
7
|
+
require_relative "../validation/leading_combining"
|
4
8
|
|
5
9
|
module URI
|
6
10
|
module IDNA
|
7
11
|
module IDNA2008
|
8
12
|
class Processing < BaseProcessing
|
9
|
-
|
10
|
-
|
11
|
-
def options_class
|
13
|
+
def self.options_class
|
12
14
|
Options
|
13
15
|
end
|
14
16
|
|
17
|
+
private
|
18
|
+
|
15
19
|
def validate(label)
|
16
20
|
return if label.empty?
|
17
21
|
|
@@ -21,35 +25,16 @@ module URI
|
|
21
25
|
else
|
22
26
|
Validation::Label.check_ace_prefix(label)
|
23
27
|
end
|
24
|
-
Validation::
|
25
|
-
|
26
|
-
label
|
27
|
-
|
28
|
-
next if Validation::Codepoint.check_contextj(label, cp, pos)
|
29
|
-
rescue InvalidCodepointContextError => e
|
30
|
-
next unless options.check_joiners?
|
31
|
-
|
32
|
-
raise e
|
33
|
-
end
|
34
|
-
|
35
|
-
begin
|
36
|
-
next if Validation::Codepoint.check_contexto(label, cp, pos)
|
37
|
-
rescue InvalidCodepointContextError => e
|
38
|
-
next unless options.check_others?
|
39
|
-
|
40
|
-
raise e
|
41
|
-
end
|
42
|
-
|
43
|
-
Validation::Codepoint.check_idna_validity(label, cp, pos)
|
44
|
-
end
|
28
|
+
Validation::LeadingCombining.call(label) if options.leading_combining?
|
29
|
+
Validation::ContextJ.call(label) if options.check_joiners?
|
30
|
+
Validation::ContextO.call(label) if options.check_others?
|
31
|
+
Validation::IDNAPermitted.call(label)
|
45
32
|
Validation::Bidi.call(label) if check_bidi?
|
46
33
|
end
|
47
34
|
|
48
35
|
def punycode_decode(label)
|
49
36
|
return label unless label.start_with?(ACE_PREFIX)
|
50
37
|
|
51
|
-
raise Error, "A-label must not end with a hyphen" if label[-1] == "-"
|
52
|
-
|
53
38
|
super
|
54
39
|
end
|
55
40
|
end
|
@@ -95,9 +80,9 @@ module URI
|
|
95
80
|
|
96
81
|
Validation::Label.check_length(a_ulabel) if options.verify_dns_length?
|
97
82
|
|
98
|
-
if alabel && ulabel &&
|
83
|
+
if alabel && ulabel && a_ulabel != alabel
|
99
84
|
raise Error,
|
100
|
-
"Provided alabel #{alabel.inspect} doesn't match de-punycoded ulabel #{
|
85
|
+
"Provided alabel #{alabel.inspect} doesn't match de-punycoded ulabel #{a_ulabel.inspect}"
|
101
86
|
end
|
102
87
|
|
103
88
|
a_ulabel
|
data/lib/uri/idna/punycode.rb
CHANGED
@@ -13,6 +13,7 @@ module URI
|
|
13
13
|
DAMP = 700
|
14
14
|
INITIAL_BIAS = 72
|
15
15
|
INITIAL_N = 0x80
|
16
|
+
ADAPT_THRESHOLD = ((BASE - TMIN) * TMAX) / 2
|
16
17
|
|
17
18
|
DELIMITER = 0x2D
|
18
19
|
MAXINT = 0x7FFFFFFF
|
@@ -30,7 +31,9 @@ module URI
|
|
30
31
|
end
|
31
32
|
|
32
33
|
def encode_digit(d)
|
33
|
-
d + 22
|
34
|
+
return d + 22 if d >= 26
|
35
|
+
|
36
|
+
d + 97
|
34
37
|
end
|
35
38
|
|
36
39
|
def adapt(delta, num_points, first_time)
|
@@ -38,7 +41,7 @@ module URI
|
|
38
41
|
delta += (delta / num_points)
|
39
42
|
|
40
43
|
k = 0
|
41
|
-
while delta >
|
44
|
+
while delta > ADAPT_THRESHOLD
|
42
45
|
delta /= BASE - TMIN
|
43
46
|
k += BASE
|
44
47
|
end
|
@@ -47,18 +50,17 @@ module URI
|
|
47
50
|
|
48
51
|
def encode(input)
|
49
52
|
input = input.codepoints
|
50
|
-
output = []
|
51
53
|
|
52
54
|
n = INITIAL_N
|
53
55
|
delta = 0
|
54
56
|
bias = INITIAL_BIAS
|
55
57
|
|
56
|
-
input.
|
58
|
+
output = input.select { |cp| cp < 0x80 }
|
57
59
|
h = b = output.length
|
58
60
|
|
59
61
|
output << DELIMITER if b > 0
|
60
|
-
|
61
|
-
while h <
|
62
|
+
input_length = input.length
|
63
|
+
while h < input_length
|
62
64
|
m = MAXINT
|
63
65
|
input.each do |cp|
|
64
66
|
m = cp if cp >= n && cp < m
|
@@ -116,15 +118,15 @@ module URI
|
|
116
118
|
|
117
119
|
b = input.rindex(DELIMITER) || 0
|
118
120
|
|
119
|
-
0.
|
120
|
-
cp = input[idx]
|
121
|
+
input[0, b].each do |cp|
|
121
122
|
raise PunycodeError, "Invalid input" unless cp < 0x80
|
122
123
|
|
123
124
|
output << cp
|
124
125
|
end
|
125
126
|
|
126
127
|
inc = b > 0 ? b + 1 : 0
|
127
|
-
|
128
|
+
input_length = input.length
|
129
|
+
while inc < input_length
|
128
130
|
old_i = i
|
129
131
|
w = 1
|
130
132
|
k = BASE
|
@@ -8,52 +8,54 @@ module URI
|
|
8
8
|
# https://www.unicode.org/reports/tr46/#IDNA_Mapping_Table
|
9
9
|
module Mapping
|
10
10
|
class << self
|
11
|
+
STATUS_D_REGEX = Regexp.new(REGEX_D_STRING, Regexp::EXTENDED).freeze
|
12
|
+
REGEX_STD3_M_REGEX = Regexp.new(REGEX_STD3_M_STRING, Regexp::EXTENDED).freeze
|
13
|
+
|
14
|
+
MAP_REGEX = Regexp.new("#{REGEX_M_STRING}|#{REGEX_I_STRING}").freeze
|
15
|
+
REGEX_NOT_V = Regexp.new("[^#{REGEX_V_STRING}]").freeze
|
16
|
+
REGEX_NOT_VD = Regexp.new("[^#{REGEX_V_STRING}|#{REGEX_D_STRING}]").freeze
|
17
|
+
REGEX_NOT_V3 = Regexp.new("[^#{REGEX_V_STRING}|#{REGEX_STD3_M_STRING}|#{REGEX_STD3_V_STRING}]").freeze
|
18
|
+
REGEX_NOT_VD3 = Regexp.new(
|
19
|
+
"[^#{REGEX_V_STRING}|#{REGEX_D_STRING}|#{REGEX_STD3_M_STRING}|#{REGEX_STD3_V_STRING}]",
|
20
|
+
).freeze
|
21
|
+
|
11
22
|
def call(domain_name, transitional_processing: false, use_std3_ascii_rules: true)
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
output += if transitional_processing && codepoint == 7838
|
20
|
-
[115, 115]
|
21
|
-
else
|
22
|
-
replacement
|
23
|
-
end
|
24
|
-
when "D" # deviation
|
25
|
-
if transitional_processing
|
26
|
-
output += replacement
|
27
|
-
else
|
28
|
-
output << codepoint
|
29
|
-
end
|
30
|
-
when "3" # disallowed_STD3_valid, disallowed_STD3_mapped
|
31
|
-
if use_std3_ascii_rules || !replacement
|
32
|
-
output << codepoint
|
33
|
-
else
|
34
|
-
output += replacement
|
35
|
-
end
|
36
|
-
when "I" # ignored
|
37
|
-
next
|
23
|
+
return domain_name.downcase if domain_name.ascii_only?
|
24
|
+
|
25
|
+
output = domain_name.gsub(MAP_REGEX) do |match|
|
26
|
+
if transitional_processing && match == "\u1E9E"
|
27
|
+
"ss"
|
28
|
+
else
|
29
|
+
REPLACEMENTS[match]
|
38
30
|
end
|
39
31
|
end
|
40
|
-
output.
|
32
|
+
output.gsub!(STATUS_D_REGEX, REPLACEMENTS) if transitional_processing
|
33
|
+
output.gsub!(REGEX_STD3_M_REGEX, REPLACEMENTS) unless use_std3_ascii_rules
|
34
|
+
|
35
|
+
output.ascii_only? ? output : output.unicode_normalize!(:nfc)
|
41
36
|
end
|
42
37
|
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
38
|
+
def validate_label_status(label, transitional_processing:, use_std3_ascii_rules:)
|
39
|
+
regex =
|
40
|
+
if transitional_processing && use_std3_ascii_rules
|
41
|
+
REGEX_NOT_V
|
42
|
+
elsif transitional_processing
|
43
|
+
REGEX_NOT_V3
|
44
|
+
elsif use_std3_ascii_rules
|
45
|
+
REGEX_NOT_VD
|
46
|
+
else
|
47
|
+
REGEX_NOT_VD3
|
48
|
+
end
|
49
|
+
|
50
|
+
return unless (pos = label.index(regex))
|
48
51
|
|
49
|
-
raise InvalidCodepointError,
|
52
|
+
raise InvalidCodepointError, error_message(label, pos)
|
50
53
|
end
|
51
54
|
|
52
|
-
|
53
|
-
return UTS46_DATA[codepoint] if codepoint < 256
|
55
|
+
private
|
54
56
|
|
55
|
-
|
56
|
-
|
57
|
+
def error_message(label, pos)
|
58
|
+
format("Codepoint U+%04X at position %d of %p not allowed in UTS46", label[pos].ord, pos + 1, label)
|
57
59
|
end
|
58
60
|
end
|
59
61
|
end
|
@@ -2,12 +2,18 @@
|
|
2
2
|
|
3
3
|
require_relative "mapping"
|
4
4
|
require_relative "options"
|
5
|
+
require_relative "../validation/contextj"
|
6
|
+
require_relative "../validation/leading_combining"
|
5
7
|
|
6
8
|
module URI
|
7
9
|
module IDNA
|
8
10
|
module UTS46
|
9
11
|
# https://www.unicode.org/reports/tr46/#Processing
|
10
12
|
class Processing < BaseProcessing
|
13
|
+
def self.options_class
|
14
|
+
Options
|
15
|
+
end
|
16
|
+
|
11
17
|
def initialize(domain_name, **options)
|
12
18
|
super
|
13
19
|
@domain_name = Mapping.call(
|
@@ -40,10 +46,6 @@ module URI
|
|
40
46
|
|
41
47
|
private
|
42
48
|
|
43
|
-
def options_class
|
44
|
-
Options
|
45
|
-
end
|
46
|
-
|
47
49
|
# https://www.unicode.org/reports/tr46/#Validity_Criteria
|
48
50
|
def validate(label, transitional_processing: options.transitional_processing?)
|
49
51
|
return if label.empty?
|
@@ -56,16 +58,13 @@ module URI
|
|
56
58
|
Validation::Label.check_ace_prefix(label)
|
57
59
|
end
|
58
60
|
Validation::Label.check_dot(label)
|
59
|
-
Validation::
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
Validation::Codepoint.check_contextj(label, cp, pos) if options.check_joiners?
|
68
|
-
end
|
61
|
+
Validation::LeadingCombining.call(label)
|
62
|
+
Mapping.validate_label_status(
|
63
|
+
label,
|
64
|
+
transitional_processing: transitional_processing,
|
65
|
+
use_std3_ascii_rules: options.use_std3_ascii_rules?,
|
66
|
+
)
|
67
|
+
Validation::ContextJ.call(label) if options.check_joiners?
|
69
68
|
Validation::Bidi.call(label) if check_bidi?
|
70
69
|
end
|
71
70
|
end
|
@@ -76,7 +75,7 @@ module URI
|
|
76
75
|
|
77
76
|
# https://www.unicode.org/reports/tr46/#ToASCII
|
78
77
|
class ToASCII < Processing
|
79
|
-
def options_class
|
78
|
+
def self.options_class
|
80
79
|
ToASCIIOptions
|
81
80
|
end
|
82
81
|
|
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require_relative "../data/bidi_classes"
|
4
|
+
|
3
5
|
module URI
|
4
6
|
module IDNA
|
5
7
|
module Validation
|
@@ -8,55 +10,46 @@ module URI
|
|
8
10
|
# https://datatracker.ietf.org/doc/html/rfc5893#section-2
|
9
11
|
module Bidi
|
10
12
|
class << self
|
13
|
+
BIDI_R1_RTL = Regexp.new(BIDI_CLASSES["RTL"]).freeze
|
14
|
+
BIDI_R1_LTR = Regexp.new(BIDI_CLASSES["L"]).freeze
|
15
|
+
BIDI_R2 = Regexp.new("#{BIDI_CLASSES['L']}|#{BIDI_CLASSES['UNUSED']}").freeze
|
16
|
+
BIDI_R3 = Regexp.new(
|
17
|
+
"(?:#{"#{BIDI_CLASSES['RTL']}|#{BIDI_CLASSES['AN']}|#{BIDI_CLASSES['EN']}"})#{BIDI_CLASSES['NSM']}*\\z",
|
18
|
+
).freeze
|
19
|
+
BIDI_R4_EN = Regexp.new(BIDI_CLASSES["EN"]).freeze
|
20
|
+
BIDI_R4_AN = Regexp.new(BIDI_CLASSES["AN"]).freeze
|
21
|
+
BIDI_R5 = Regexp.new("#{BIDI_CLASSES['RTL']}|#{BIDI_CLASSES['AN']}").freeze
|
22
|
+
BIDI_R6 = Regexp.new("(?:#{"#{BIDI_CLASSES['L']}|#{BIDI_CLASSES['EN']}"})#{BIDI_CLASSES['NSM']}*\\z").freeze
|
23
|
+
|
11
24
|
def call(label)
|
12
25
|
# Bidi rule 1
|
13
|
-
if
|
14
|
-
rtl = true
|
15
|
-
elsif bidi_class(label[0].ord, "L")
|
26
|
+
if BIDI_R1_LTR.match?(label[0])
|
16
27
|
rtl = false
|
28
|
+
elsif BIDI_R1_RTL.match?(label[0])
|
29
|
+
rtl = true
|
17
30
|
else
|
18
31
|
raise BidiError, "First codepoint in label #{label} must be directionality L, R or AL"
|
19
32
|
end
|
20
33
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
# Bidi rule 2
|
26
|
-
if bidi_class(cp, "L") || bidi_class(cp, "UNUSED")
|
27
|
-
raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a right-to-left label"
|
28
|
-
end
|
29
|
-
|
30
|
-
# Bidi rule 3
|
31
|
-
direction = bidi_class(cp, "RTL") || bidi_class(cp, "EN") || bidi_class(cp, "AN")
|
32
|
-
if direction
|
33
|
-
valid_ending = true
|
34
|
-
elsif !bidi_class(cp, "NSM")
|
35
|
-
valid_ending = false
|
36
|
-
end
|
37
|
-
# Bidi rule 4
|
38
|
-
if %w[EN AN].include?(direction)
|
39
|
-
number_type ||= direction
|
40
|
-
raise BidiError, "Can not mix numeral types in a right-to-left label" if number_type != direction
|
41
|
-
end
|
42
|
-
else
|
43
|
-
# Bidi rule 5
|
44
|
-
if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
|
45
|
-
raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a left-to-right label"
|
46
|
-
end
|
47
|
-
|
48
|
-
# Bidi rule 6
|
49
|
-
if bidi_class(cp, "L") || bidi_class(cp, "EN")
|
50
|
-
valid_ending = true
|
51
|
-
elsif !bidi_class(cp, "NSM")
|
52
|
-
valid_ending = false
|
53
|
-
end
|
34
|
+
if rtl
|
35
|
+
# Bidi rule 2
|
36
|
+
if (pos = label.index(BIDI_R2))
|
37
|
+
raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a right-to-left label"
|
54
38
|
end
|
39
|
+
# Bidi rule 3
|
40
|
+
raise BidiError, "Label ends with illegal codepoint directionality" unless label.match?(BIDI_R3)
|
41
|
+
# Bidi rule 4
|
42
|
+
if label.match?(BIDI_R4_EN) && label.match?(BIDI_R4_AN)
|
43
|
+
raise BidiError, "Can not mix numeral types in a right-to-left label"
|
44
|
+
end
|
45
|
+
else
|
46
|
+
# Bidi rule 5
|
47
|
+
if (pos = label.index(BIDI_R5))
|
48
|
+
raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a left-to-right label"
|
49
|
+
end
|
50
|
+
# Bidi rule 6
|
51
|
+
raise BidiError, "Label ends with illegal codepoint directionality" unless label.match?(BIDI_R6)
|
55
52
|
end
|
56
|
-
|
57
|
-
raise BidiError, "Label ends with illegal codepoint directionality" unless valid_ending
|
58
|
-
|
59
|
-
true
|
60
53
|
end
|
61
54
|
|
62
55
|
# https://www.rfc-editor.org/rfc/rfc5891.html#section-4.2.3.4
|
@@ -71,22 +64,11 @@ module URI
|
|
71
64
|
end
|
72
65
|
next if label.ascii_only?
|
73
66
|
|
74
|
-
label.
|
75
|
-
next if cp < 256
|
76
|
-
return true if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
|
77
|
-
end
|
67
|
+
return true if label.match?(BIDI_R5)
|
78
68
|
end
|
79
69
|
|
80
70
|
false
|
81
71
|
end
|
82
|
-
|
83
|
-
private
|
84
|
-
|
85
|
-
def bidi_class(codepoint, bidi_class)
|
86
|
-
return bidi_class if Intranges.contain?(codepoint, BIDI_CLASSES[bidi_class])
|
87
|
-
|
88
|
-
false
|
89
|
-
end
|
90
72
|
end
|
91
73
|
end
|
92
74
|
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../data/codepoint_classes"
|
4
|
+
require_relative "../data/joining_types"
|
5
|
+
require_relative "../data/virama_combining_classes"
|
6
|
+
|
7
|
+
module URI
|
8
|
+
module IDNA
|
9
|
+
module Validation
|
10
|
+
# https://datatracker.ietf.org/doc/html/rfc5892
|
11
|
+
module ContextJ
|
12
|
+
class << self
|
13
|
+
CONTEXTJ_REGEX = Regexp.new("[#{CODEPOINT_CLASSES['CONTEXTJ']}]").freeze
|
14
|
+
|
15
|
+
def call(label)
|
16
|
+
return if label.ascii_only?
|
17
|
+
|
18
|
+
offset = 0
|
19
|
+
while (pos = label.index(CONTEXTJ_REGEX, offset))
|
20
|
+
raise InvalidCodepointContextError, error_message(label, pos) unless valid_contextj?(label, pos)
|
21
|
+
|
22
|
+
offset = pos + 1
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def valid_contextj?(label, pos)
|
29
|
+
case label[pos]
|
30
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
|
31
|
+
when "\u200c"
|
32
|
+
return true if pos > 0 && VIRAMA_COMBINING_CLASSES.match?(label[pos - 1])
|
33
|
+
|
34
|
+
ok = false
|
35
|
+
(pos - 1).downto(0) do |i|
|
36
|
+
joining_type = JOINING_TYPES[label[i]]
|
37
|
+
if [0x4c, 0x44].include?(joining_type)
|
38
|
+
ok = true
|
39
|
+
break
|
40
|
+
end
|
41
|
+
end
|
42
|
+
return false unless ok
|
43
|
+
|
44
|
+
(pos + 1).upto(label.length - 1) do |i|
|
45
|
+
joining_type = JOINING_TYPES[label[i]]
|
46
|
+
return true if [0x52, 0x44].include?(joining_type)
|
47
|
+
end
|
48
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
|
49
|
+
when "\u200d"
|
50
|
+
return VIRAMA_COMBINING_CLASSES.match?(label[pos - 1]) if pos > 0
|
51
|
+
end
|
52
|
+
false
|
53
|
+
end
|
54
|
+
|
55
|
+
def error_message(label, pos)
|
56
|
+
format("ContextJ codepoint U+%04X at position %d of %p not allowed", label[pos].ord, pos + 1, label)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../data/codepoint_classes"
|
4
|
+
require_relative "../data/scripts"
|
5
|
+
|
6
|
+
module URI
|
7
|
+
module IDNA
|
8
|
+
module Validation
|
9
|
+
# https://datatracker.ietf.org/doc/html/rfc5892
|
10
|
+
module ContextO
|
11
|
+
class << self
|
12
|
+
CONTEXTO_REGEX = Regexp.new("[#{CODEPOINT_CLASSES['CONTEXTO']}]").freeze
|
13
|
+
CONTEXTO_A4_REGEX = Regexp.new(SCRIPTS["Greek"])
|
14
|
+
CONTEXTO_A5_REGEX = Regexp.new(SCRIPTS["Hebrew"])
|
15
|
+
CONTEXTO_A7_REGEX = Regexp.new("#{SCRIPTS['Hiragana']}|#{SCRIPTS['Katakana']}|#{SCRIPTS['Han']}").freeze
|
16
|
+
CONTEXTO_A8_REGEX = /[\u06F0-\u06F9]/.freeze
|
17
|
+
CONTEXTO_A9_REGEX = /[\u0660-\u0669]/.freeze
|
18
|
+
|
19
|
+
def call(label)
|
20
|
+
offset = 0
|
21
|
+
while (pos = label.index(CONTEXTO_REGEX, offset))
|
22
|
+
raise InvalidCodepointContextError, error_message(label, pos) unless valid_contexto?(label, pos)
|
23
|
+
|
24
|
+
offset = pos + 1
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def valid_contexto?(label, pos)
|
31
|
+
case label[pos]
|
32
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
|
33
|
+
when "\u00b7"
|
34
|
+
pos > 0 && pos < label.length - 1 ? (label[pos - 1] == "\u006c" && label[pos + 1] == "\u006c") : false
|
35
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
|
36
|
+
when "\u0375"
|
37
|
+
pos < label.length - 1 ? CONTEXTO_A4_REGEX.match?(label[pos + 1]) : false
|
38
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
|
39
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
|
40
|
+
when "\u05f3", "\u05f4"
|
41
|
+
pos > 0 ? CONTEXTO_A5_REGEX.match?(label[pos - 1]) : false
|
42
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
|
43
|
+
when "\u30fb"
|
44
|
+
CONTEXTO_A7_REGEX.match?(label)
|
45
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
|
46
|
+
when "\u0660".."\u0669"
|
47
|
+
!CONTEXTO_A8_REGEX.match?(label)
|
48
|
+
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
|
49
|
+
when "\u06f0".."\u06f9"
|
50
|
+
!CONTEXTO_A9_REGEX.match?(label)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
def error_message(label, pos)
|
55
|
+
format("ContextO codepoint U+%04X at position %d of %p not allowed", label[pos].ord, pos + 1, label)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../data/codepoint_classes"
|
4
|
+
|
5
|
+
module URI
|
6
|
+
module IDNA
|
7
|
+
module Validation
|
8
|
+
module IDNAPermitted
|
9
|
+
class << self
|
10
|
+
IDNA_REGEX = Regexp.new(
|
11
|
+
"[^(#{CODEPOINT_CLASSES['PVALID']}|#{CODEPOINT_CLASSES['CONTEXTJ']}|#{CODEPOINT_CLASSES['CONTEXTO']})]",
|
12
|
+
).freeze
|
13
|
+
|
14
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
|
15
|
+
def call(label)
|
16
|
+
return unless (pos = label.index(IDNA_REGEX))
|
17
|
+
|
18
|
+
raise InvalidCodepointError, error_message(label, pos)
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def error_message(label, pos)
|
24
|
+
format("Codepoint U+%04X at position %d of %p not allowed in IDNA2008", label[pos].ord, pos + 1, label)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -1,8 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative "../intranges"
|
4
|
-
require_relative "../data/idna"
|
5
|
-
|
6
3
|
module URI
|
7
4
|
module IDNA
|
8
5
|
module Validation
|
@@ -11,7 +8,7 @@ module URI
|
|
11
8
|
# 4.1. Input to IDNA Registration
|
12
9
|
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.1
|
13
10
|
def check_nfc(label)
|
14
|
-
return if label.unicode_normalized?(:nfc)
|
11
|
+
return if label.ascii_only? || label.unicode_normalized?(:nfc)
|
15
12
|
|
16
13
|
raise Error, "Label must be in Unicode Normalization Form NFC"
|
17
14
|
end
|
@@ -38,16 +35,6 @@ module URI
|
|
38
35
|
raise Error, "Label must not begin with `xn--`"
|
39
36
|
end
|
40
37
|
|
41
|
-
# 4.2.3.2. Leading Combining Marks
|
42
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
|
43
|
-
def check_leading_combining(label)
|
44
|
-
cp = label[0].ord
|
45
|
-
return if cp < 256
|
46
|
-
return unless Intranges.contain?(cp, INITIAL_COMBINERS)
|
47
|
-
|
48
|
-
raise Error, "Label begins with an illegal combining character"
|
49
|
-
end
|
50
|
-
|
51
38
|
def check_dot(label)
|
52
39
|
raise Error, "Label must not contain a U+002E ( . ) FULL STOP" if label.include?(".")
|
53
40
|
end
|