uri-idna 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -1
- data/README.md +149 -45
- data/lib/uri/idna/base_processing.rb +65 -0
- data/lib/uri/idna/data/idna.rb +11 -6
- data/lib/uri/idna/data/uts46.rb +4 -6
- data/lib/uri/idna/idna2008/options.rb +59 -0
- data/lib/uri/idna/idna2008/processing.rb +158 -0
- data/lib/uri/idna/intranges.rb +12 -4
- data/lib/uri/idna/punycode.rb +11 -15
- data/lib/uri/idna/uts46/mapping.rb +61 -0
- data/lib/uri/idna/uts46/options.rb +75 -0
- data/lib/uri/idna/uts46/processing.rb +98 -0
- data/lib/uri/idna/validation/bidi.rb +14 -13
- data/lib/uri/idna/validation/codepoint.rb +122 -0
- data/lib/uri/idna/validation/label.rb +70 -0
- data/lib/uri/idna/version.rb +1 -1
- data/lib/uri/idna/whatwg/processing.rb +35 -0
- data/lib/uri/idna.rb +30 -24
- data/lib/uri-idna.rb +3 -0
- metadata +12 -5
- data/lib/uri/idna/process.rb +0 -139
- data/lib/uri/idna/uts46.rb +0 -60
- data/lib/uri/idna/validation.rb +0 -199
@@ -0,0 +1,158 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "options"
|
4
|
+
|
5
|
+
module URI
|
6
|
+
module IDNA
|
7
|
+
module IDNA2008
|
8
|
+
# Behaviour shared by the IDNA2008 protocol classes: which options class to
# use, how a single label is validated and how an A-label is decoded.
class Processing < BaseProcessing
  private

  # Options container used by this family of processors.
  def options_class
    Options
  end

  # Runs the label-level and per-codepoint checks on +label+.
  # An empty label is accepted without any check. Failures surface as the
  # exceptions raised by the individual Validation helpers.
  def validate(label)
    return if label.empty?

    Validation::Label.check_nfc(label)
    hyphen_rule = options.check_hyphens? ? :check_hyphen34 : :check_ace_prefix
    Validation::Label.public_send(hyphen_rule, label)
    Validation::Label.check_leading_combining(label) if options.leading_combining?

    label.each_codepoint.with_index do |cp, pos|
      # A codepoint accepted (or deliberately tolerated) by a contextual rule
      # is not subjected to the remaining checks.
      next if contextual_rule_settles?(:check_contextj, :check_joiners?, label, cp, pos)
      next if contextual_rule_settles?(:check_contexto, :check_others?, label, cp, pos)

      Validation::Codepoint.check_idna_validity(label, cp, pos)
    end
    Validation::Bidi.call(label) if check_bidi?
  end

  # Applies one contextual rule (+rule+ names a Validation::Codepoint method).
  # Returns that method's result; when the rule raises
  # InvalidCodepointContextError the error is re-raised only if the option
  # named by +enforcing_option+ is on, otherwise the codepoint counts as settled.
  def contextual_rule_settles?(rule, enforcing_option, label, cp, pos)
    Validation::Codepoint.public_send(rule, label, cp, pos)
  rescue InvalidCodepointContextError
    raise if options.public_send(enforcing_option)

    true
  end

  # Memoised: Bidi validation is wanted when the option is on and
  # Validation::Bidi.check? says so for the whole domain name.
  def check_bidi?
    unless defined?(@check_bidi)
      @check_bidi = options.check_bidi? && Validation::Bidi.check?(domain_name)
    end
    @check_bidi
  end

  # Labels without the ACE prefix are returned untouched; prefixed labels
  # ending in a hyphen are rejected before delegating to the parent decoder.
  def punycode_decode(label)
    return label unless label.start_with?(ACE_PREFIX)
    raise Error, "A-label must not end with a hyphen" if label.end_with?("-")

    super
  end
end
|
62
|
+
|
63
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4
|
64
|
+
# IDNA2008 registration protocol (RFC 5891, section 4).
#
# Accepts an A-label form, a U-label form, or both. When both are given they
# must describe the same labels; the result is the validated ASCII form.
class Registration < Processing
  # alabel:: domain in A-label (ASCII/punycode) form, or nil
  # ulabel:: domain in U-label (Unicode) form, or nil
  # Remaining keywords are forwarded to the parent initializer.
  # Raises ArgumentError when neither form is supplied.
  def initialize(alabel: nil, ulabel: nil, **options)
    raise ArgumentError, "Provide alabel or ulabel" if alabel.nil? && ulabel.nil?

    @alabel = alabel
    @ulabel = ulabel

    super(ulabel || alabel, **options)
  end

  # Validates every label and returns the joined A-label domain.
  # Raises Error when the two supplied forms disagree (label count, trailing
  # dot, or per-label content) and re-raises any validation failure.
  def call
    alabels, alabel_trailing_dot = split_domain(alabel.encode("UTF-8").unicode_normalize(:nfc)) if alabel
    ulabels, ulabel_trailing_dot = split_domain(ulabel.encode("UTF-8").unicode_normalize(:nfc)) if ulabel

    if alabels && ulabels && (alabels.size != ulabels.size || alabel_trailing_dot != ulabel_trailing_dot)
      raise Error, "alabel doesn't match ulabel"
    end

    trailing_dot = alabel_trailing_dot || ulabel_trailing_dot
    size = (alabels || ulabels).size

    result = Array.new(size) do |i|
      register_label(alabels && alabels[i], ulabels && ulabels[i])
    end

    result = join_labels(result, trailing_dot)

    Validation::Label.check_domain_length(result) if options.verify_dns_length?
    result
  end

  private

  attr_reader :ulabel, :alabel

  # Processes one label position. +a_in+ / +u_in+ are the A-label and U-label
  # for that position (either may be nil, never both). Returns the A-label.
  def register_label(a_in, u_in)
    raise Error, "Provided alabel must be downcased" if a_in && a_in.downcase != a_in

    # Decode the A-label once and reuse the result below.
    u_alabel = punycode_decode(a_in) if a_in
    if a_in && u_in && u_alabel != u_in
      raise Error,
            "Provided ulabel #{u_in.inspect} doesn't match punycoded alabel #{u_alabel.inspect}"
    end

    unicode_label = u_in || u_alabel
    validate(unicode_label)
    a_ulabel = punycode_encode(unicode_label)

    Validation::Label.check_length(a_ulabel) if options.verify_dns_length?

    # Round-trip check: re-encoding the U-label must give back the A-label.
    if a_in && u_in && a_ulabel != a_in
      raise Error,
            "Provided alabel #{a_in.inspect} doesn't match de-punycoded ulabel #{u_alabel.inspect}"
    end

    a_ulabel
  end

  # Registration additionally applies the hyphen-sides check (when hyphen
  # checks are enabled) before the shared validation.
  def validate(label)
    Validation::Label.check_hyphen_sides(label) if options.check_hyphens?
    super
  end
end
|
127
|
+
|
128
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-5
|
129
|
+
# IDNA2008 lookup protocol: normalises the domain name to NFC, validates each
# label and returns the domain assembled from the punycode-encoded labels.
class Lookup < Processing
  # Returns the processed domain. Raises Error when a label that arrived with
  # the ACE prefix does not survive the decode/encode round trip, and lets
  # validation errors propagate.
  def call
    normalized = domain_name.encode("UTF-8").unicode_normalize(:nfc)
    ascii_domain = process_labels(normalized) { |label| lookup_label(label) }

    Validation::Label.check_domain_length(ascii_domain) if options.verify_dns_length?
    ascii_domain
  end

  private

  # Decodes, validates and re-encodes a single label, returning the encoded form.
  def lookup_label(given)
    arrived_as_alabel = given.start_with?(ACE_PREFIX)

    unicode = punycode_decode(given)
    validate(unicode)
    encoded = punycode_encode(unicode)

    Validation::Label.check_length(encoded) if options.verify_dns_length?

    return encoded unless arrived_as_alabel && given != encoded

    raise Error, "Resulting label #{encoded.inspect} doesn't match initial label #{given.inspect}"
  end
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
data/lib/uri/idna/intranges.rb
CHANGED
@@ -23,12 +23,14 @@ module URI
|
|
23
23
|
# we could be immediately ahead of a tuple (start, end)
|
24
24
|
# with start < int_ <= end
|
25
25
|
if pos > 0
|
26
|
-
|
26
|
+
r = ranges[pos - 1]
|
27
|
+
left = decode_left(r)
|
28
|
+
right = decode_right(r)
|
27
29
|
return true if left <= int && int < right
|
28
30
|
end
|
29
31
|
# or we could be immediately behind a tuple (int_, end)
|
30
32
|
if pos < ranges.length
|
31
|
-
left
|
33
|
+
left = decode_left(ranges[pos])
|
32
34
|
return true if left == int
|
33
35
|
end
|
34
36
|
false
|
@@ -40,8 +42,14 @@ module URI
|
|
40
42
|
(start << 32) | finish
|
41
43
|
end
|
42
44
|
|
43
|
-
|
44
|
-
|
45
|
+
MASK = ((1 << 32) - 1)
|
46
|
+
|
47
|
+
def decode_right(r)
|
48
|
+
r & MASK
|
49
|
+
end
|
50
|
+
|
51
|
+
def decode_left(r)
|
52
|
+
r >> 32
|
45
53
|
end
|
46
54
|
end
|
47
55
|
end
|
data/lib/uri/idna/punycode.rb
CHANGED
@@ -47,20 +47,21 @@ module URI
|
|
47
47
|
|
48
48
|
def encode(input)
|
49
49
|
input = input.codepoints
|
50
|
+
output = []
|
50
51
|
|
51
52
|
n = INITIAL_N
|
52
53
|
delta = 0
|
53
54
|
bias = INITIAL_BIAS
|
54
55
|
|
55
|
-
|
56
|
+
input.each { |cp| output << cp if cp < 0x80 }
|
56
57
|
h = b = output.length
|
57
58
|
|
58
59
|
output << DELIMITER if b > 0
|
59
60
|
|
60
61
|
while h < input.length
|
61
62
|
m = MAXINT
|
62
|
-
input.each do |
|
63
|
-
m =
|
63
|
+
input.each do |cp|
|
64
|
+
m = cp if cp >= n && cp < m
|
64
65
|
end
|
65
66
|
|
66
67
|
raise PunycodeError, "Arithmetic overflow" if m - n > (MAXINT - delta) / (h + 1)
|
@@ -68,12 +69,12 @@ module URI
|
|
68
69
|
delta += (m - n) * (h + 1)
|
69
70
|
n = m
|
70
71
|
|
71
|
-
input.each do |
|
72
|
-
if
|
72
|
+
input.each do |cp|
|
73
|
+
if cp < n
|
73
74
|
delta += 1
|
74
75
|
raise PunycodeError, "Arithmetic overflow" if delta > MAXINT
|
75
76
|
end
|
76
|
-
next unless
|
77
|
+
next unless cp == n
|
77
78
|
|
78
79
|
q = delta
|
79
80
|
k = BASE
|
@@ -115,10 +116,11 @@ module URI
|
|
115
116
|
|
116
117
|
b = input.rindex(DELIMITER) || 0
|
117
118
|
|
118
|
-
|
119
|
-
|
119
|
+
0.upto(b - 1) do |idx|
|
120
|
+
cp = input[idx]
|
121
|
+
raise PunycodeError, "Invalid input" unless cp < 0x80
|
120
122
|
|
121
|
-
output <<
|
123
|
+
output << cp
|
122
124
|
end
|
123
125
|
|
124
126
|
inc = b > 0 ? b + 1 : 0
|
@@ -162,12 +164,6 @@ module URI
|
|
162
164
|
|
163
165
|
output.pack("U*")
|
164
166
|
end
|
165
|
-
|
166
|
-
private
|
167
|
-
|
168
|
-
def basic?(codepoint)
|
169
|
-
codepoint < 0x80
|
170
|
-
end
|
171
167
|
end
|
172
168
|
end
|
173
169
|
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../data/uts46"
|
4
|
+
|
5
|
+
module URI
|
6
|
+
module IDNA
|
7
|
+
module UTS46
|
8
|
+
# https://www.unicode.org/reports/tr46/#IDNA_Mapping_Table
|
9
|
+
# Lookups against UTS46_DATA: maps a whole domain name and checks the status
# of individual codepoints.
module Mapping
  class << self
    # Builds the mapped form of +domain_name+ codepoint by codepoint according
    # to each codepoint's status, then returns it NFC-normalised.
    def call(domain_name, transitional_processing: false, use_std3_ascii_rules: true)
      mapped = +""
      domain_name.each_codepoint do |cp|
        _, kind, substitute = status(cp)
        case kind
        when "V", "X" # valid, disallowed: kept as-is
          mapped << cp.chr(Encoding::UTF_8)
        when "M" # mapped; U+1E9E becomes "ss" under transitional processing
          mapped << (transitional_processing && cp == 0x1E9E ? "ss" : substitute)
        when "D" # deviation: replaced only under transitional processing
          mapped << (transitional_processing ? substitute : cp.chr(Encoding::UTF_8))
        when "3" # disallowed_STD3_valid, disallowed_STD3_mapped
          mapped << (use_std3_ascii_rules ? cp.chr(Encoding::UTF_8) : (substitute || cp.chr(Encoding::UTF_8)))
        end
        # "I" (ignored) and any other status contribute nothing.
      end
      mapped.unicode_normalize(:nfc)
    end

    # Returns nil when the status of +cp+ is acceptable under the given flags,
    # otherwise raises InvalidCodepointError describing the position.
    def validate_status(label, cp, pos, transitional_processing:, use_std3_ascii_rules:)
      _, kind, = status(cp)
      acceptable = kind == "V" ||
                   (kind == "D" && !transitional_processing) ||
                   (kind == "3" && !use_std3_ascii_rules)
      return if acceptable

      raise InvalidCodepointError, Validation::Codepoint.cp_error_message(label, cp, pos)
    end

    # Returns the UTS46_DATA row for +codepoint+ (an empty array when none).
    # Codepoints below 256 index the table directly; others use the row just
    # before the first row whose leading value exceeds the codepoint.
    def status(codepoint)
      return UTS46_DATA[codepoint] || [] if codepoint < 256

      upper = UTS46_DATA.bsearch_index { |row| row[0] > codepoint } || UTS46_DATA.length
      UTS46_DATA[upper - 1] || []
    end
  end
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module URI
|
4
|
+
module IDNA
|
5
|
+
module UTS46
|
6
|
+
# Options for ToUnicode operation
|
7
|
+
# https://www.unicode.org/reports/tr46/#ToUnicode
|
8
|
+
# Boolean switches for UTS #46 processing, packed into one integer bit field.
class Options
  USE_STD3_ASCII_RULES = 0x01
  CHECK_HYPHENS = 0x02
  CHECK_BIDI = 0x04
  CHECK_JOINERS = 0x08
  TRANSITIONAL_PROCESSING = 0x10
  IGNORE_INVALID_PUNYCODE = 0x20

  # Every keyword is interpreted by truthiness; unknown keywords are rejected
  # by Ruby's keyword-argument checking.
  def initialize(
    use_std3_ascii_rules: true,
    check_hyphens: true,
    check_bidi: true,
    check_joiners: true,
    transitional_processing: false,
    ignore_invalid_punycode: false
  )
    requested = {
      USE_STD3_ASCII_RULES => use_std3_ascii_rules,
      CHECK_HYPHENS => check_hyphens,
      CHECK_BIDI => check_bidi,
      CHECK_JOINERS => check_joiners,
      TRANSITIONAL_PROCESSING => transitional_processing,
      IGNORE_INVALID_PUNYCODE => ignore_invalid_punycode,
    }
    @flags = requested.reduce(0) { |bits, (bit, on)| on ? bits | bit : bits }
  end

  def use_std3_ascii_rules?
    flag_set?(USE_STD3_ASCII_RULES)
  end

  def check_hyphens?
    flag_set?(CHECK_HYPHENS)
  end

  def check_bidi?
    flag_set?(CHECK_BIDI)
  end

  def check_joiners?
    flag_set?(CHECK_JOINERS)
  end

  def transitional_processing?
    flag_set?(TRANSITIONAL_PROCESSING)
  end

  def ignore_invalid_punycode?
    flag_set?(IGNORE_INVALID_PUNYCODE)
  end

  private

  # True when +bit+ is present in the stored bit field.
  def flag_set?(bit)
    !(@flags & bit).zero?
  end
end
|
57
|
+
|
58
|
+
# Options for ToASCII operation
|
59
|
+
# https://www.unicode.org/reports/tr46/#ToASCII
|
60
|
+
# Options for ToASCII: everything Options accepts plus the DNS length switch,
# which is kept in its own bit field.
class ToASCIIOptions < Options
  VERIFY_DNS_LENGTH = 1 << 6

  # verify_dns_length is interpreted by truthiness; all other keywords are
  # passed on to the parent initializer.
  def initialize(verify_dns_length: true, **options)
    @flags_extended = verify_dns_length ? VERIFY_DNS_LENGTH : 0
    super(**options)
  end

  def verify_dns_length?
    !(@flags_extended & VERIFY_DNS_LENGTH).zero?
  end
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "mapping"
|
4
|
+
require_relative "options"
|
5
|
+
|
6
|
+
module URI
|
7
|
+
module IDNA
|
8
|
+
module UTS46
|
9
|
+
# https://www.unicode.org/reports/tr46/#Processing
|
10
|
+
# UTS #46 processing shared by ToUnicode and ToASCII: the domain name is
# mapped first, then every label is decoded (when ACE-prefixed) and validated.
class Processing < BaseProcessing
  # Returns the result of process_labels over the mapped domain. When a block
  # is given, each validated label is yielded and the block's value is what is
  # handed back for that label. A label whose punycode cannot be decoded is
  # passed through unvalidated if ignore_invalid_punycode is on, otherwise the
  # PunycodeError propagates.
  def call
    mapped = Mapping.call(
      domain_name,
      transitional_processing: options.transitional_processing?,
      use_std3_ascii_rules: options.use_std3_ascii_rules?,
    )

    process_labels(mapped) do |label|
      if label.start_with?(ACE_PREFIX)
        begin
          label = punycode_decode(label)
        rescue PunycodeError
          raise unless options.ignore_invalid_punycode?

          next label
        end
        # Decoded A-labels are always validated non-transitionally.
        validate(label, transitional_processing: false)
      else
        validate(label)
      end

      block_given? ? yield(label) : label
    end
  end

  private

  # Options container used by this processor.
  def options_class
    Options
  end

  # Memoised: Bidi validation is wanted when the option is on and
  # Validation::Bidi.check? says so for the whole domain name.
  def check_bidi?
    unless defined?(@check_bidi)
      @check_bidi = options.check_bidi? && Validation::Bidi.check?(domain_name)
    end
    @check_bidi
  end

  # https://www.unicode.org/reports/tr46/#Validity_Criteria
  #
  # Empty labels are accepted. Otherwise runs the label-level checks, then the
  # mapping-status check (and the CONTEXTJ check when joiners are checked) on
  # every codepoint, and finally the Bidi check when applicable.
  def validate(label, transitional_processing: options.transitional_processing?)
    return if label.empty?

    Validation::Label.check_nfc(label)
    hyphen_rules = options.check_hyphens? ? %i[check_hyphen34 check_hyphen_sides] : %i[check_ace_prefix]
    hyphen_rules.each { |rule| Validation::Label.public_send(rule, label) }
    Validation::Label.check_dot(label)
    Validation::Label.check_leading_combining(label)

    label.each_codepoint.with_index do |cp, pos|
      Mapping.validate_status(
        label, cp, pos,
        transitional_processing: transitional_processing,
        use_std3_ascii_rules: options.use_std3_ascii_rules?
      )
      next unless options.check_joiners?

      Validation::Codepoint.check_contextj(label, cp, pos)
    end
    Validation::Bidi.call(label) if check_bidi?
  end
end
|
75
|
+
|
76
|
+
# https://www.unicode.org/reports/tr46/#ToUnicode
|
77
|
+
class ToUnicode < Processing
  # Adds nothing of its own: all behaviour comes from Processing.
end
|
79
|
+
|
80
|
+
# https://www.unicode.org/reports/tr46/#ToASCII
|
81
|
+
# ToASCII: runs the shared processing, punycode-encodes every label and, when
# verify_dns_length is on, checks label and domain lengths.
class ToASCII < Processing
  # Uses the options class that carries the verify_dns_length switch.
  def options_class
    ToASCIIOptions
  end

  # Returns the encoded domain; length violations are raised by the
  # Validation::Label helpers.
  def call
    ascii_domain = super do |label|
      punycode_encode(label).tap do |encoded|
        Validation::Label.check_length(encoded) if options.verify_dns_length?
      end
    end

    Validation::Label.check_domain_length(ascii_domain) if options.verify_dns_length?
    ascii_domain
  end
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module URI
|
4
4
|
module IDNA
|
5
|
-
|
5
|
+
module Validation
|
6
6
|
# 4.2.3.4. Labels Containing Characters Written Right to Left
|
7
7
|
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.4
|
8
8
|
# https://datatracker.ietf.org/doc/html/rfc5893#section-2
|
@@ -10,9 +10,9 @@ module URI
|
|
10
10
|
class << self
|
11
11
|
def call(label)
|
12
12
|
# Bidi rule 1
|
13
|
-
if bidi_class(label[0], "RTL")
|
13
|
+
if bidi_class(label[0].ord, "RTL")
|
14
14
|
rtl = true
|
15
|
-
elsif bidi_class(label[0], "L")
|
15
|
+
elsif bidi_class(label[0].ord, "L")
|
16
16
|
rtl = false
|
17
17
|
else
|
18
18
|
raise BidiError, "First codepoint in label #{label} must be directionality L, R or AL"
|
@@ -20,11 +20,11 @@ module URI
|
|
20
20
|
|
21
21
|
valid_ending = false
|
22
22
|
number_type = nil
|
23
|
-
label.
|
23
|
+
label.each_codepoint.with_index do |cp, pos|
|
24
24
|
if rtl
|
25
25
|
# Bidi rule 2
|
26
26
|
if bidi_class(cp, "L") || bidi_class(cp, "UNUSED")
|
27
|
-
raise BidiError, "Invalid direction for codepoint at position #{
|
27
|
+
raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a right-to-left label"
|
28
28
|
end
|
29
29
|
|
30
30
|
# Bidi rule 3
|
@@ -42,7 +42,7 @@ module URI
|
|
42
42
|
else
|
43
43
|
# Bidi rule 5
|
44
44
|
if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
|
45
|
-
raise BidiError, "Invalid direction for codepoint at position #{
|
45
|
+
raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a left-to-right label"
|
46
46
|
end
|
47
47
|
|
48
48
|
# Bidi rule 6
|
@@ -60,12 +60,13 @@ module URI
|
|
60
60
|
end
|
61
61
|
|
62
62
|
# https://www.rfc-editor.org/rfc/rfc5891.html#section-4.2.3.4
|
63
|
-
def check?(
|
63
|
+
def check?(domain)
|
64
|
+
labels = domain.split(".", -1)
|
64
65
|
domain = labels.map do |label|
|
65
|
-
if label.start_with?(
|
66
|
+
if label.start_with?(ACE_PREFIX)
|
66
67
|
begin
|
67
|
-
Punycode.decode(label[
|
68
|
-
rescue
|
68
|
+
Punycode.decode(label[ACE_PREFIX.length..])
|
69
|
+
rescue PunycodeError
|
69
70
|
""
|
70
71
|
end
|
71
72
|
else
|
@@ -73,7 +74,7 @@ module URI
|
|
73
74
|
end
|
74
75
|
end.join(".")
|
75
76
|
|
76
|
-
domain.
|
77
|
+
domain.each_codepoint do |cp|
|
77
78
|
return true if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
|
78
79
|
end
|
79
80
|
false
|
@@ -81,8 +82,8 @@ module URI
|
|
81
82
|
|
82
83
|
private
|
83
84
|
|
84
|
-
def bidi_class(
|
85
|
-
return bidi_class if Intranges.contain?(
|
85
|
+
def bidi_class(codepoint, bidi_class)
|
86
|
+
return bidi_class if Intranges.contain?(codepoint, BIDI_CLASSES[bidi_class])
|
86
87
|
|
87
88
|
false
|
88
89
|
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../intranges"
|
4
|
+
require_relative "../data/idna"
|
5
|
+
|
6
|
+
module URI
|
7
|
+
module IDNA
|
8
|
+
module Validation
|
9
|
+
# Per-codepoint checks for IDNA2008: PVALID membership and the contextual
# rules (CONTEXTJ / CONTEXTO) of RFC 5892 appendix A.
module Codepoint
  # Joining type codes as stored in JOINING_TYPES (ASCII codes of the letters
  # used by the RFC 5892 A.1 regular expression).
  JOINING_TYPE_TRANSPARENT = 0x54 # "T"
  JOINING_TYPES_BEFORE_ZWNJ = [0x4c, 0x44].freeze # "L", "D"
  JOINING_TYPES_AFTER_ZWNJ = [0x52, 0x44].freeze # "R", "D"

  class << self
    # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
    #
    # Returns true when +cp+ is PVALID, otherwise raises InvalidCodepointError.
    def check_idna_validity(label, cp, pos)
      return true if codepoint?(cp, "PVALID")

      raise InvalidCodepointError, cp_error_message(label, cp, pos)
    end

    # https://datatracker.ietf.org/doc/html/rfc5892
    #
    # Returns false when +cp+ is not a CONTEXTJ codepoint, true when it is and
    # its contextual rule holds at +pos+; raises InvalidCodepointContextError
    # when the rule is violated.
    def check_contextj(label, cp, pos)
      return false unless codepoint?(cp, "CONTEXTJ")
      return true if valid_contextj?(label, cp, pos)

      raise InvalidCodepointContextError, cp_error_message(label, cp, pos)
    end

    # https://datatracker.ietf.org/doc/html/rfc5892
    #
    # Same contract as check_contextj, for CONTEXTO codepoints.
    def check_contexto(label, cp, pos)
      return false unless codepoint?(cp, "CONTEXTO")
      return true if valid_contexto?(label, cp, pos)

      raise InvalidCodepointContextError, cp_error_message(label, cp, pos)
    end

    # Error text naming the codepoint, its 1-based position and the label.
    def cp_error_message(label, cp, pos)
      format("Codepoint U+%04X at position %d of %p not allowed", cp, pos + 1, label)
    end

    private

    def valid_contexto?(label, cp, pos)
      case cp
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
      when 0x00b7
        pos > 0 && pos < label.length - 1 ? (label[pos - 1].ord == 0x006c && label[pos + 1].ord == 0x006c) : false
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
      when 0x0375
        pos < label.length - 1 ? script?(label[pos + 1].ord, "Greek") : false
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
      when 0x05f3, 0x05f4
        pos > 0 ? script?(label[pos - 1].ord, "Hebrew") : false
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
      when 0x30fb
        label.each_codepoint.any? do |codepoint|
          codepoint != 0x30fb &&
            (script?(codepoint, "Hiragana") || script?(codepoint, "Katakana") || script?(codepoint, "Han"))
        end
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
      when 0x0660..0x0669
        label.each_codepoint.none? { |codepoint| codepoint.between?(0x06f0, 0x06f9) }
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
      when 0x06f0..0x06f9
        label.each_codepoint.none? { |codepoint| codepoint.between?(0x0660, 0x0669) }
      else
        false
      end
    end

    def valid_contextj?(label, cp, pos)
      case cp
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
      when 0x200c
        return true if pos > 0 && virama_combining_class?(label[pos - 1].ord)

        # (L|D) T* ZWNJ T* (R|D): only Transparent codepoints may sit between
        # the joiner and the joining codepoints on either side.
        nearest_joining_type_in?(label, (pos - 1).downto(0), JOINING_TYPES_BEFORE_ZWNJ) &&
          nearest_joining_type_in?(label, (pos + 1).upto(label.length - 1), JOINING_TYPES_AFTER_ZWNJ)
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
      when 0x200d
        pos > 0 && virama_combining_class?(label[pos - 1].ord)
      else
        false
      end
    end

    # Walks +indexes+ over +label+, skipping Transparent codepoints, and tells
    # whether the first non-Transparent one has a joining type in +wanted+.
    # False when the walk runs out of codepoints.
    def nearest_joining_type_in?(label, indexes, wanted)
      indexes.each do |i|
        joining_type = JOINING_TYPES[label[i].ord]
        next if joining_type == JOINING_TYPE_TRANSPARENT

        return wanted.include?(joining_type)
      end
      false
    end

    def script?(cp, script)
      Intranges.contain?(cp, SCRIPTS[script])
    end

    def virama_combining_class?(cp)
      Intranges.contain?(cp, VIRAMA_COMBINING_CLASSES)
    end

    def codepoint?(cp, class_name)
      Intranges.contain?(cp, CODEPOINT_CLASSES[class_name])
    end
  end
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|