uri-idna 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -1
- data/README.md +149 -45
- data/lib/uri/idna/base_processing.rb +65 -0
- data/lib/uri/idna/data/idna.rb +11 -6
- data/lib/uri/idna/data/uts46.rb +4 -6
- data/lib/uri/idna/idna2008/options.rb +59 -0
- data/lib/uri/idna/idna2008/processing.rb +158 -0
- data/lib/uri/idna/intranges.rb +12 -4
- data/lib/uri/idna/punycode.rb +11 -15
- data/lib/uri/idna/uts46/mapping.rb +61 -0
- data/lib/uri/idna/uts46/options.rb +75 -0
- data/lib/uri/idna/uts46/processing.rb +98 -0
- data/lib/uri/idna/validation/bidi.rb +14 -13
- data/lib/uri/idna/validation/codepoint.rb +122 -0
- data/lib/uri/idna/validation/label.rb +70 -0
- data/lib/uri/idna/version.rb +1 -1
- data/lib/uri/idna/whatwg/processing.rb +35 -0
- data/lib/uri/idna.rb +30 -24
- data/lib/uri-idna.rb +3 -0
- metadata +12 -5
- data/lib/uri/idna/process.rb +0 -139
- data/lib/uri/idna/uts46.rb +0 -60
- data/lib/uri/idna/validation.rb +0 -199
@@ -0,0 +1,158 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "options"
|
4
|
+
|
5
|
+
module URI
|
6
|
+
module IDNA
|
7
|
+
module IDNA2008
|
8
|
+
# Shared IDNA2008 label validation and Punycode decoding.
# Relies on `options` and `domain_name`, which are not defined in this class
# (presumably provided by BaseProcessing — confirm against that file).
class Processing < BaseProcessing
  private

  # Class used to hold the options for this processing flavour.
  def options_class
    Options
  end

  # Validates a single label.
  #
  # Returns nil without checking anything when +label+ is empty; otherwise
  # runs the label-level checks, then the per-codepoint checks, then the Bidi
  # check. Whatever the first failing check raises propagates to the caller.
  def validate(label)
    return if label.empty?

    Validation::Label.check_nfc(label)
    if options.check_hyphens?
      Validation::Label.check_hyphen34(label)
    else
      Validation::Label.check_ace_prefix(label)
    end
    Validation::Label.check_leading_combining(label) if options.leading_combining?

    label.each_codepoint.with_index do |cp, pos|
      begin
        # A truthy result means the codepoint was handled by the CONTEXTJ rule.
        next if Validation::Codepoint.check_contextj(label, cp, pos)
      rescue InvalidCodepointContextError
        # With joiner checks disabled the violation is ignored and the
        # codepoint is not examined any further.
        next unless options.check_joiners?

        raise
      end

      begin
        # Same scheme for the CONTEXTO rule, gated by `check_others?`.
        next if Validation::Codepoint.check_contexto(label, cp, pos)
      rescue InvalidCodepointContextError
        next unless options.check_others?

        raise
      end

      Validation::Codepoint.check_idna_validity(label, cp, pos)
    end
    Validation::Bidi.call(label) if check_bidi?
  end

  # Memoized: true when the Bidi option is enabled and
  # Validation::Bidi.check? reports that the domain needs the Bidi check.
  # `instance_variable_defined?` is used so that a cached `false` is reused.
  def check_bidi?
    return @check_bidi if instance_variable_defined?(:@check_bidi)

    @check_bidi = options.check_bidi? && Validation::Bidi.check?(domain_name)
  end

  # Returns +label+ untouched unless it starts with ACE_PREFIX; rejects
  # prefixed labels ending in a hyphen before delegating to the superclass.
  def punycode_decode(label)
    return label unless label.start_with?(ACE_PREFIX)

    raise Error, "A-label must not end with a hyphen" if label[-1] == "-"

    super
  end
end
|
62
|
+
|
63
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4
|
64
|
+
# IDNA2008 registration protocol: accepts an A-label form, a U-label form, or
# both, cross-checks them label by label and returns the ASCII domain.
class Registration < Processing
  # @param alabel [String, nil] domain in A-label (Punycode) form
  # @param ulabel [String, nil] domain in U-label (Unicode) form
  # @param options [Hash] forwarded unchanged to the superclass initializer
  # @raise [ArgumentError] when neither +alabel+ nor +ulabel+ is given
  def initialize(alabel: nil, ulabel: nil, **options)
    raise ArgumentError, "Provide alabel or ulabel" if alabel.nil? && ulabel.nil?

    @alabel = alabel
    @ulabel = ulabel

    super(ulabel || alabel, **options)
  end

  # Validates the provided domain form(s) and returns the joined result of
  # `punycode_encode` for every label.
  #
  # @raise [Error] when the two forms disagree, or an A-label is not downcased
  def call
    alabels, alabel_trailing_dot = split_domain(alabel.encode("UTF-8").unicode_normalize(:nfc)) if alabel
    ulabels, ulabel_trailing_dot = split_domain(ulabel.encode("UTF-8").unicode_normalize(:nfc)) if ulabel

    if alabels && ulabels && (alabels.size != ulabels.size || alabel_trailing_dot != ulabel_trailing_dot)
      raise Error, "alabel doesn't match ulabel"
    end

    trailing_dot = alabel_trailing_dot || ulabel_trailing_dot
    size = (alabels || ulabels).size

    encoded_labels = Array.new(size) do |i|
      # Distinct local names: `alabel`/`ulabel` are attr_readers of this class.
      a_part = alabels&.[](i)
      u_part = ulabels&.[](i)

      raise Error, "Provided alabel must be downcased" if a_part && a_part.downcase != a_part

      # Decode the A-label once and reuse it for the comparison, the
      # validation input and the error message.
      decoded = punycode_decode(a_part) if a_part
      if a_part && u_part && decoded != u_part
        raise Error,
              "Provided ulabel #{u_part.inspect} doesn't match punycoded alabel #{decoded.inspect}"
      end

      unicode = u_part || decoded
      validate(unicode)
      ascii = punycode_encode(unicode)

      Validation::Label.check_length(ascii) if options.verify_dns_length?

      if a_part && u_part && ascii != a_part
        raise Error,
              "Provided alabel #{a_part.inspect} doesn't match de-punycoded ulabel #{decoded.inspect}"
      end

      ascii
    end

    result = join_labels(encoded_labels, trailing_dot)

    Validation::Label.check_domain_length(result) if options.verify_dns_length?
    result
  end

  private

  attr_reader :ulabel, :alabel

  # Adds the hyphen-sides check (when hyphen checks are enabled) in front of
  # the validation inherited from the superclass.
  def validate(label)
    Validation::Label.check_hyphen_sides(label) if options.check_hyphens?
    super
  end
end
|
127
|
+
|
128
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-5
|
129
|
+
# IDNA2008 lookup protocol: normalizes the domain to NFC, validates every
# label and returns the result of `process_labels` over the encoded labels.
class Lookup < Processing
  # @return the value produced by `process_labels` (defined outside this
  #   class; presumably the re-joined domain string — confirm)
  # @raise [Error] when an ACE-prefixed input label does not round-trip
  def call
    domain = domain_name.encode("UTF-8").unicode_normalize(:nfc)

    result = process_labels(domain) do |input_label|
      ace_input = input_label.start_with?(ACE_PREFIX)

      unicode_label = punycode_decode(input_label)
      validate(unicode_label)
      ascii_label = punycode_encode(unicode_label)

      Validation::Label.check_length(ascii_label) if options.verify_dns_length?

      # An A-label supplied by the caller must be reproduced exactly by
      # decoding and re-encoding it.
      if ace_input && input_label != ascii_label
        raise Error, "Resulting label #{ascii_label.inspect} doesn't match initial label #{input_label.inspect}"
      end

      ascii_label
    end

    Validation::Label.check_domain_length(result) if options.verify_dns_length?
    result
  end
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
data/lib/uri/idna/intranges.rb
CHANGED
@@ -23,12 +23,14 @@ module URI
|
|
23
23
|
# we could be immediately ahead of a tuple (start, end)
|
24
24
|
# with start < int_ <= end
|
25
25
|
if pos > 0
|
26
|
-
|
26
|
+
r = ranges[pos - 1]
|
27
|
+
left = decode_left(r)
|
28
|
+
right = decode_right(r)
|
27
29
|
return true if left <= int && int < right
|
28
30
|
end
|
29
31
|
# or we could be immediately behind a tuple (int_, end)
|
30
32
|
if pos < ranges.length
|
31
|
-
left
|
33
|
+
left = decode_left(ranges[pos])
|
32
34
|
return true if left == int
|
33
35
|
end
|
34
36
|
false
|
@@ -40,8 +42,14 @@ module URI
|
|
40
42
|
(start << 32) | finish
|
41
43
|
end
|
42
44
|
|
43
|
-
|
44
|
-
|
45
|
+
# Bit mask selecting the low 32 bits of an integer.
MASK = ((1 << 32) - 1)

# Returns the low 32 bits of +r+, i.e. the `finish` half of a value packed
# as `(start << 32) | finish`.
def decode_right(r)
  r & MASK
end
|
50
|
+
|
51
|
+
# Returns +r+ shifted right by 32 bits, i.e. the `start` half of a value
# packed as `(start << 32) | finish`.
def decode_left(r)
  r >> 32
end
|
46
54
|
end
|
47
55
|
end
|
data/lib/uri/idna/punycode.rb
CHANGED
@@ -47,20 +47,21 @@ module URI
|
|
47
47
|
|
48
48
|
def encode(input)
|
49
49
|
input = input.codepoints
|
50
|
+
output = []
|
50
51
|
|
51
52
|
n = INITIAL_N
|
52
53
|
delta = 0
|
53
54
|
bias = INITIAL_BIAS
|
54
55
|
|
55
|
-
|
56
|
+
input.each { |cp| output << cp if cp < 0x80 }
|
56
57
|
h = b = output.length
|
57
58
|
|
58
59
|
output << DELIMITER if b > 0
|
59
60
|
|
60
61
|
while h < input.length
|
61
62
|
m = MAXINT
|
62
|
-
input.each do |
|
63
|
-
m =
|
63
|
+
input.each do |cp|
|
64
|
+
m = cp if cp >= n && cp < m
|
64
65
|
end
|
65
66
|
|
66
67
|
raise PunycodeError, "Arithmetic overflow" if m - n > (MAXINT - delta) / (h + 1)
|
@@ -68,12 +69,12 @@ module URI
|
|
68
69
|
delta += (m - n) * (h + 1)
|
69
70
|
n = m
|
70
71
|
|
71
|
-
input.each do |
|
72
|
-
if
|
72
|
+
input.each do |cp|
|
73
|
+
if cp < n
|
73
74
|
delta += 1
|
74
75
|
raise PunycodeError, "Arithmetic overflow" if delta > MAXINT
|
75
76
|
end
|
76
|
-
next unless
|
77
|
+
next unless cp == n
|
77
78
|
|
78
79
|
q = delta
|
79
80
|
k = BASE
|
@@ -115,10 +116,11 @@ module URI
|
|
115
116
|
|
116
117
|
b = input.rindex(DELIMITER) || 0
|
117
118
|
|
118
|
-
|
119
|
-
|
119
|
+
0.upto(b - 1) do |idx|
|
120
|
+
cp = input[idx]
|
121
|
+
raise PunycodeError, "Invalid input" unless cp < 0x80
|
120
122
|
|
121
|
-
output <<
|
123
|
+
output << cp
|
122
124
|
end
|
123
125
|
|
124
126
|
inc = b > 0 ? b + 1 : 0
|
@@ -162,12 +164,6 @@ module URI
|
|
162
164
|
|
163
165
|
output.pack("U*")
|
164
166
|
end
|
165
|
-
|
166
|
-
private
|
167
|
-
|
168
|
-
def basic?(codepoint)
|
169
|
-
codepoint < 0x80
|
170
|
-
end
|
171
167
|
end
|
172
168
|
end
|
173
169
|
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../data/uts46"
|
4
|
+
|
5
|
+
module URI
|
6
|
+
module IDNA
|
7
|
+
module UTS46
|
8
|
+
# https://www.unicode.org/reports/tr46/#IDNA_Mapping_Table
|
9
|
+
# Maps and validates codepoints using the status rows stored in UTS46_DATA.
module Mapping
  # U+1E9E (decimal 7838), the one mapped codepoint that is replaced by "ss"
  # under transitional processing instead of its table replacement.
  CAPITAL_SHARP_S = 0x1E9E

  class << self
    # Applies the mapping table to every codepoint of +domain_name+ and
    # returns the result normalized to NFC.
    #
    # @param domain_name [String]
    # @param transitional_processing [Boolean] replace deviation ("D")
    #   codepoints instead of keeping them
    # @param use_std3_ascii_rules [Boolean] keep status "3" codepoints as-is
    #   instead of using their replacement when one exists
    # @return [String]
    def call(domain_name, transitional_processing: false, use_std3_ascii_rules: true)
      output = +""
      domain_name.each_codepoint do |codepoint|
        _, status, replacement = status(codepoint)
        case status
        when "V", "X" # valid, disallowed
          output << codepoint.chr(Encoding::UTF_8)
        when "M" # mapped
          output << (transitional_processing && codepoint == CAPITAL_SHARP_S ? "ss" : replacement)
        when "D" # deviation
          output << (transitional_processing ? replacement : codepoint.chr(Encoding::UTF_8))
        when "3" # disallowed_STD3_valid, disallowed_STD3_mapped
          output << if use_std3_ascii_rules
                      codepoint.chr(Encoding::UTF_8)
                    else
                      (replacement || codepoint.chr(Encoding::UTF_8))
                    end
        when "I" # ignored
          next
        end
      end
      output.unicode_normalize(:nfc)
    end

    # Returns nil when +cp+ is acceptable under the given flags.
    #
    # @raise [InvalidCodepointError] for any other status
    def validate_status(label, cp, pos, transitional_processing:, use_std3_ascii_rules:)
      _, status, = status(cp)
      return if status == "V"
      return if !transitional_processing && status == "D"
      return if !use_std3_ascii_rules && status == "3"

      raise InvalidCodepointError, Validation::Codepoint.cp_error_message(label, cp, pos)
    end

    # Looks up the UTS46_DATA row for +codepoint+.
    #
    # Codepoints below 256 index the table directly. For the rest, the row is
    # the one just before the first row whose first element exceeds
    # +codepoint+ (the last row when there is none).
    #
    # @return [Array] the row, or an empty array when the index has no row
    def status(codepoint)
      index =
        if codepoint < 256
          codepoint
        else
          (UTS46_DATA.bsearch_index { |row| row[0] > codepoint } || UTS46_DATA.length) - 1
        end
      UTS46_DATA[index] || []
    end
  end
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module URI
|
4
|
+
module IDNA
|
5
|
+
module UTS46
|
6
|
+
# Options for ToUnicode operation
|
7
|
+
# https://www.unicode.org/reports/tr46/#ToUnicode
|
8
|
+
# Boolean processing options packed into a single integer bit field.
class Options
  USE_STD3_ASCII_RULES = 1 << 0
  CHECK_HYPHENS = 1 << 1
  CHECK_BIDI = 1 << 2
  CHECK_JOINERS = 1 << 3
  TRANSITIONAL_PROCESSING = 1 << 4
  IGNORE_INVALID_PUNYCODE = 1 << 5

  # Every argument is treated as a plain truthy/falsy switch; a truthy value
  # sets the corresponding bit in @flags.
  def initialize(
    use_std3_ascii_rules: true,
    check_hyphens: true,
    check_bidi: true,
    check_joiners: true,
    transitional_processing: false,
    ignore_invalid_punycode: false
  )
    @flags = 0
    @flags |= USE_STD3_ASCII_RULES if use_std3_ascii_rules
    @flags |= CHECK_HYPHENS if check_hyphens
    @flags |= CHECK_BIDI if check_bidi
    @flags |= CHECK_JOINERS if check_joiners
    @flags |= TRANSITIONAL_PROCESSING if transitional_processing
    @flags |= IGNORE_INVALID_PUNYCODE if ignore_invalid_punycode
  end

  def use_std3_ascii_rules?
    flag?(USE_STD3_ASCII_RULES)
  end

  def check_hyphens?
    flag?(CHECK_HYPHENS)
  end

  def check_bidi?
    flag?(CHECK_BIDI)
  end

  def check_joiners?
    flag?(CHECK_JOINERS)
  end

  def transitional_processing?
    flag?(TRANSITIONAL_PROCESSING)
  end

  def ignore_invalid_punycode?
    flag?(IGNORE_INVALID_PUNYCODE)
  end

  private

  # True when the bit(s) in +mask+ are set in @flags.
  def flag?(mask)
    (@flags & mask) != 0
  end
end
|
57
|
+
|
58
|
+
# Options for ToASCII operation
|
59
|
+
# https://www.unicode.org/reports/tr46/#ToASCII
|
60
|
+
# Options plus the `verify_dns_length` switch. The extra switch is kept in
# its own @flags_extended field rather than in the superclass's @flags.
class ToASCIIOptions < Options
  VERIFY_DNS_LENGTH = 1 << 6

  # @param verify_dns_length [Boolean] truthy sets VERIFY_DNS_LENGTH
  # @param options [Hash] remaining keywords, forwarded to Options#initialize
  def initialize(verify_dns_length: true, **options)
    @flags_extended = verify_dns_length ? VERIFY_DNS_LENGTH : 0
    super(**options)
  end

  def verify_dns_length?
    (@flags_extended & VERIFY_DNS_LENGTH) != 0
  end
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "mapping"
|
4
|
+
require_relative "options"
|
5
|
+
|
6
|
+
module URI
|
7
|
+
module IDNA
|
8
|
+
module UTS46
|
9
|
+
# https://www.unicode.org/reports/tr46/#Processing
|
10
|
+
# UTS #46 processing: maps the domain, then decodes and validates each label.
# Relies on `options`, `domain_name`, `process_labels` and `punycode_decode`,
# which are not defined in this class (presumably from BaseProcessing).
class Processing < BaseProcessing
  # Maps the domain and runs every label through decode and validation.
  # When a block is given, each validated label is passed to it and replaced
  # by the block's result.
  #
  # @return the value produced by `process_labels`
  def call
    domain = Mapping.call(
      domain_name,
      transitional_processing: options.transitional_processing?,
      use_std3_ascii_rules: options.use_std3_ascii_rules?,
    )

    process_labels(domain) do |label|
      if label.start_with?(ACE_PREFIX)
        begin
          label = punycode_decode(label)
        rescue PunycodeError
          # With `ignore_invalid_punycode` the undecodable label is passed
          # through untouched and skips validation.
          next label if options.ignore_invalid_punycode?

          raise
        end
        # Decoded labels are always validated as non-transitional.
        validate(label, transitional_processing: false)
      else
        validate(label)
      end

      label = yield label if block_given?

      label
    end
  end

  private

  # Class used to hold the options for this processing flavour.
  def options_class
    Options
  end

  # Memoized: true when the Bidi option is enabled and
  # Validation::Bidi.check? reports that the domain needs the Bidi check.
  # `instance_variable_defined?` is used so that a cached `false` is reused.
  def check_bidi?
    return @check_bidi if instance_variable_defined?(:@check_bidi)

    @check_bidi = options.check_bidi? && Validation::Bidi.check?(domain_name)
  end

  # https://www.unicode.org/reports/tr46/#Validity_Criteria
  #
  # Returns nil for an empty label; otherwise runs the label checks, the
  # per-codepoint status check (plus CONTEXTJ when joiner checks are on) and
  # the Bidi check, letting the first failure raise.
  def validate(label, transitional_processing: options.transitional_processing?)
    return if label.empty?

    Validation::Label.check_nfc(label)
    if options.check_hyphens?
      Validation::Label.check_hyphen34(label)
      Validation::Label.check_hyphen_sides(label)
    else
      Validation::Label.check_ace_prefix(label)
    end
    Validation::Label.check_dot(label)
    Validation::Label.check_leading_combining(label)

    label.each_codepoint.with_index do |cp, pos|
      Mapping.validate_status(
        label, cp, pos,
        transitional_processing: transitional_processing, use_std3_ascii_rules: options.use_std3_ascii_rules?
      )

      Validation::Codepoint.check_contextj(label, cp, pos) if options.check_joiners?
    end
    Validation::Bidi.call(label) if check_bidi?
  end
end
|
75
|
+
|
76
|
+
# https://www.unicode.org/reports/tr46/#ToUnicode
|
77
|
+
# ToUnicode operation: uses Processing unchanged, adding no behaviour of its own.
class ToUnicode < Processing
end
|
79
|
+
|
80
|
+
# https://www.unicode.org/reports/tr46/#ToASCII
|
81
|
+
# ToASCII operation: Processing followed by Punycode encoding of every label
# and, when `verify_dns_length` is enabled, the DNS length checks.
class ToASCII < Processing
  # Uses the option set that adds `verify_dns_length`.
  def options_class
    ToASCIIOptions
  end

  # @return the processed domain with every label passed through
  #   `punycode_encode`
  def call
    result = super do |label|
      encoded = punycode_encode(label)
      Validation::Label.check_length(encoded) if options.verify_dns_length?
      encoded
    end
    Validation::Label.check_domain_length(result) if options.verify_dns_length?
    result
  end
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module URI
|
4
4
|
module IDNA
|
5
|
-
|
5
|
+
module Validation
|
6
6
|
# 4.2.3.4. Labels Containing Characters Written Right to Left
|
7
7
|
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.4
|
8
8
|
# https://datatracker.ietf.org/doc/html/rfc5893#section-2
|
@@ -10,9 +10,9 @@ module URI
|
|
10
10
|
class << self
|
11
11
|
def call(label)
|
12
12
|
# Bidi rule 1
|
13
|
-
if bidi_class(label[0], "RTL")
|
13
|
+
if bidi_class(label[0].ord, "RTL")
|
14
14
|
rtl = true
|
15
|
-
elsif bidi_class(label[0], "L")
|
15
|
+
elsif bidi_class(label[0].ord, "L")
|
16
16
|
rtl = false
|
17
17
|
else
|
18
18
|
raise BidiError, "First codepoint in label #{label} must be directionality L, R or AL"
|
@@ -20,11 +20,11 @@ module URI
|
|
20
20
|
|
21
21
|
valid_ending = false
|
22
22
|
number_type = nil
|
23
|
-
label.
|
23
|
+
label.each_codepoint.with_index do |cp, pos|
|
24
24
|
if rtl
|
25
25
|
# Bidi rule 2
|
26
26
|
if bidi_class(cp, "L") || bidi_class(cp, "UNUSED")
|
27
|
-
raise BidiError, "Invalid direction for codepoint at position #{
|
27
|
+
raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a right-to-left label"
|
28
28
|
end
|
29
29
|
|
30
30
|
# Bidi rule 3
|
@@ -42,7 +42,7 @@ module URI
|
|
42
42
|
else
|
43
43
|
# Bidi rule 5
|
44
44
|
if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
|
45
|
-
raise BidiError, "Invalid direction for codepoint at position #{
|
45
|
+
raise BidiError, "Invalid direction for codepoint at position #{pos + 1} in a left-to-right label"
|
46
46
|
end
|
47
47
|
|
48
48
|
# Bidi rule 6
|
@@ -60,12 +60,13 @@ module URI
|
|
60
60
|
end
|
61
61
|
|
62
62
|
# https://www.rfc-editor.org/rfc/rfc5891.html#section-4.2.3.4
|
63
|
-
def check?(
|
63
|
+
def check?(domain)
|
64
|
+
labels = domain.split(".", -1)
|
64
65
|
domain = labels.map do |label|
|
65
|
-
if label.start_with?(
|
66
|
+
if label.start_with?(ACE_PREFIX)
|
66
67
|
begin
|
67
|
-
Punycode.decode(label[
|
68
|
-
rescue
|
68
|
+
Punycode.decode(label[ACE_PREFIX.length..])
|
69
|
+
rescue PunycodeError
|
69
70
|
""
|
70
71
|
end
|
71
72
|
else
|
@@ -73,7 +74,7 @@ module URI
|
|
73
74
|
end
|
74
75
|
end.join(".")
|
75
76
|
|
76
|
-
domain.
|
77
|
+
domain.each_codepoint do |cp|
|
77
78
|
return true if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
|
78
79
|
end
|
79
80
|
false
|
@@ -81,8 +82,8 @@ module URI
|
|
81
82
|
|
82
83
|
private
|
83
84
|
|
84
|
-
def bidi_class(
|
85
|
-
return bidi_class if Intranges.contain?(
|
85
|
+
# Returns the +bidi_class+ name when +codepoint+ lies in the ranges stored
# under that name in BIDI_CLASSES, and false otherwise.
def bidi_class(codepoint, bidi_class)
  return bidi_class if Intranges.contain?(codepoint, BIDI_CLASSES[bidi_class])

  false
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../intranges"
|
4
|
+
require_relative "../data/idna"
|
5
|
+
|
6
|
+
module URI
|
7
|
+
module IDNA
|
8
|
+
module Validation
|
9
|
+
# Per-codepoint IDNA2008 checks: PVALID membership plus the CONTEXTJ and
# CONTEXTO contextual rules.
module Codepoint
  class << self
    # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
    #
    # @return [true] when +cp+ is in the "PVALID" class
    # @raise [InvalidCodepointError] otherwise
    def check_idna_validity(label, cp, pos)
      return true if codepoint?(cp, "PVALID")

      raise InvalidCodepointError, cp_error_message(label, cp, pos)
    end

    # https://datatracker.ietf.org/doc/html/rfc5892
    #
    # @return [Boolean] false when +cp+ is not a CONTEXTJ codepoint, true when
    #   it is and its context is valid
    # @raise [InvalidCodepointContextError] for a CONTEXTJ codepoint in an
    #   invalid context
    def check_contextj(label, cp, pos)
      return false unless codepoint?(cp, "CONTEXTJ")
      return true if valid_contextj?(label, cp, pos)

      raise InvalidCodepointContextError, cp_error_message(label, cp, pos)
    end

    # https://datatracker.ietf.org/doc/html/rfc5892
    #
    # @return [Boolean] false when +cp+ is not a CONTEXTO codepoint, true when
    #   it is and its context is valid
    # @raise [InvalidCodepointContextError] for a CONTEXTO codepoint in an
    #   invalid context
    def check_contexto(label, cp, pos)
      return false unless codepoint?(cp, "CONTEXTO")
      return true if valid_contexto?(label, cp, pos)

      raise InvalidCodepointContextError, cp_error_message(label, cp, pos)
    end

    # Builds the error text; +pos+ is zero-based and reported one-based.
    def cp_error_message(label, cp, pos)
      format("Codepoint U+%04X at position %d of %p not allowed", cp, pos + 1, label)
    end

    private

    def valid_contexto?(label, cp, pos)
      case cp
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
      when 0x00b7
        # Needs U+006C on both sides, so it cannot be first or last.
        pos > 0 && pos < label.length - 1 &&
          label[pos - 1].ord == 0x006c && label[pos + 1].ord == 0x006c
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
      when 0x0375
        pos < label.length - 1 && script?(label[pos + 1].ord, "Greek")
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
      when 0x05f3, 0x05f4
        pos > 0 && script?(label[pos - 1].ord, "Hebrew")
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
      when 0x30fb
        # Valid when any other codepoint of the label is Hiragana, Katakana or Han.
        label.each_codepoint.any? do |codepoint|
          codepoint != 0x30fb &&
            (script?(codepoint, "Hiragana") || script?(codepoint, "Katakana") || script?(codepoint, "Han"))
        end
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
      when 0x0660..0x0669
        label.each_codepoint.none? { |codepoint| codepoint >= 0x06f0 && codepoint <= 0x06f9 }
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
      when 0x06f0..0x06f9
        label.each_codepoint.none? { |codepoint| codepoint >= 0x0660 && codepoint <= 0x0669 }
      else
        false
      end
    end

    def valid_contextj?(label, cp, pos)
      case cp
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
      when 0x200c
        return true if pos > 0 && virama_combining_class?(label[pos - 1].ord)

        # The rule is a regular expression anchored on the joiner:
        #   (L|D) T* U+200C T* (R|D)
        # so on each side only the FIRST non-transparent character counts;
        # scanning must not continue past a character of any other type.
        before = first_non_transparent_joining_type(label, (pos - 1).downto(0))
        return false unless [0x4c, 0x44].include?(before) # "L", "D"

        after = first_non_transparent_joining_type(label, (pos + 1).upto(label.length - 1))
        [0x52, 0x44].include?(after) # "R", "D"
      # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
      when 0x200d
        pos > 0 ? virama_combining_class?(label[pos - 1].ord) : false
      else
        false
      end
    end

    # Walks +indexes+ in order and returns the JOINING_TYPES entry of the
    # first character whose entry is not 0x54 ("T"); nil when there is none.
    # NOTE(review): values are assumed to be the ASCII codes of the Unicode
    # Joining_Type letters — confirm against the generated data file.
    def first_non_transparent_joining_type(label, indexes)
      indexes.each do |i|
        joining_type = JOINING_TYPES[label[i].ord]
        return joining_type unless joining_type == 0x54
      end
      nil
    end

    def script?(cp, script)
      Intranges.contain?(cp, SCRIPTS[script])
    end

    def virama_combining_class?(cp)
      Intranges.contain?(cp, VIRAMA_COMBINING_CLASSES)
    end

    def codepoint?(cp, class_name)
      Intranges.contain?(cp, CODEPOINT_CLASSES[class_name])
    end
  end
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|