uri-idna 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -1
- data/README.md +149 -45
- data/lib/uri/idna/base_processing.rb +65 -0
- data/lib/uri/idna/data/idna.rb +11 -6
- data/lib/uri/idna/data/uts46.rb +4 -6
- data/lib/uri/idna/idna2008/options.rb +59 -0
- data/lib/uri/idna/idna2008/processing.rb +158 -0
- data/lib/uri/idna/intranges.rb +12 -4
- data/lib/uri/idna/punycode.rb +11 -15
- data/lib/uri/idna/uts46/mapping.rb +61 -0
- data/lib/uri/idna/uts46/options.rb +75 -0
- data/lib/uri/idna/uts46/processing.rb +98 -0
- data/lib/uri/idna/validation/bidi.rb +14 -13
- data/lib/uri/idna/validation/codepoint.rb +122 -0
- data/lib/uri/idna/validation/label.rb +70 -0
- data/lib/uri/idna/version.rb +1 -1
- data/lib/uri/idna/whatwg/processing.rb +35 -0
- data/lib/uri/idna.rb +30 -24
- data/lib/uri-idna.rb +3 -0
- metadata +12 -5
- data/lib/uri/idna/process.rb +0 -139
- data/lib/uri/idna/uts46.rb +0 -60
- data/lib/uri/idna/validation.rb +0 -199
data/lib/uri/idna/validation/label.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../intranges"
|
4
|
+
require_relative "../data/idna"
|
5
|
+
|
6
|
+
module URI
|
7
|
+
module IDNA
|
8
|
+
module Validation
|
9
|
+
module Label
|
10
|
+
class << self
|
11
|
+
# 4.1. Input to IDNA Registration
|
12
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.1
|
13
|
+
def check_nfc(label)
|
14
|
+
return if label.unicode_normalized?(:nfc)
|
15
|
+
|
16
|
+
raise Error, "Label must be in Unicode Normalization Form NFC"
|
17
|
+
end
|
18
|
+
|
19
|
+
# 4.2.3.1. Hyphen Restrictions
|
20
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
|
21
|
+
def check_hyphen34(label)
|
22
|
+
return if label[2..3] != "--"
|
23
|
+
|
24
|
+
raise Error, "Label must not contain a U+002D HYPHEN-MINUS character in both the third and fourth positions"
|
25
|
+
end
|
26
|
+
|
27
|
+
# 4.2.3.1. Hyphen Restrictions
|
28
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
|
29
|
+
def check_hyphen_sides(label)
|
30
|
+
return unless label[0] == "-" || label[-1] == "-"
|
31
|
+
|
32
|
+
raise Error, "Label must neither begin nor end with a U+002D HYPHEN-MINUS character"
|
33
|
+
end
|
34
|
+
|
35
|
+
def check_ace_prefix(label)
|
36
|
+
return unless label.start_with?(ACE_PREFIX)
|
37
|
+
|
38
|
+
raise Error, "Label must not begin with `xn--`"
|
39
|
+
end
|
40
|
+
|
41
|
+
# 4.2.3.2. Leading Combining Marks
|
42
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
|
43
|
+
def check_leading_combining(label)
|
44
|
+
return unless Intranges.contain?(label[0].ord, INITIAL_COMBINERS)
|
45
|
+
|
46
|
+
raise Error, "Label begins with an illegal combining character"
|
47
|
+
end
|
48
|
+
|
49
|
+
def check_dot(label)
|
50
|
+
raise Error, "Label must not contain a U+002E ( . ) FULL STOP" if label.include?(".")
|
51
|
+
end
|
52
|
+
|
53
|
+
# DNS label size limit
|
54
|
+
# See Processing step 4.2
|
55
|
+
# https://www.unicode.org/reports/tr46/#ToASCII
|
56
|
+
def check_length(label)
|
57
|
+
raise Error, "Label too long" unless label.length < 64
|
58
|
+
end
|
59
|
+
|
60
|
+
# DNS name size limit
|
61
|
+
# See Processing step 4.1
|
62
|
+
# https://www.unicode.org/reports/tr46/#ToASCII
|
63
|
+
def check_domain_length(domain_name)
|
64
|
+
raise Error, "Domain too long" unless domain_name.length < (domain_name[-1] == "." ? 255 : 254)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
data/lib/uri/idna/version.rb
CHANGED
data/lib/uri/idna/whatwg/processing.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module URI
|
4
|
+
module IDNA
|
5
|
+
# https://url.spec.whatwg.org/#idna
|
6
|
+
module WHATWG
|
7
|
+
class ToASCII < UTS46::ToASCII
|
8
|
+
def initialize(domain_name, be_strict: true)
|
9
|
+
super(
|
10
|
+
domain_name,
|
11
|
+
use_std3_ascii_rules: be_strict,
|
12
|
+
check_hyphens: false,
|
13
|
+
check_bidi: true,
|
14
|
+
check_joiners: true,
|
15
|
+
transitional_processing: false,
|
16
|
+
verify_dns_length: be_strict,
|
17
|
+
)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
class ToUnicode < UTS46::ToUnicode
|
22
|
+
def initialize(domain_name, be_strict: true)
|
23
|
+
super(
|
24
|
+
domain_name,
|
25
|
+
use_std3_ascii_rules: be_strict,
|
26
|
+
check_hyphens: false,
|
27
|
+
check_bidi: true,
|
28
|
+
check_joiners: true,
|
29
|
+
transitional_processing: false,
|
30
|
+
)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
data/lib/uri/idna.rb
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "idna/version"
|
4
|
-
require_relative "idna/process"
|
4
|
+
require_relative "idna/punycode"
|
5
|
+
require_relative "idna/base_processing"
|
6
|
+
require_relative "idna/idna2008/processing"
|
7
|
+
require_relative "idna/uts46/processing"
|
8
|
+
require_relative "idna/whatwg/processing"
|
5
9
|
|
6
10
|
module URI
|
7
11
|
module IDNA
|
8
|
-
|
12
|
+
ACE_PREFIX = "xn--"
|
9
13
|
|
10
14
|
class Error < StandardError; end
|
11
15
|
|
@@ -22,38 +26,40 @@ module URI
|
|
22
26
|
class PunycodeError < Error; end
|
23
27
|
|
24
28
|
class << self
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
uts46_std3: true,
|
30
|
-
uts46_transitional: false,
|
31
|
-
contexto: false,
|
32
|
-
}.freeze
|
33
|
-
|
34
|
-
LOOKUP_PARAMS = {
|
35
|
-
hyphen_sides: false,
|
36
|
-
leading_combining: false,
|
37
|
-
}.freeze
|
38
|
-
|
39
|
-
def lookup(s, **params)
|
40
|
-
Process.new(**LOOKUP_PARAMS.merge(params)).lookup(s)
|
29
|
+
# IDNA2008 Lookup protocol defined in RFC 5891
|
30
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-5
|
31
|
+
def lookup(domain_name, **options)
|
32
|
+
IDNA2008::Lookup.new(domain_name, **options).call
|
41
33
|
end
|
42
34
|
|
43
|
-
|
44
|
-
|
35
|
+
# IDNA2008 Registration protocol defined in RFC 5891
|
36
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4
|
37
|
+
def register(alabel: nil, ulabel: nil, **options)
|
38
|
+
IDNA2008::Registration.new(alabel: alabel, ulabel: ulabel, **options).call
|
45
39
|
end
|
46
40
|
|
47
41
|
# UTS46 ToUnicode process
|
48
42
|
# https://unicode.org/reports/tr46/#ToUnicode
|
49
|
-
def to_unicode(
|
50
|
-
|
43
|
+
def to_unicode(domain_name, **options)
|
44
|
+
UTS46::ToUnicode.new(domain_name, **options).call
|
51
45
|
end
|
52
46
|
|
53
47
|
# UTS46 ToASCII process
|
54
48
|
# https://unicode.org/reports/tr46/#ToASCII
|
55
|
-
def to_ascii(
|
56
|
-
|
49
|
+
def to_ascii(domain_name, **options)
|
50
|
+
UTS46::ToASCII.new(domain_name, **options).call
|
51
|
+
end
|
52
|
+
|
53
|
+
# WHATWG URL Standard domain to Unicode algorithm
|
54
|
+
# https://url.spec.whatwg.org/#idna
|
55
|
+
def whatwg_to_unicode(domain_name, **options)
|
56
|
+
WHATWG::ToUnicode.new(domain_name, **options).call
|
57
|
+
end
|
58
|
+
|
59
|
+
# WHATWG URL Standard domain to ASCII algorithm
|
60
|
+
# https://url.spec.whatwg.org/#idna
|
61
|
+
def whatwg_to_ascii(domain_name, **options)
|
62
|
+
WHATWG::ToASCII.new(domain_name, **options).call
|
57
63
|
end
|
58
64
|
end
|
59
65
|
end
|
data/lib/uri-idna.rb
ADDED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uri-idna
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Svyatoslav Kryukov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Internationalized Domain Names in Applications (IDNA)
|
14
14
|
email:
|
@@ -20,16 +20,23 @@ files:
|
|
20
20
|
- CHANGELOG.md
|
21
21
|
- LICENSE.txt
|
22
22
|
- README.md
|
23
|
+
- lib/uri-idna.rb
|
23
24
|
- lib/uri/idna.rb
|
25
|
+
- lib/uri/idna/base_processing.rb
|
24
26
|
- lib/uri/idna/data/idna.rb
|
25
27
|
- lib/uri/idna/data/uts46.rb
|
28
|
+
- lib/uri/idna/idna2008/options.rb
|
29
|
+
- lib/uri/idna/idna2008/processing.rb
|
26
30
|
- lib/uri/idna/intranges.rb
|
27
|
-
- lib/uri/idna/process.rb
|
28
31
|
- lib/uri/idna/punycode.rb
|
29
|
-
- lib/uri/idna/uts46.rb
|
30
|
-
- lib/uri/idna/validation.rb
|
32
|
+
- lib/uri/idna/uts46/mapping.rb
|
33
|
+
- lib/uri/idna/uts46/options.rb
|
34
|
+
- lib/uri/idna/uts46/processing.rb
|
31
35
|
- lib/uri/idna/validation/bidi.rb
|
36
|
+
- lib/uri/idna/validation/codepoint.rb
|
37
|
+
- lib/uri/idna/validation/label.rb
|
32
38
|
- lib/uri/idna/version.rb
|
39
|
+
- lib/uri/idna/whatwg/processing.rb
|
33
40
|
homepage: https://github.com/skryukov/uri-idna
|
34
41
|
licenses:
|
35
42
|
- MIT
|
data/lib/uri/idna/process.rb
DELETED
@@ -1,139 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative "punycode"
|
4
|
-
require_relative "validation"
|
5
|
-
|
6
|
-
require_relative "uts46"
|
7
|
-
|
8
|
-
module URI
|
9
|
-
module IDNA
|
10
|
-
class Process
|
11
|
-
UNICODE_DOTS_REGEX = /[\u002e\u3002\uff0e\uff61]/.freeze
|
12
|
-
|
13
|
-
def initialize(**options)
|
14
|
-
@options = options
|
15
|
-
end
|
16
|
-
|
17
|
-
def register(alabel: nil, ulabel: nil)
|
18
|
-
raise ArgumentError, "Provide alabel or ulabel" if alabel.nil? && ulabel.nil?
|
19
|
-
|
20
|
-
return encode(ulabel) if alabel.nil?
|
21
|
-
|
22
|
-
raise ArgumentError, "String expected" unless alabel.is_a?(String)
|
23
|
-
raise Error, "Invalid alabel #{alabel}" unless alabel.start_with?(ALABEL_PREFIX)
|
24
|
-
|
25
|
-
process_labels(alabel) do |l|
|
26
|
-
to_alabel(l, roundtrip: true, ulabel: ulabel)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def lookup(s)
|
31
|
-
raise ArgumentError, "String expected" unless s.is_a?(String)
|
32
|
-
|
33
|
-
s = process_labels(s) do |l|
|
34
|
-
to_alabel(l, roundtrip: true)
|
35
|
-
end
|
36
|
-
validate_domain_length(s) if options.fetch(:dns_length, true)
|
37
|
-
s
|
38
|
-
end
|
39
|
-
|
40
|
-
def encode(s)
|
41
|
-
raise ArgumentError, "String expected" unless s.is_a?(String)
|
42
|
-
|
43
|
-
s = process_labels(s) { |l| to_alabel(l) }
|
44
|
-
validate_domain_length(s) if options.fetch(:dns_length, true)
|
45
|
-
s
|
46
|
-
end
|
47
|
-
|
48
|
-
def decode(s)
|
49
|
-
raise ArgumentError, "String expected" unless s.is_a?(String)
|
50
|
-
|
51
|
-
process_labels(s) { |l| to_ulabel(l) }
|
52
|
-
end
|
53
|
-
|
54
|
-
private
|
55
|
-
|
56
|
-
attr_reader :labels, :options
|
57
|
-
|
58
|
-
def splitter
|
59
|
-
@splitter ||= options.fetch(:uts46, false) ? "." : UNICODE_DOTS_REGEX
|
60
|
-
end
|
61
|
-
|
62
|
-
def process_labels(s)
|
63
|
-
s = UTS46.map_string(s, **options.slice(:uts46_std3, :uts46_transitional)) if options.fetch(:uts46, false)
|
64
|
-
@labels ||= s.split(splitter, -1)
|
65
|
-
trailing_dot = labels[-1] && labels[-1].empty? ? labels.pop : false
|
66
|
-
|
67
|
-
raise Error, "Empty domain" if labels.empty? || labels == [""]
|
68
|
-
|
69
|
-
result = []
|
70
|
-
labels.each do |label|
|
71
|
-
str = yield(label)
|
72
|
-
raise Error, "Empty label" if str.empty?
|
73
|
-
|
74
|
-
result << str
|
75
|
-
end
|
76
|
-
|
77
|
-
result << "" if trailing_dot
|
78
|
-
result.join(".")
|
79
|
-
end
|
80
|
-
|
81
|
-
def to_alabel(label, roundtrip: false, ulabel: nil)
|
82
|
-
orig_label = label
|
83
|
-
# validate label is a valid U-label
|
84
|
-
label = to_ulabel(label)
|
85
|
-
if ulabel && ulabel != label
|
86
|
-
raise Error, "Provided ulabel does not match conversion of alabel, #{ulabel.inspect} != #{label.inspect}"
|
87
|
-
end
|
88
|
-
|
89
|
-
label = encode_punycode_label(label) unless label.ascii_only?
|
90
|
-
validate_label_length(label)
|
91
|
-
|
92
|
-
if roundtrip && orig_label.ascii_only? && orig_label != label
|
93
|
-
raise Error, "Roundtrip encoding failed, #{orig_label.inspect} != #{label.inspect}"
|
94
|
-
end
|
95
|
-
|
96
|
-
label
|
97
|
-
end
|
98
|
-
|
99
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-5.3
|
100
|
-
def to_ulabel(label)
|
101
|
-
decoded = false
|
102
|
-
label, decoded = decode_punycode_label(label) if label.ascii_only?
|
103
|
-
validation.call(label, decoded: decoded)
|
104
|
-
label
|
105
|
-
end
|
106
|
-
|
107
|
-
def encode_punycode_label(label)
|
108
|
-
ALABEL_PREFIX + Punycode.encode(label)
|
109
|
-
end
|
110
|
-
|
111
|
-
def decode_punycode_label(label)
|
112
|
-
label = label.downcase
|
113
|
-
return [label, false] unless label.start_with?(ALABEL_PREFIX)
|
114
|
-
|
115
|
-
code = label[ALABEL_PREFIX.length..]
|
116
|
-
raise Error, "Malformed A-label, no Punycode eligible content found" if code.empty?
|
117
|
-
raise Error, "A-label must not end with a hyphen" if code[-1] == "-"
|
118
|
-
|
119
|
-
[URI::IDNA::Punycode.decode(code), true]
|
120
|
-
end
|
121
|
-
|
122
|
-
def validate_label_length(label)
|
123
|
-
raise Error, "Label too long" unless label.length < 64
|
124
|
-
end
|
125
|
-
|
126
|
-
def validate_domain_length(s)
|
127
|
-
raise Error, "Domain too long" unless s.length < (s[-1] == "." ? 255 : 254)
|
128
|
-
end
|
129
|
-
|
130
|
-
def validation
|
131
|
-
@validation ||= Validation.new(options.merge(bidi: check_bidi?))
|
132
|
-
end
|
133
|
-
|
134
|
-
def check_bidi?
|
135
|
-
options.fetch(:bidi, true) && Validation::Bidi.check?(labels)
|
136
|
-
end
|
137
|
-
end
|
138
|
-
end
|
139
|
-
end
|
data/lib/uri/idna/uts46.rb
DELETED
@@ -1,60 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative "data/uts46"
|
4
|
-
|
5
|
-
module URI
|
6
|
-
module IDNA
|
7
|
-
module UTS46
|
8
|
-
class << self
|
9
|
-
# https://unicode.org/reports/tr46/#ProcessingStepMap
|
10
|
-
def map_string(domain, uts46_std3: true, uts46_transitional: false)
|
11
|
-
output = ""
|
12
|
-
domain.each_char do |char|
|
13
|
-
code_point = char.ord
|
14
|
-
_, status, replacement = uts46_status(code_point)
|
15
|
-
case status
|
16
|
-
when "I"
|
17
|
-
next
|
18
|
-
when "V"
|
19
|
-
output += char
|
20
|
-
when "M"
|
21
|
-
output += replacement
|
22
|
-
when "D"
|
23
|
-
output += uts46_transitional ? replacement : char
|
24
|
-
when "3"
|
25
|
-
if uts46_std3
|
26
|
-
raise InvalidCodepointError,
|
27
|
-
"Codepoint #{code_point} not allowed in #{domain} via STD3 rules"
|
28
|
-
end
|
29
|
-
|
30
|
-
output += replacement || char
|
31
|
-
else
|
32
|
-
raise InvalidCodepointError, "Codepoint #{code_point} not allowed in #{domain}"
|
33
|
-
end
|
34
|
-
end
|
35
|
-
output.unicode_normalize(:nfc)
|
36
|
-
end
|
37
|
-
|
38
|
-
def valid?(char, uts46_transitional: false)
|
39
|
-
_, status, = uts46_status(char.ord)
|
40
|
-
return true if status == "V"
|
41
|
-
return true if uts46_transitional && status == "D"
|
42
|
-
|
43
|
-
false
|
44
|
-
end
|
45
|
-
|
46
|
-
private
|
47
|
-
|
48
|
-
def uts46_status(code_point)
|
49
|
-
index =
|
50
|
-
if code_point < 256
|
51
|
-
code_point
|
52
|
-
else
|
53
|
-
(UTS46_DATA.bsearch_index { |x| x[0] > code_point } || UTS46_DATA.length) - 1
|
54
|
-
end
|
55
|
-
UTS46_DATA[index] || []
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
data/lib/uri/idna/validation.rb
DELETED
@@ -1,199 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative "intranges"
|
4
|
-
require_relative "data/idna"
|
5
|
-
require_relative "validation/bidi"
|
6
|
-
|
7
|
-
module URI
|
8
|
-
module IDNA
|
9
|
-
# U-label domain validation for IDNA and UTS46.
|
10
|
-
class Validation
|
11
|
-
# @param [Hash] params
|
12
|
-
# @option params [Boolean] :nfc Normalize to NFC (true by default)
|
13
|
-
# @option params [Boolean] :hyphen34 Hyphen restrictions (true by default)
|
14
|
-
# @option params [Boolean] :hyphen_sides Hyphen restrictions (true for the register protocol and UTS46)
|
15
|
-
# @option params [Boolean] :leading_combining Leading combining marks (true for the register protocol and UTS46)
|
16
|
-
# @option params [Boolean] :contextj Contextual rules CONTEXTJ (true by default)
|
17
|
-
# @option params [Boolean] :contexto Contextual rules CONTEXTO (true for IDNA2008 protocols)
|
18
|
-
# @option params [Boolean] :bidi Bidi rules (true by default)
|
19
|
-
# @option params [Boolean] :idna_validity IDNA2008 validity (true for IDNA2008 protocols)
|
20
|
-
# @option params [Boolean] :uts46 UTS46 validity (true for UTS46)
|
21
|
-
# @option params [Boolean] :uts46_transitional UTS46 transitional validity (false by default)
|
22
|
-
# @option params [Boolean] :check_dot Check for dots (true for UTS46)
|
23
|
-
#
|
24
|
-
def initialize(params)
|
25
|
-
@nfc = params.fetch(:nfc, true)
|
26
|
-
@hyphen34 = params.fetch(:hyphen34, true)
|
27
|
-
@hyphen_sides = params.fetch(:hyphen_sides, true)
|
28
|
-
|
29
|
-
# Contextual rules
|
30
|
-
@leading_combining = params.fetch(:leading_combining, true)
|
31
|
-
@contextj = params.fetch(:contextj, true)
|
32
|
-
@contexto = params.fetch(:contexto, true)
|
33
|
-
@bidi = params.fetch(:bidi, true)
|
34
|
-
# IDNA2008 specific
|
35
|
-
@idna_validity = params.fetch(:idna_validity, true)
|
36
|
-
|
37
|
-
# UTS46 specific
|
38
|
-
@uts46 = params.fetch(:uts46, false)
|
39
|
-
@uts46_transitional = params.fetch(:uts46_transitional, false)
|
40
|
-
@check_dot = params.fetch(:check_dot, false)
|
41
|
-
end
|
42
|
-
|
43
|
-
def call(label, decoded: false)
|
44
|
-
raise Error, "Empty label" if label.empty?
|
45
|
-
|
46
|
-
check_nfc(label) if @nfc
|
47
|
-
check_hyphen34(label) if @hyphen34
|
48
|
-
check_hyphen_sides(label) if @hyphen_sides
|
49
|
-
check_leading_combining(label) if @leading_combining
|
50
|
-
check_dot(label) if @check_dot
|
51
|
-
label.each_char.with_index do |cp, pos|
|
52
|
-
next if codepoint?(cp, "PVALID")
|
53
|
-
|
54
|
-
if @contextj && codepoint?(cp, "CONTEXTJ")
|
55
|
-
next if valid_contextj?(label, pos)
|
56
|
-
|
57
|
-
raise InvalidCodepointContextError, cp_error_message(cp, label, pos)
|
58
|
-
end
|
59
|
-
|
60
|
-
if @contexto && codepoint?(cp, "CONTEXTO")
|
61
|
-
next if valid_contexto?(label, pos)
|
62
|
-
|
63
|
-
raise InvalidCodepointContextError, cp_error_message(cp, label, pos)
|
64
|
-
end
|
65
|
-
|
66
|
-
# 4.2.2. Rejection of Characters That Are Not Permitted
|
67
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
|
68
|
-
raise InvalidCodepointError, cp_error_message(cp, label, pos) if @idna_validity
|
69
|
-
|
70
|
-
if @uts46 && !UTS46.valid?(cp, uts46_transitional: @uts46_transitional && !decoded)
|
71
|
-
raise InvalidCodepointError, cp_error_message(cp, label, pos)
|
72
|
-
end
|
73
|
-
end
|
74
|
-
Bidi.call(label) if @bidi
|
75
|
-
end
|
76
|
-
|
77
|
-
private
|
78
|
-
|
79
|
-
# 4.1. Input to IDNA Registration
|
80
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.1
|
81
|
-
def check_nfc(label)
|
82
|
-
return true if label.unicode_normalized?(:nfc)
|
83
|
-
|
84
|
-
raise Error, "Label must be in Normalization Form C"
|
85
|
-
end
|
86
|
-
|
87
|
-
# 4.2.3.1. Hyphen Restrictions
|
88
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
|
89
|
-
def check_hyphen34(label)
|
90
|
-
return unless label[2..3] == "--"
|
91
|
-
|
92
|
-
raise Error, "Label has disallowed hyphens in 3rd and 4th position"
|
93
|
-
end
|
94
|
-
|
95
|
-
# 4.2.3.1. Hyphen Restrictions
|
96
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
|
97
|
-
def check_hyphen_sides(label)
|
98
|
-
return unless label[0] == "-" || label[-1] == "-"
|
99
|
-
|
100
|
-
raise Error, "Label must not start or end with a hyphen"
|
101
|
-
end
|
102
|
-
|
103
|
-
# 4.2.3.2. Leading Combining Marks
|
104
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
|
105
|
-
def check_leading_combining(label)
|
106
|
-
return unless Intranges.contain?(label[0].ord, INITIAL_COMBINERS)
|
107
|
-
|
108
|
-
raise Error, "Label begins with an illegal combining character"
|
109
|
-
end
|
110
|
-
|
111
|
-
def check_dot(label)
|
112
|
-
raise Error, "Label must not contain dots" if label.include?(".")
|
113
|
-
end
|
114
|
-
|
115
|
-
def valid_contexto?(label, pos)
|
116
|
-
cp_value = label[pos].ord
|
117
|
-
case cp_value
|
118
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
|
119
|
-
when 0x00b7
|
120
|
-
pos > 0 && pos < label.length - 1 ? (label[pos - 1].ord == 0x006c && label[pos + 1].ord == 0x006c) : false
|
121
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
|
122
|
-
when 0x0375
|
123
|
-
pos < label.length - 1 ? script?(label[pos + 1], "Greek") : false
|
124
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
|
125
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
|
126
|
-
when 0x05f3, 0x05f4
|
127
|
-
pos > 0 ? script?(label[pos - 1], "Hebrew") : false
|
128
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
|
129
|
-
when 0x30fb
|
130
|
-
label.each_char do |cp|
|
131
|
-
next if cp.ord == 0x30fb
|
132
|
-
return true if script?(cp, "Hiragana") || script?(cp, "Katakana") || script?(cp, "Han")
|
133
|
-
end
|
134
|
-
false
|
135
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
|
136
|
-
when 0x0660..0x0669
|
137
|
-
label.each_char do |cp|
|
138
|
-
return false if cp.ord >= 0x06f0 && cp.ord <= 0x06f9
|
139
|
-
end
|
140
|
-
true
|
141
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
|
142
|
-
when 0x06f0..0x06f9
|
143
|
-
label.each_char do |cp|
|
144
|
-
return false if cp.ord >= 0x0660 && cp.ord <= 0x0669
|
145
|
-
end
|
146
|
-
true
|
147
|
-
else
|
148
|
-
false
|
149
|
-
end
|
150
|
-
end
|
151
|
-
|
152
|
-
def valid_contextj?(label, pos)
|
153
|
-
case label[pos].ord
|
154
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
|
155
|
-
when 0x200c
|
156
|
-
return true if pos > 0 && virama_combining_class?(label[pos - 1])
|
157
|
-
|
158
|
-
ok = false
|
159
|
-
(pos - 1).downto(0) do |i|
|
160
|
-
joining_type = JOINING_TYPES[label[i].ord]
|
161
|
-
next if joining_type == 0x54
|
162
|
-
|
163
|
-
if [0x4c, 0x44].include?(joining_type)
|
164
|
-
ok = true
|
165
|
-
break
|
166
|
-
end
|
167
|
-
end
|
168
|
-
return false unless ok
|
169
|
-
|
170
|
-
(pos + 1).upto(label.length - 1) do |i|
|
171
|
-
joining_type = JOINING_TYPES[label[i].ord]
|
172
|
-
next if joining_type == 0x54
|
173
|
-
return true if [0x52, 0x44].include?(joining_type)
|
174
|
-
end
|
175
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
|
176
|
-
when 0x200d
|
177
|
-
return virama_combining_class?(label[pos - 1]) if pos > 0
|
178
|
-
end
|
179
|
-
false
|
180
|
-
end
|
181
|
-
|
182
|
-
def codepoint?(cp, class_name)
|
183
|
-
Intranges.contain?(cp.ord, CODEPOINT_CLASSES[class_name])
|
184
|
-
end
|
185
|
-
|
186
|
-
def script?(cp, script)
|
187
|
-
Intranges.contain?(cp.ord, SCRIPTS[script])
|
188
|
-
end
|
189
|
-
|
190
|
-
def virama_combining_class?(cp)
|
191
|
-
Intranges.contain?(cp.ord, VIRAMA_COMBINING_CLASSES)
|
192
|
-
end
|
193
|
-
|
194
|
-
def cp_error_message(cp, label, pos)
|
195
|
-
format("Codepoint U+%04X at position %d of %p not allowed", cp.ord, pos + 1, label)
|
196
|
-
end
|
197
|
-
end
|
198
|
-
end
|
199
|
-
end
|