uri-idna 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -1
- data/README.md +149 -45
- data/lib/uri/idna/base_processing.rb +65 -0
- data/lib/uri/idna/data/idna.rb +11 -6
- data/lib/uri/idna/data/uts46.rb +4 -6
- data/lib/uri/idna/idna2008/options.rb +59 -0
- data/lib/uri/idna/idna2008/processing.rb +158 -0
- data/lib/uri/idna/intranges.rb +12 -4
- data/lib/uri/idna/punycode.rb +11 -15
- data/lib/uri/idna/uts46/mapping.rb +61 -0
- data/lib/uri/idna/uts46/options.rb +75 -0
- data/lib/uri/idna/uts46/processing.rb +98 -0
- data/lib/uri/idna/validation/bidi.rb +14 -13
- data/lib/uri/idna/validation/codepoint.rb +122 -0
- data/lib/uri/idna/validation/label.rb +70 -0
- data/lib/uri/idna/version.rb +1 -1
- data/lib/uri/idna/whatwg/processing.rb +35 -0
- data/lib/uri/idna.rb +30 -24
- data/lib/uri-idna.rb +3 -0
- metadata +12 -5
- data/lib/uri/idna/process.rb +0 -139
- data/lib/uri/idna/uts46.rb +0 -60
- data/lib/uri/idna/validation.rb +0 -199
data/lib/uri/idna/validation/label.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../intranges"
|
4
|
+
require_relative "../data/idna"
|
5
|
+
|
6
|
+
module URI
  module IDNA
    module Validation
      # Structural checks for a single label, plus the DNS length limits.
      #
      # Every check returns nil when its input passes and raises Error otherwise.
      module Label
        class << self
          # 4.1. Input to IDNA Registration
          # https://datatracker.ietf.org/doc/html/rfc5891#section-4.1
          #
          # @param label [String]
          # @return [nil]
          # @raise [Error] if the label is not in Unicode Normalization Form C
          def check_nfc(label)
            return if label.unicode_normalized?(:nfc)

            raise Error, "Label must be in Unicode Normalization Form NFC"
          end

          # 4.2.3.1. Hyphen Restrictions
          # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
          #
          # @param label [String]
          # @return [nil]
          # @raise [Error] if the third and fourth characters are both "-"
          def check_hyphen34(label)
            # For labels shorter than four characters the slice is nil or a
            # single character, so it can never equal "--".
            return unless label[2..3] == "--"

            raise Error, "Label must not contain a U+002D HYPHEN-MINUS character in both the third and fourth positions"
          end

          # 4.2.3.1. Hyphen Restrictions
          # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
          #
          # @param label [String]
          # @return [nil]
          # @raise [Error] if the first or the last character is "-"
          def check_hyphen_sides(label)
            return unless label[0] == "-" || label[-1] == "-"

            raise Error, "Label must neither begin nor end with a U+002D HYPHEN-MINUS character"
          end

          # Rejects labels that start with ACE_PREFIX.
          #
          # @param label [String]
          # @return [nil]
          # @raise [Error] if the label starts with ACE_PREFIX
          def check_ace_prefix(label)
            return unless label.start_with?(ACE_PREFIX)

            raise Error, "Label must not begin with `xn--`"
          end

          # 4.2.3.2. Leading Combining Marks
          # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
          #
          # @param label [String]
          # @return [nil]
          # @raise [Error] if the first character is listed in INITIAL_COMBINERS
          def check_leading_combining(label)
            first_char = label[0]
            # An empty label has no leading character; pass instead of
            # failing with NoMethodError on nil.ord.
            return if first_char.nil?
            return unless Intranges.contain?(first_char.ord, INITIAL_COMBINERS)

            raise Error, "Label begins with an illegal combining character"
          end

          # Rejects labels that contain a U+002E FULL STOP.
          #
          # @param label [String]
          # @return [nil]
          # @raise [Error] if the label contains "."
          def check_dot(label)
            raise Error, "Label must not contain a U+002E ( . ) FULL STOP" if label.include?(".")
          end

          # DNS label size limit
          # See Processing step 4.2
          # https://www.unicode.org/reports/tr46/#ToASCII
          #
          # @param label [String]
          # @return [nil]
          # @raise [Error] if the label is 64 characters or longer
          def check_length(label)
            raise Error, "Label too long" unless label.length < 64
          end

          # DNS name size limit
          # See Processing step 4.1
          # https://www.unicode.org/reports/tr46/#ToASCII
          #
          # @param domain_name [String]
          # @return [nil]
          # @raise [Error] if the name exceeds 253 characters, or 254 when it ends with "."
          def check_domain_length(domain_name)
            # A trailing dot gets one extra character of allowance.
            limit = domain_name[-1] == "." ? 255 : 254
            raise Error, "Domain too long" unless domain_name.length < limit
          end
        end
      end
    end
  end
end
|
data/lib/uri/idna/version.rb
CHANGED
data/lib/uri/idna/whatwg/processing.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module URI
  module IDNA
    # https://url.spec.whatwg.org/#idna
    module WHATWG
      # UTS46::ToASCII with a fixed option set: hyphen checks off, bidi and
      # joiner checks on, non-transitional processing.
      class ToASCII < UTS46::ToASCII
        # @param domain_name [String]
        # @param be_strict [Boolean] value passed as both use_std3_ascii_rules
        #   and verify_dns_length
        def initialize(domain_name, be_strict: true)
          settings = {
            use_std3_ascii_rules: be_strict,
            check_hyphens: false,
            check_bidi: true,
            check_joiners: true,
            transitional_processing: false,
            verify_dns_length: be_strict,
          }
          super(domain_name, **settings)
        end
      end

      # UTS46::ToUnicode with a fixed option set: hyphen checks off, bidi and
      # joiner checks on, non-transitional processing.
      class ToUnicode < UTS46::ToUnicode
        # @param domain_name [String]
        # @param be_strict [Boolean] value passed as use_std3_ascii_rules
        def initialize(domain_name, be_strict: true)
          settings = {
            use_std3_ascii_rules: be_strict,
            check_hyphens: false,
            check_bidi: true,
            check_joiners: true,
            transitional_processing: false,
          }
          super(domain_name, **settings)
        end
      end
    end
  end
end
|
data/lib/uri/idna.rb
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "idna/version"
|
4
|
-
require_relative "idna/
|
4
|
+
require_relative "idna/punycode"
|
5
|
+
require_relative "idna/base_processing"
|
6
|
+
require_relative "idna/idna2008/processing"
|
7
|
+
require_relative "idna/uts46/processing"
|
8
|
+
require_relative "idna/whatwg/processing"
|
5
9
|
|
6
10
|
module URI
|
7
11
|
module IDNA
|
8
|
-
|
12
|
+
ACE_PREFIX = "xn--"
|
9
13
|
|
10
14
|
class Error < StandardError; end
|
11
15
|
|
@@ -22,38 +26,40 @@ module URI
|
|
22
26
|
class PunycodeError < Error; end
|
23
27
|
|
24
28
|
class << self
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
uts46_std3: true,
|
30
|
-
uts46_transitional: false,
|
31
|
-
contexto: false,
|
32
|
-
}.freeze
|
33
|
-
|
34
|
-
LOOKUP_PARAMS = {
|
35
|
-
hyphen_sides: false,
|
36
|
-
leading_combining: false,
|
37
|
-
}.freeze
|
38
|
-
|
39
|
-
def lookup(s, **params)
|
40
|
-
Process.new(**LOOKUP_PARAMS.merge(params)).lookup(s)
|
29
|
+
# IDNA2008 Lookup protocol defined in RFC 5891
|
30
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-5
|
31
|
+
def lookup(domain_name, **options)
|
32
|
+
IDNA2008::Lookup.new(domain_name, **options).call
|
41
33
|
end
|
42
34
|
|
43
|
-
|
44
|
-
|
35
|
+
# IDNA2008 Registration protocol defined in RFC 5891
|
36
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4
|
37
|
+
def register(alabel: nil, ulabel: nil, **options)
|
38
|
+
IDNA2008::Registration.new(alabel: alabel, ulabel: ulabel, **options).call
|
45
39
|
end
|
46
40
|
|
47
41
|
# UTS46 ToUnicode process
|
48
42
|
# https://unicode.org/reports/tr46/#ToUnicode
|
49
|
-
def to_unicode(
|
50
|
-
|
43
|
+
def to_unicode(domain_name, **options)
|
44
|
+
UTS46::ToUnicode.new(domain_name, **options).call
|
51
45
|
end
|
52
46
|
|
53
47
|
# UTS46 ToASCII process
|
54
48
|
# https://unicode.org/reports/tr46/#ToASCII
|
55
|
-
def to_ascii(
|
56
|
-
|
49
|
+
def to_ascii(domain_name, **options)
|
50
|
+
UTS46::ToASCII.new(domain_name, **options).call
|
51
|
+
end
|
52
|
+
|
53
|
+
# WHATWG URL Standard domain to Unicode algorithm
|
54
|
+
# https://url.spec.whatwg.org/#idna
|
55
|
+
def whatwg_to_unicode(domain_name, **options)
|
56
|
+
WHATWG::ToUnicode.new(domain_name, **options).call
|
57
|
+
end
|
58
|
+
|
59
|
+
# WHATWG URL Standard domain to ASCII algorithm
|
60
|
+
# https://url.spec.whatwg.org/#idna
|
61
|
+
def whatwg_to_ascii(domain_name, **options)
|
62
|
+
WHATWG::ToASCII.new(domain_name, **options).call
|
57
63
|
end
|
58
64
|
end
|
59
65
|
end
|
data/lib/uri-idna.rb
ADDED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uri-idna
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Svyatoslav Kryukov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Internationalized Domain Names in Applications (IDNA)
|
14
14
|
email:
|
@@ -20,16 +20,23 @@ files:
|
|
20
20
|
- CHANGELOG.md
|
21
21
|
- LICENSE.txt
|
22
22
|
- README.md
|
23
|
+
- lib/uri-idna.rb
|
23
24
|
- lib/uri/idna.rb
|
25
|
+
- lib/uri/idna/base_processing.rb
|
24
26
|
- lib/uri/idna/data/idna.rb
|
25
27
|
- lib/uri/idna/data/uts46.rb
|
28
|
+
- lib/uri/idna/idna2008/options.rb
|
29
|
+
- lib/uri/idna/idna2008/processing.rb
|
26
30
|
- lib/uri/idna/intranges.rb
|
27
|
-
- lib/uri/idna/process.rb
|
28
31
|
- lib/uri/idna/punycode.rb
|
29
|
-
- lib/uri/idna/uts46.rb
|
30
|
-
- lib/uri/idna/
|
32
|
+
- lib/uri/idna/uts46/mapping.rb
|
33
|
+
- lib/uri/idna/uts46/options.rb
|
34
|
+
- lib/uri/idna/uts46/processing.rb
|
31
35
|
- lib/uri/idna/validation/bidi.rb
|
36
|
+
- lib/uri/idna/validation/codepoint.rb
|
37
|
+
- lib/uri/idna/validation/label.rb
|
32
38
|
- lib/uri/idna/version.rb
|
39
|
+
- lib/uri/idna/whatwg/processing.rb
|
33
40
|
homepage: https://github.com/skryukov/uri-idna
|
34
41
|
licenses:
|
35
42
|
- MIT
|
data/lib/uri/idna/process.rb
DELETED
@@ -1,139 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative "punycode"
|
4
|
-
require_relative "validation"
|
5
|
-
|
6
|
-
require_relative "uts46"
|
7
|
-
|
8
|
-
module URI
|
9
|
-
module IDNA
|
10
|
-
class Process
|
11
|
-
UNICODE_DOTS_REGEX = /[\u002e\u3002\uff0e\uff61]/.freeze
|
12
|
-
|
13
|
-
def initialize(**options)
|
14
|
-
@options = options
|
15
|
-
end
|
16
|
-
|
17
|
-
def register(alabel: nil, ulabel: nil)
|
18
|
-
raise ArgumentError, "Provide alabel or ulabel" if alabel.nil? && ulabel.nil?
|
19
|
-
|
20
|
-
return encode(ulabel) if alabel.nil?
|
21
|
-
|
22
|
-
raise ArgumentError, "String expected" unless alabel.is_a?(String)
|
23
|
-
raise Error, "Invalid alabel #{alabel}" unless alabel.start_with?(ALABEL_PREFIX)
|
24
|
-
|
25
|
-
process_labels(alabel) do |l|
|
26
|
-
to_alabel(l, roundtrip: true, ulabel: ulabel)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def lookup(s)
|
31
|
-
raise ArgumentError, "String expected" unless s.is_a?(String)
|
32
|
-
|
33
|
-
s = process_labels(s) do |l|
|
34
|
-
to_alabel(l, roundtrip: true)
|
35
|
-
end
|
36
|
-
validate_domain_length(s) if options.fetch(:dns_length, true)
|
37
|
-
s
|
38
|
-
end
|
39
|
-
|
40
|
-
def encode(s)
|
41
|
-
raise ArgumentError, "String expected" unless s.is_a?(String)
|
42
|
-
|
43
|
-
s = process_labels(s) { |l| to_alabel(l) }
|
44
|
-
validate_domain_length(s) if options.fetch(:dns_length, true)
|
45
|
-
s
|
46
|
-
end
|
47
|
-
|
48
|
-
def decode(s)
|
49
|
-
raise ArgumentError, "String expected" unless s.is_a?(String)
|
50
|
-
|
51
|
-
process_labels(s) { |l| to_ulabel(l) }
|
52
|
-
end
|
53
|
-
|
54
|
-
private
|
55
|
-
|
56
|
-
attr_reader :labels, :options
|
57
|
-
|
58
|
-
def splitter
|
59
|
-
@splitter ||= options.fetch(:uts46, false) ? "." : UNICODE_DOTS_REGEX
|
60
|
-
end
|
61
|
-
|
62
|
-
def process_labels(s)
|
63
|
-
s = UTS46.map_string(s, **options.slice(:uts46_std3, :uts46_transitional)) if options.fetch(:uts46, false)
|
64
|
-
@labels ||= s.split(splitter, -1)
|
65
|
-
trailing_dot = labels[-1] && labels[-1].empty? ? labels.pop : false
|
66
|
-
|
67
|
-
raise Error, "Empty domain" if labels.empty? || labels == [""]
|
68
|
-
|
69
|
-
result = []
|
70
|
-
labels.each do |label|
|
71
|
-
str = yield(label)
|
72
|
-
raise Error, "Empty label" if str.empty?
|
73
|
-
|
74
|
-
result << str
|
75
|
-
end
|
76
|
-
|
77
|
-
result << "" if trailing_dot
|
78
|
-
result.join(".")
|
79
|
-
end
|
80
|
-
|
81
|
-
def to_alabel(label, roundtrip: false, ulabel: nil)
|
82
|
-
orig_label = label
|
83
|
-
# validate label is a valid U-label
|
84
|
-
label = to_ulabel(label)
|
85
|
-
if ulabel && ulabel != label
|
86
|
-
raise Error, "Provided ulabel does not match conversion of alabel, #{ulabel.inspect} != #{label.inspect}"
|
87
|
-
end
|
88
|
-
|
89
|
-
label = encode_punycode_label(label) unless label.ascii_only?
|
90
|
-
validate_label_length(label)
|
91
|
-
|
92
|
-
if roundtrip && orig_label.ascii_only? && orig_label != label
|
93
|
-
raise Error, "Roundtrip encoding failed, #{orig_label.inspect} != #{label.inspect}"
|
94
|
-
end
|
95
|
-
|
96
|
-
label
|
97
|
-
end
|
98
|
-
|
99
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-5.3
|
100
|
-
def to_ulabel(label)
|
101
|
-
decoded = false
|
102
|
-
label, decoded = decode_punycode_label(label) if label.ascii_only?
|
103
|
-
validation.call(label, decoded: decoded)
|
104
|
-
label
|
105
|
-
end
|
106
|
-
|
107
|
-
def encode_punycode_label(label)
|
108
|
-
ALABEL_PREFIX + Punycode.encode(label)
|
109
|
-
end
|
110
|
-
|
111
|
-
def decode_punycode_label(label)
|
112
|
-
label = label.downcase
|
113
|
-
return [label, false] unless label.start_with?(ALABEL_PREFIX)
|
114
|
-
|
115
|
-
code = label[ALABEL_PREFIX.length..]
|
116
|
-
raise Error, "Malformed A-label, no Punycode eligible content found" if code.empty?
|
117
|
-
raise Error, "A-label must not end with a hyphen" if code[-1] == "-"
|
118
|
-
|
119
|
-
[URI::IDNA::Punycode.decode(code), true]
|
120
|
-
end
|
121
|
-
|
122
|
-
def validate_label_length(label)
|
123
|
-
raise Error, "Label too long" unless label.length < 64
|
124
|
-
end
|
125
|
-
|
126
|
-
def validate_domain_length(s)
|
127
|
-
raise Error, "Domain too long" unless s.length < (s[-1] == "." ? 255 : 254)
|
128
|
-
end
|
129
|
-
|
130
|
-
def validation
|
131
|
-
@validation ||= Validation.new(options.merge(bidi: check_bidi?))
|
132
|
-
end
|
133
|
-
|
134
|
-
def check_bidi?
|
135
|
-
options.fetch(:bidi, true) && Validation::Bidi.check?(labels)
|
136
|
-
end
|
137
|
-
end
|
138
|
-
end
|
139
|
-
end
|
data/lib/uri/idna/uts46.rb
DELETED
@@ -1,60 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative "data/uts46"
|
4
|
-
|
5
|
-
module URI
|
6
|
-
module IDNA
|
7
|
-
module UTS46
|
8
|
-
class << self
|
9
|
-
# https://unicode.org/reports/tr46/#ProcessingStepMap
|
10
|
-
def map_string(domain, uts46_std3: true, uts46_transitional: false)
|
11
|
-
output = ""
|
12
|
-
domain.each_char do |char|
|
13
|
-
code_point = char.ord
|
14
|
-
_, status, replacement = uts46_status(code_point)
|
15
|
-
case status
|
16
|
-
when "I"
|
17
|
-
next
|
18
|
-
when "V"
|
19
|
-
output += char
|
20
|
-
when "M"
|
21
|
-
output += replacement
|
22
|
-
when "D"
|
23
|
-
output += uts46_transitional ? replacement : char
|
24
|
-
when "3"
|
25
|
-
if uts46_std3
|
26
|
-
raise InvalidCodepointError,
|
27
|
-
"Codepoint #{code_point} not allowed in #{domain} via STD3 rules"
|
28
|
-
end
|
29
|
-
|
30
|
-
output += replacement || char
|
31
|
-
else
|
32
|
-
raise InvalidCodepointError, "Codepoint #{code_point} not allowed in #{domain}"
|
33
|
-
end
|
34
|
-
end
|
35
|
-
output.unicode_normalize(:nfc)
|
36
|
-
end
|
37
|
-
|
38
|
-
def valid?(char, uts46_transitional: false)
|
39
|
-
_, status, = uts46_status(char.ord)
|
40
|
-
return true if status == "V"
|
41
|
-
return true if uts46_transitional && status == "D"
|
42
|
-
|
43
|
-
false
|
44
|
-
end
|
45
|
-
|
46
|
-
private
|
47
|
-
|
48
|
-
def uts46_status(code_point)
|
49
|
-
index =
|
50
|
-
if code_point < 256
|
51
|
-
code_point
|
52
|
-
else
|
53
|
-
(UTS46_DATA.bsearch_index { |x| x[0] > code_point } || UTS46_DATA.length) - 1
|
54
|
-
end
|
55
|
-
UTS46_DATA[index] || []
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
data/lib/uri/idna/validation.rb
DELETED
@@ -1,199 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative "intranges"
|
4
|
-
require_relative "data/idna"
|
5
|
-
require_relative "validation/bidi"
|
6
|
-
|
7
|
-
module URI
|
8
|
-
module IDNA
|
9
|
-
# U-label domain validation for IDNA and UTS46.
|
10
|
-
class Validation
|
11
|
-
# @param [Hash] params
|
12
|
-
# @option params [Boolean] :nfc Normalize to NFC (true by default)
|
13
|
-
# @option params [Boolean] :hyphen34 Hyphen restrictions (true by default)
|
14
|
-
# @option params [Boolean] :hyphen_sides Hyphen restrictions (true for the register protocol and UTS46)
|
15
|
-
# @option params [Boolean] :leading_combining Leading combining marks (true for the register protocol and UTS46)
|
16
|
-
# @option params [Boolean] :contextj Contextual rules CONTEXTJ (true by default)
|
17
|
-
# @option params [Boolean] :contexto Contextual rules CONTEXTO (true for IDNA2008 protocols)
|
18
|
-
# @option params [Boolean] :bidi Bidi rules (true by default)
|
19
|
-
# @option params [Boolean] :idna_validity IDNA2008 validity (true for IDNA2008 protocols)
|
20
|
-
# @option params [Boolean] :uts46 UTS46 validity (true for UTS46)
|
21
|
-
# @option params [Boolean] :uts46_transitional UTS46 transitional validity (false by default)
|
22
|
-
# @option params [Boolean] :check_dot Check for dots (true for UTS46)
|
23
|
-
#
|
24
|
-
def initialize(params)
|
25
|
-
@nfc = params.fetch(:nfc, true)
|
26
|
-
@hyphen34 = params.fetch(:hyphen34, true)
|
27
|
-
@hyphen_sides = params.fetch(:hyphen_sides, true)
|
28
|
-
|
29
|
-
# Contextual rules
|
30
|
-
@leading_combining = params.fetch(:leading_combining, true)
|
31
|
-
@contextj = params.fetch(:contextj, true)
|
32
|
-
@contexto = params.fetch(:contexto, true)
|
33
|
-
@bidi = params.fetch(:bidi, true)
|
34
|
-
# IDNA2008 specific
|
35
|
-
@idna_validity = params.fetch(:idna_validity, true)
|
36
|
-
|
37
|
-
# UTS46 specific
|
38
|
-
@uts46 = params.fetch(:uts46, false)
|
39
|
-
@uts46_transitional = params.fetch(:uts46_transitional, false)
|
40
|
-
@check_dot = params.fetch(:check_dot, false)
|
41
|
-
end
|
42
|
-
|
43
|
-
def call(label, decoded: false)
|
44
|
-
raise Error, "Empty label" if label.empty?
|
45
|
-
|
46
|
-
check_nfc(label) if @nfc
|
47
|
-
check_hyphen34(label) if @hyphen34
|
48
|
-
check_hyphen_sides(label) if @hyphen_sides
|
49
|
-
check_leading_combining(label) if @leading_combining
|
50
|
-
check_dot(label) if @check_dot
|
51
|
-
label.each_char.with_index do |cp, pos|
|
52
|
-
next if codepoint?(cp, "PVALID")
|
53
|
-
|
54
|
-
if @contextj && codepoint?(cp, "CONTEXTJ")
|
55
|
-
next if valid_contextj?(label, pos)
|
56
|
-
|
57
|
-
raise InvalidCodepointContextError, cp_error_message(cp, label, pos)
|
58
|
-
end
|
59
|
-
|
60
|
-
if @contexto && codepoint?(cp, "CONTEXTO")
|
61
|
-
next if valid_contexto?(label, pos)
|
62
|
-
|
63
|
-
raise InvalidCodepointContextError, cp_error_message(cp, label, pos)
|
64
|
-
end
|
65
|
-
|
66
|
-
# 4.2.2. Rejection of Characters That Are Not Permitted
|
67
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
|
68
|
-
raise InvalidCodepointError, cp_error_message(cp, label, pos) if @idna_validity
|
69
|
-
|
70
|
-
if @uts46 && !UTS46.valid?(cp, uts46_transitional: @uts46_transitional && !decoded)
|
71
|
-
raise InvalidCodepointError, cp_error_message(cp, label, pos)
|
72
|
-
end
|
73
|
-
end
|
74
|
-
Bidi.call(label) if @bidi
|
75
|
-
end
|
76
|
-
|
77
|
-
private
|
78
|
-
|
79
|
-
# 4.1. Input to IDNA Registration
|
80
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.1
|
81
|
-
def check_nfc(label)
|
82
|
-
return true if label.unicode_normalized?(:nfc)
|
83
|
-
|
84
|
-
raise Error, "Label must be in Normalization Form C"
|
85
|
-
end
|
86
|
-
|
87
|
-
# 4.2.3.1. Hyphen Restrictions
|
88
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
|
89
|
-
def check_hyphen34(label)
|
90
|
-
return unless label[2..3] == "--"
|
91
|
-
|
92
|
-
raise Error, "Label has disallowed hyphens in 3rd and 4th position"
|
93
|
-
end
|
94
|
-
|
95
|
-
# 4.2.3.1. Hyphen Restrictions
|
96
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
|
97
|
-
def check_hyphen_sides(label)
|
98
|
-
return unless label[0] == "-" || label[-1] == "-"
|
99
|
-
|
100
|
-
raise Error, "Label must not start or end with a hyphen"
|
101
|
-
end
|
102
|
-
|
103
|
-
# 4.2.3.2. Leading Combining Marks
|
104
|
-
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
|
105
|
-
def check_leading_combining(label)
|
106
|
-
return unless Intranges.contain?(label[0].ord, INITIAL_COMBINERS)
|
107
|
-
|
108
|
-
raise Error, "Label begins with an illegal combining character"
|
109
|
-
end
|
110
|
-
|
111
|
-
def check_dot(label)
|
112
|
-
raise Error, "Label must not contain dots" if label.include?(".")
|
113
|
-
end
|
114
|
-
|
115
|
-
def valid_contexto?(label, pos)
|
116
|
-
cp_value = label[pos].ord
|
117
|
-
case cp_value
|
118
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
|
119
|
-
when 0x00b7
|
120
|
-
pos > 0 && pos < label.length - 1 ? (label[pos - 1].ord == 0x006c && label[pos + 1].ord == 0x006c) : false
|
121
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
|
122
|
-
when 0x0375
|
123
|
-
pos < label.length - 1 ? script?(label[pos + 1], "Greek") : false
|
124
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
|
125
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
|
126
|
-
when 0x05f3, 0x05f4
|
127
|
-
pos > 0 ? script?(label[pos - 1], "Hebrew") : false
|
128
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
|
129
|
-
when 0x30fb
|
130
|
-
label.each_char do |cp|
|
131
|
-
next if cp.ord == 0x30fb
|
132
|
-
return true if script?(cp, "Hiragana") || script?(cp, "Katakana") || script?(cp, "Han")
|
133
|
-
end
|
134
|
-
false
|
135
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
|
136
|
-
when 0x0660..0x0669
|
137
|
-
label.each_char do |cp|
|
138
|
-
return false if cp.ord >= 0x06f0 && cp.ord <= 0x06f9
|
139
|
-
end
|
140
|
-
true
|
141
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
|
142
|
-
when 0x06f0..0x06f9
|
143
|
-
label.each_char do |cp|
|
144
|
-
return false if cp.ord >= 0x0660 && cp.ord <= 0x0669
|
145
|
-
end
|
146
|
-
true
|
147
|
-
else
|
148
|
-
false
|
149
|
-
end
|
150
|
-
end
|
151
|
-
|
152
|
-
def valid_contextj?(label, pos)
|
153
|
-
case label[pos].ord
|
154
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
|
155
|
-
when 0x200c
|
156
|
-
return true if pos > 0 && virama_combining_class?(label[pos - 1])
|
157
|
-
|
158
|
-
ok = false
|
159
|
-
(pos - 1).downto(0) do |i|
|
160
|
-
joining_type = JOINING_TYPES[label[i].ord]
|
161
|
-
next if joining_type == 0x54
|
162
|
-
|
163
|
-
if [0x4c, 0x44].include?(joining_type)
|
164
|
-
ok = true
|
165
|
-
break
|
166
|
-
end
|
167
|
-
end
|
168
|
-
return false unless ok
|
169
|
-
|
170
|
-
(pos + 1).upto(label.length - 1) do |i|
|
171
|
-
joining_type = JOINING_TYPES[label[i].ord]
|
172
|
-
next if joining_type == 0x54
|
173
|
-
return true if [0x52, 0x44].include?(joining_type)
|
174
|
-
end
|
175
|
-
# https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
|
176
|
-
when 0x200d
|
177
|
-
return virama_combining_class?(label[pos - 1]) if pos > 0
|
178
|
-
end
|
179
|
-
false
|
180
|
-
end
|
181
|
-
|
182
|
-
def codepoint?(cp, class_name)
|
183
|
-
Intranges.contain?(cp.ord, CODEPOINT_CLASSES[class_name])
|
184
|
-
end
|
185
|
-
|
186
|
-
def script?(cp, script)
|
187
|
-
Intranges.contain?(cp.ord, SCRIPTS[script])
|
188
|
-
end
|
189
|
-
|
190
|
-
def virama_combining_class?(cp)
|
191
|
-
Intranges.contain?(cp.ord, VIRAMA_COMBINING_CLASSES)
|
192
|
-
end
|
193
|
-
|
194
|
-
def cp_error_message(cp, label, pos)
|
195
|
-
format("Codepoint U+%04X at position %d of %p not allowed", cp.ord, pos + 1, label)
|
196
|
-
end
|
197
|
-
end
|
198
|
-
end
|
199
|
-
end
|