uri-idna 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +22 -0
- data/LICENSE.txt +21 -0
- data/README.md +184 -0
- data/lib/uri/idna/data/idna.rb +4692 -0
- data/lib/uri/idna/data/uts46.rb +8190 -0
- data/lib/uri/idna/intranges.rb +49 -0
- data/lib/uri/idna/process.rb +139 -0
- data/lib/uri/idna/punycode.rb +174 -0
- data/lib/uri/idna/uts46.rb +60 -0
- data/lib/uri/idna/validation/bidi.rb +93 -0
- data/lib/uri/idna/validation.rb +199 -0
- data/lib/uri/idna/version.rb +7 -0
- data/lib/uri/idna.rb +60 -0
- metadata +62 -0
@@ -0,0 +1,199 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "intranges"
|
4
|
+
require_relative "data/idna"
|
5
|
+
require_relative "validation/bidi"
|
6
|
+
|
7
|
+
module URI
|
8
|
+
module IDNA
|
9
|
+
# U-label domain validation for IDNA and UTS46.
|
10
|
+
class Validation
|
11
|
+
# Stores the validation switches, falling back to a default for every key
# that is missing from +params+ (keys not listed here are ignored).
#
# @param [Hash] params
# @option params [Boolean] :nfc Require Normalization Form C (default: true)
# @option params [Boolean] :hyphen34 Reject "--" in the 3rd and 4th positions (default: true)
# @option params [Boolean] :hyphen_sides Reject a leading or trailing hyphen (default: true)
# @option params [Boolean] :leading_combining Reject a leading combining mark (default: true)
# @option params [Boolean] :contextj Apply the CONTEXTJ contextual rules (default: true)
# @option params [Boolean] :contexto Apply the CONTEXTO contextual rules (default: true)
# @option params [Boolean] :bidi Apply the Bidi rules (default: true)
# @option params [Boolean] :idna_validity Apply IDNA2008 codepoint validity (default: true)
# @option params [Boolean] :uts46 Apply UTS46 codepoint validity (default: false)
# @option params [Boolean] :uts46_transitional Use UTS46 transitional validity (default: false)
# @option params [Boolean] :check_dot Reject labels that contain dots (default: false)
#
def initialize(params)
  {
    # Label-level restrictions
    nfc: true,
    hyphen34: true,
    hyphen_sides: true,
    # Contextual rules
    leading_combining: true,
    contextj: true,
    contexto: true,
    bidi: true,
    # IDNA2008 specific
    idna_validity: true,
    # UTS46 specific
    uts46: false,
    uts46_transitional: false,
    check_dot: false,
  }.each do |switch, default|
    # fetch (not ||) so that an explicit false/nil supplied by the caller is kept
    instance_variable_set(:"@#{switch}", params.fetch(switch, default))
  end
end
|
42
|
+
|
43
|
+
# Validates a single label against every rule enabled on this validator.
#
# Whole-label checks run first (NFC, hyphens, leading combining mark, dot),
# then every codepoint is classified, and finally the Bidi rule is applied.
# A codepoint passes when it is PVALID, or when it is CONTEXTJ/CONTEXTO and
# its contextual rule holds; anything else is rejected by the IDNA2008
# validity switch or handed to the UTS46 validity check.
#
# @param [String] label the label to validate
# @param [Boolean] decoded when true, the UTS46 transitional flag is not
#   passed on to UTS46.valid? even if it is enabled on the validator
# @raise [Error] for an empty label or a failed label-level check
# @raise [InvalidCodepointContextError] when a contextual rule is violated
# @raise [InvalidCodepointError] when a codepoint is not permitted
# @return the result of Bidi.call when the Bidi rule is enabled, nil otherwise
def call(label, decoded: false)
  raise Error, "Empty label" if label.empty?

  check_nfc(label) if @nfc
  check_hyphen34(label) if @hyphen34
  check_hyphen_sides(label) if @hyphen_sides
  check_leading_combining(label) if @leading_combining
  check_dot(label) if @check_dot

  label.each_char.with_index do |char, index|
    next if codepoint?(char, "PVALID")

    if @contextj && codepoint?(char, "CONTEXTJ")
      raise InvalidCodepointContextError, cp_error_message(char, label, index) unless valid_contextj?(label, index)

      next
    end

    if @contexto && codepoint?(char, "CONTEXTO")
      raise InvalidCodepointContextError, cp_error_message(char, label, index) unless valid_contexto?(label, index)

      next
    end

    # 4.2.2. Rejection of Characters That Are Not Permitted
    # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
    raise InvalidCodepointError, cp_error_message(char, label, index) if @idna_validity

    next unless @uts46
    next if UTS46.valid?(char, uts46_transitional: @uts46_transitional && !decoded)

    raise InvalidCodepointError, cp_error_message(char, label, index)
  end

  Bidi.call(label) if @bidi
end
|
76
|
+
|
77
|
+
private
|
78
|
+
|
79
|
+
# 4.1. Input to IDNA Registration
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.1
#
# Rejects labels that are not already in Unicode Normalization Form C.
#
# @param [String] label
# @raise [Error] when the label is not NFC-normalized
# @return [true]
def check_nfc(label)
  raise Error, "Label must be in Normalization Form C" unless label.unicode_normalized?(:nfc)

  true
end
|
86
|
+
|
87
|
+
# 4.2.3.1. Hyphen Restrictions
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
#
# Rejects labels whose third and fourth characters are both hyphens.
#
# @param [String] label
# @raise [Error] when the label has "--" at positions 3 and 4
# @return [nil]
def check_hyphen34(label)
  raise Error, "Label has disallowed hyphens in 3rd and 4th position" if label[2, 2] == "--"
end
|
94
|
+
|
95
|
+
# 4.2.3.1. Hyphen Restrictions
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
#
# Rejects labels whose first or last character is a hyphen.
#
# @param [String] label
# @raise [Error] when the label starts or ends with "-"
# @return [nil]
def check_hyphen_sides(label)
  hyphen_on_edge = label.start_with?("-") || label.end_with?("-")
  raise Error, "Label must not start or end with a hyphen" if hyphen_on_edge
end
|
102
|
+
|
103
|
+
# 4.2.3.2. Leading Combining Marks
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
#
# Rejects labels whose first codepoint is listed in INITIAL_COMBINERS.
#
# @param [String] label a non-empty label
# @raise [Error] when the label begins with a combining character
# @return [nil]
def check_leading_combining(label)
  first_codepoint = label[0].ord
  raise Error, "Label begins with an illegal combining character" if Intranges.contain?(first_codepoint, INITIAL_COMBINERS)
end
|
110
|
+
|
111
|
+
# Rejects labels that contain a full stop (".").
#
# @param [String] label
# @raise [Error] when the label contains a dot
# @return [nil]
def check_dot(label)
  return unless label.include?(".")

  raise Error, "Label must not contain dots"
end
|
114
|
+
|
115
|
+
# Checks the CONTEXTO rule (RFC 5892, appendix A.3 - A.9) that applies to
# the character at +pos+ in +label+. Characters without a rule here are
# reported as invalid.
#
# @param [String] label
# @param [Integer] pos index of the character being checked
# @return [Boolean]
def valid_contexto?(label, pos)
  last_index = label.length - 1

  case label[pos].ord
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
  when 0x00b7
    # MIDDLE DOT must sit between two "l" (U+006C) characters
    pos > 0 && pos < last_index && label[pos - 1].ord == 0x006c && label[pos + 1].ord == 0x006c
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
  when 0x0375
    # must be followed by a Greek character
    pos < last_index && script?(label[pos + 1], "Greek")
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
  when 0x05f3, 0x05f4
    # must be preceded by a Hebrew character
    pos > 0 && script?(label[pos - 1], "Hebrew")
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
  when 0x30fb
    # needs at least one Hiragana, Katakana or Han character elsewhere in the label
    label.each_char.any? do |char|
      char.ord != 0x30fb && %w[Hiragana Katakana Han].any? { |script| script?(char, script) }
    end
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
  when 0x0660..0x0669
    # digits U+0660..U+0669 must not be mixed with digits U+06F0..U+06F9
    label.each_char.none? { |char| (0x06f0..0x06f9).cover?(char.ord) }
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
  when 0x06f0..0x06f9
    # digits U+06F0..U+06F9 must not be mixed with digits U+0660..U+0669
    label.each_char.none? { |char| (0x0660..0x0669).cover?(char.ord) }
  else
    false
  end
end
|
151
|
+
|
152
|
+
# Checks the CONTEXTJ rule for the joiner at +pos+ in +label+.
#
# * U+200C ZERO WIDTH NON-JOINER (RFC 5892, appendix A.1) is accepted when
#   it directly follows a virama, or when the nearest non-transparent
#   character before it is left- or dual-joining and the nearest
#   non-transparent character after it is right- or dual-joining.
# * U+200D ZERO WIDTH JOINER (appendix A.2) is accepted only directly after
#   a virama.
#
# Joining types are read from JOINING_TYPES and compared against the ASCII
# codes of the Joining_Type letters: 0x54 "T", 0x4c "L", 0x52 "R", 0x44 "D".
#
# @param [String] label
# @param [Integer] pos index of the joiner being checked
# @return [Boolean]
def valid_contextj?(label, pos)
  case label[pos].ord
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
  when 0x200c
    return true if pos > 0 && virama_combining_class?(label[pos - 1])

    # The rule is the pattern {L,D} T* ZWNJ T* {R,D}: only transparent (T)
    # characters may be skipped. The scan therefore stops at the first
    # non-transparent character on each side instead of searching past it
    # for a joining character further away.
    first_non_transparent = lambda do |indexes|
      indexes.lazy.map { |i| JOINING_TYPES[label[i].ord] }.find { |type| type != 0x54 }
    end

    return false unless [0x4c, 0x44].include?(first_non_transparent.call((pos - 1).downto(0)))

    [0x52, 0x44].include?(first_non_transparent.call((pos + 1).upto(label.length - 1)))
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
  when 0x200d
    pos > 0 && virama_combining_class?(label[pos - 1])
  else
    false
  end
end
|
181
|
+
|
182
|
+
# Tells whether the character belongs to the named entry of CODEPOINT_CLASSES
# (for example "PVALID", "CONTEXTJ" or "CONTEXTO").
#
# @param [String] cp a single character
# @param [String] class_name key into CODEPOINT_CLASSES
# @return the result of Intranges.contain?
def codepoint?(cp, class_name)
  class_ranges = CODEPOINT_CLASSES[class_name]
  Intranges.contain?(cp.ord, class_ranges)
end
|
185
|
+
|
186
|
+
# Tells whether the character belongs to the named entry of SCRIPTS
# (for example "Greek" or "Hebrew").
#
# @param [String] cp a single character
# @param [String] script key into SCRIPTS
# @return the result of Intranges.contain?
def script?(cp, script)
  script_ranges = SCRIPTS[script]
  Intranges.contain?(cp.ord, script_ranges)
end
|
189
|
+
|
190
|
+
# Tells whether the character is listed in VIRAMA_COMBINING_CLASSES.
#
# @param [String] cp a single character
# @return the result of Intranges.contain?
def virama_combining_class?(cp)
  codepoint = cp.ord
  Intranges.contain?(codepoint, VIRAMA_COMBINING_CLASSES)
end
|
193
|
+
|
194
|
+
# Builds the error text for a rejected codepoint.
#
# @param [String] cp the offending character
# @param [String] label the label being validated (rendered with #inspect)
# @param [Integer] pos zero-based index of the character; reported one-based
# @return [String]
def cp_error_message(cp, label, pos)
  codepoint = cp.ord
  one_based_position = pos + 1
  format("Codepoint U+%04X at position %d of %p not allowed", codepoint, one_based_position, label)
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
data/lib/uri/idna.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "idna/version"
|
4
|
+
require_relative "idna/process"
|
5
|
+
|
6
|
+
module URI
  # Entry points for the IDNA2008 protocols (+lookup+, +register+) and the
  # UTS46 conversions (+to_unicode+, +to_ascii+). Each entry point builds a
  # Process with a set of default parameters and delegates to it.
  module IDNA
    # Prefix that marks an A-label (the "xn--" ACE prefix).
    ALABEL_PREFIX = "xn--"

    # Base class for every error raised by this library
    class Error < StandardError; end

    # Raised when bidirectional requirements are not satisfied
    class BidiError < Error; end

    # Raised when a disallowed or unallocated codepoint is used
    class InvalidCodepointError < Error; end

    # Raised when the codepoint is not valid in the context it is used
    class InvalidCodepointContextError < Error; end

    # Raised when an error occurs during a punycode operation
    class PunycodeError < Error; end

    class << self
      # Defaults applied by +to_unicode+ and +to_ascii+; caller-supplied
      # parameters take precedence over them.
      UTS46_PARAMS = {
        check_dot: true,
        idna_validity: false,
        uts46: true,
        uts46_std3: true,
        uts46_transitional: false,
        contexto: false,
      }.freeze

      # Defaults applied by +lookup+; caller-supplied parameters take
      # precedence over them.
      LOOKUP_PARAMS = {
        hyphen_sides: false,
        leading_combining: false,
      }.freeze

      # Runs Process#lookup on +s+ with LOOKUP_PARAMS merged under +params+.
      #
      # @param [String] s the input handed to Process#lookup
      # @param [Hash] params overrides for the processing parameters
      # @return the result of Process#lookup
      def lookup(s, **params)
        options = LOOKUP_PARAMS.merge(params)
        Process.new(**options).lookup(s)
      end

      # Runs Process#register with the given labels; +params+ are passed to
      # Process.new unchanged (no defaults are added here).
      #
      # @param [String, nil] alabel
      # @param [String, nil] ulabel
      # @param [Hash] params processing parameters
      # @return the result of Process#register
      def register(alabel: nil, ulabel: nil, **params)
        process = Process.new(**params)
        process.register(alabel: alabel, ulabel: ulabel)
      end

      # UTS46 ToUnicode process
      # https://unicode.org/reports/tr46/#ToUnicode
      #
      # Runs Process#decode on +s+ with UTS46_PARAMS merged under +params+.
      #
      # @param [String] s the input handed to Process#decode
      # @param [Hash] params overrides for the processing parameters
      # @return the result of Process#decode
      def to_unicode(s, **params)
        options = UTS46_PARAMS.merge(params)
        Process.new(**options).decode(s)
      end

      # UTS46 ToASCII process
      # https://unicode.org/reports/tr46/#ToASCII
      #
      # Runs Process#encode on +s+ with UTS46_PARAMS merged under +params+.
      #
      # @param [String] s the input handed to Process#encode
      # @param [Hash] params overrides for the processing parameters
      # @return the result of Process#encode
      def to_ascii(s, **params)
        options = UTS46_PARAMS.merge(params)
        Process.new(**options).encode(s)
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: uri-idna
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Svyatoslav Kryukov
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-08-05 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Internationalized Domain Names in Applications (IDNA)
|
14
|
+
email:
|
15
|
+
- s.g.kryukov@yandex.ru
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- CHANGELOG.md
|
21
|
+
- LICENSE.txt
|
22
|
+
- README.md
|
23
|
+
- lib/uri/idna.rb
|
24
|
+
- lib/uri/idna/data/idna.rb
|
25
|
+
- lib/uri/idna/data/uts46.rb
|
26
|
+
- lib/uri/idna/intranges.rb
|
27
|
+
- lib/uri/idna/process.rb
|
28
|
+
- lib/uri/idna/punycode.rb
|
29
|
+
- lib/uri/idna/uts46.rb
|
30
|
+
- lib/uri/idna/validation.rb
|
31
|
+
- lib/uri/idna/validation/bidi.rb
|
32
|
+
- lib/uri/idna/version.rb
|
33
|
+
homepage: https://github.com/skryukov/uri-idna
|
34
|
+
licenses:
|
35
|
+
- MIT
|
36
|
+
metadata:
|
37
|
+
bug_tracker_uri: https://github.com/skryukov/uri-idna/issues
|
38
|
+
changelog_uri: https://github.com/skryukov/uri-idna/blob/main/CHANGELOG.md
|
39
|
+
documentation_uri: https://github.com/skryukov/uri-idna/blob/main/README.md
|
40
|
+
homepage_uri: https://github.com/skryukov/uri-idna
|
41
|
+
source_code_uri: https://github.com/skryukov/uri-idna
|
42
|
+
rubygems_mfa_required: 'true'
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
require_paths:
|
46
|
+
- lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: 2.7.0
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
requirements: []
|
58
|
+
rubygems_version: 3.3.7
|
59
|
+
signing_key:
|
60
|
+
specification_version: 4
|
61
|
+
summary: 'Internationalized Domain Names for Ruby (IDNA 2008 and UTS #46)'
|
62
|
+
test_files: []
|