uri-idna 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +22 -0
- data/LICENSE.txt +21 -0
- data/README.md +184 -0
- data/lib/uri/idna/data/idna.rb +4692 -0
- data/lib/uri/idna/data/uts46.rb +8190 -0
- data/lib/uri/idna/intranges.rb +49 -0
- data/lib/uri/idna/process.rb +139 -0
- data/lib/uri/idna/punycode.rb +174 -0
- data/lib/uri/idna/uts46.rb +60 -0
- data/lib/uri/idna/validation/bidi.rb +93 -0
- data/lib/uri/idna/validation.rb +199 -0
- data/lib/uri/idna/version.rb +7 -0
- data/lib/uri/idna.rb +60 -0
- metadata +62 -0
@@ -0,0 +1,199 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "intranges"
|
4
|
+
require_relative "data/idna"
|
5
|
+
require_relative "validation/bidi"
|
6
|
+
|
7
|
+
module URI
|
8
|
+
module IDNA
|
9
|
+
# U-label domain validation for IDNA and UTS46.
|
10
|
+
class Validation
|
11
|
+
# @param [Hash] params
|
12
|
+
# @option params [Boolean] :nfc Normalize to NFC (true by default)
|
13
|
+
# @option params [Boolean] :hyphen34 Reject "--" in the 3rd and 4th positions (true by default)
|
14
|
+
# @option params [Boolean] :hyphen_sides Reject a leading or trailing hyphen (true for the register protocol and UTS46)
|
15
|
+
# @option params [Boolean] :leading_combining Leading combining marks (true for the register protocol and UTS46)
|
16
|
+
# @option params [Boolean] :contextj Contextual rules CONTEXTJ (true by default)
|
17
|
+
# @option params [Boolean] :contexto Contextual rules CONTEXTO (true for IDNA2008 protocols)
|
18
|
+
# @option params [Boolean] :bidi Bidi rules (true by default)
|
19
|
+
# @option params [Boolean] :idna_validity IDNA2008 validity (true for IDNA2008 protocols)
|
20
|
+
# @option params [Boolean] :uts46 UTS46 validity (true for UTS46)
|
21
|
+
# @option params [Boolean] :uts46_transitional UTS46 transitional validity (false by default)
|
22
|
+
# @option params [Boolean] :check_dot Check for dots (true for UTS46)
|
23
|
+
#
|
24
|
+
def initialize(params)
  # Option name => value used when the caller does not supply that key.
  # Each option is stored in the instance variable of the same name.
  option_defaults = {
    nfc: true,
    hyphen34: true,
    hyphen_sides: true,
    # Contextual rules
    leading_combining: true,
    contextj: true,
    contexto: true,
    bidi: true,
    # IDNA2008 specific
    idna_validity: true,
    # UTS46 specific
    uts46: false,
    uts46_transitional: false,
    check_dot: false,
  }

  option_defaults.each do |option, default|
    instance_variable_set(:"@#{option}", params.fetch(option, default))
  end
end
|
42
|
+
|
43
|
+
# Validates a single label against every check enabled on this instance.
#
# @param label [String] the label to validate
# @param decoded [Boolean] when true, the UTS46 transitional flag is not
#   passed on to UTS46.valid? even if it was enabled in the constructor
# @raise [Error] for an empty label or a failed whole-label check
# @raise [InvalidCodepointContextError] when a CONTEXTJ/CONTEXTO codepoint
#   fails its contextual rule
# @raise [InvalidCodepointError] when a codepoint is rejected by the
#   IDNA2008 or UTS46 validity check
def call(label, decoded: false)
  raise Error, "Empty label" if label.empty?

  # Whole-label checks, each gated by its own option.
  check_nfc(label) if @nfc
  check_hyphen34(label) if @hyphen34
  check_hyphen_sides(label) if @hyphen_sides
  check_leading_combining(label) if @leading_combining
  check_dot(label) if @check_dot

  # Does not depend on the character, so it is computed once up front.
  transitional = @uts46_transitional && !decoded

  label.chars.each_with_index do |char, index|
    next if codepoint?(char, "PVALID")

    # Pick the contextual rule that applies to this character, if any.
    # CONTEXTJ is consulted first; CONTEXTO only when CONTEXTJ does not apply.
    context_rule =
      (@contextj && codepoint?(char, "CONTEXTJ") && :valid_contextj?) ||
      (@contexto && codepoint?(char, "CONTEXTO") && :valid_contexto?)
    if context_rule
      next if send(context_rule, label, index)

      raise InvalidCodepointContextError, cp_error_message(char, label, index)
    end

    # 4.2.2. Rejection of Characters That Are Not Permitted
    # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
    raise InvalidCodepointError, cp_error_message(char, label, index) if @idna_validity

    next unless @uts46
    next if UTS46.valid?(char, uts46_transitional: transitional)

    raise InvalidCodepointError, cp_error_message(char, label, index)
  end

  Bidi.call(label) if @bidi
end
|
76
|
+
|
77
|
+
private
|
78
|
+
|
79
|
+
# 4.1. Input to IDNA Registration
|
80
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.1
|
81
|
+
# Ensures the label is already in Unicode Normalization Form C.
#
# @param label [String]
# @return [true] when the label is NFC-normalized
# @raise [Error] otherwise
def check_nfc(label)
  raise Error, "Label must be in Normalization Form C" unless label.unicode_normalized?(:nfc)

  true
end
|
86
|
+
|
87
|
+
# 4.2.3.1. Hyphen Restrictions
|
88
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
|
89
|
+
# Rejects labels whose 3rd and 4th characters are both hyphens.
#
# @param label [String]
# @return [nil] when the label passes
# @raise [Error] when characters 3-4 are "--"
def check_hyphen34(label)
  raise Error, "Label has disallowed hyphens in 3rd and 4th position" if label[2, 2] == "--"
end
|
94
|
+
|
95
|
+
# 4.2.3.1. Hyphen Restrictions
|
96
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
|
97
|
+
# Rejects labels that begin or end with a hyphen.
#
# @param label [String]
# @return [nil] when the label passes
# @raise [Error] when the first or last character is "-"
def check_hyphen_sides(label)
  hyphen_on_edge = label.start_with?("-") || label.end_with?("-")
  raise Error, "Label must not start or end with a hyphen" if hyphen_on_edge
end
|
102
|
+
|
103
|
+
# 4.2.3.2. Leading Combining Marks
|
104
|
+
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
|
105
|
+
# Rejects labels whose first codepoint falls inside INITIAL_COMBINERS.
#
# @param label [String] a non-empty label
# @return [nil] when the label passes
# @raise [Error] when the first character is listed in INITIAL_COMBINERS
def check_leading_combining(label)
  first_codepoint = label[0].ord
  raise Error, "Label begins with an illegal combining character" if Intranges.contain?(first_codepoint, INITIAL_COMBINERS)
end
|
110
|
+
|
111
|
+
# Rejects labels that contain a "." character.
#
# @param label [String]
# @return [nil] when the label passes
# @raise [Error] when the label includes a dot
def check_dot(label)
  return unless label.include?(".")

  raise Error, "Label must not contain dots"
end
|
114
|
+
|
115
|
+
# Evaluates the CONTEXTO rule for the codepoint at +pos+ in +label+.
#
# @param label [String]
# @param pos [Integer] zero-based index of the codepoint being checked
# @return [Boolean] false for any codepoint that has no rule listed here
def valid_contexto?(label, pos)
  case label[pos].ord
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
  when 0x00b7
    # Must sit directly between two U+006C characters.
    pos > 0 && pos < label.length - 1 &&
      label[pos - 1].ord == 0x006c && label[pos + 1].ord == 0x006c
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
  when 0x0375
    # The following character must be Greek.
    pos < label.length - 1 && script?(label[pos + 1], "Greek")
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
  when 0x05f3, 0x05f4
    # The preceding character must be Hebrew.
    pos > 0 && script?(label[pos - 1], "Hebrew")
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
  when 0x30fb
    # Some character other than U+30FB must be Hiragana, Katakana or Han.
    label.each_char.any? do |char|
      char.ord != 0x30fb &&
        (script?(char, "Hiragana") || script?(char, "Katakana") || script?(char, "Han"))
    end
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
  when 0x0660..0x0669
    # Must not be mixed with any digit in U+06F0..U+06F9.
    label.each_char.none? { |char| (0x06f0..0x06f9).cover?(char.ord) }
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
  when 0x06f0..0x06f9
    # Must not be mixed with any digit in U+0660..U+0669.
    label.each_char.none? { |char| (0x0660..0x0669).cover?(char.ord) }
  else
    false
  end
end
|
151
|
+
|
152
|
+
# Evaluates the CONTEXTJ rule for the joiner at +pos+ in +label+.
#
# U+200C (ZERO WIDTH NON-JOINER) is valid when it directly follows a
# character with the Virama combining class, or when it matches
#   (Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*(Joining_Type:{R,D})
# Only Transparent characters may sit between the joiner and the L/D
# character before it or the R/D character after it, so each scan stops at
# the first non-transparent character instead of skipping past it.
#
# U+200D (ZERO WIDTH JOINER) is valid only directly after a character with
# the Virama combining class.
#
# @param label [String]
# @param pos [Integer] zero-based index of the codepoint being checked
# @return [Boolean] false for any other codepoint
def valid_contextj?(label, pos)
  case label[pos].ord
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
  when 0x200c
    return true if pos > 0 && virama_combining_class?(label[pos - 1])

    before = first_non_transparent_joining_type(label, (pos - 1).downto(0))
    return false unless [0x4c, 0x44].include?(before) # "L" or "D"

    after = first_non_transparent_joining_type(label, (pos + 1).upto(label.length - 1))
    [0x52, 0x44].include?(after) # "R" or "D"
  # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
  when 0x200d
    pos > 0 && virama_combining_class?(label[pos - 1])
  else
    false
  end
end

# Walks +positions+ in order and returns the JOINING_TYPES entry of the first
# character whose joining type is not Transparent (0x54, "T"). Returns nil
# when that character has no entry or when every visited character is
# Transparent.
def first_non_transparent_joining_type(label, positions)
  positions.each do |index|
    joining_type = JOINING_TYPES[label[index].ord]
    return joining_type unless joining_type == 0x54
  end
  nil
end
|
181
|
+
|
182
|
+
# Tells whether +cp+ belongs to the named entry of CODEPOINT_CLASSES.
#
# @param cp [String] a single character
# @param class_name [String] key into CODEPOINT_CLASSES (e.g. "PVALID")
# @return the result of Intranges.contain?
def codepoint?(cp, class_name)
  value = cp.ord
  ranges = CODEPOINT_CLASSES[class_name]
  Intranges.contain?(value, ranges)
end
|
185
|
+
|
186
|
+
# Tells whether +cp+ belongs to the named entry of SCRIPTS.
#
# @param cp [String] a single character
# @param script [String] key into SCRIPTS (e.g. "Greek")
# @return the result of Intranges.contain?
def script?(cp, script)
  value = cp.ord
  ranges = SCRIPTS[script]
  Intranges.contain?(value, ranges)
end
|
189
|
+
|
190
|
+
# Tells whether +cp+ falls inside VIRAMA_COMBINING_CLASSES.
#
# @param cp [String] a single character
# @return the result of Intranges.contain?
def virama_combining_class?(cp)
  value = cp.ord
  Intranges.contain?(value, VIRAMA_COMBINING_CLASSES)
end
|
193
|
+
|
194
|
+
# Builds the message used when a codepoint is rejected.
#
# @param cp [String] the offending character
# @param label [String] the label being validated (rendered with %p)
# @param pos [Integer] zero-based index of +cp+; reported one-based
# @return [String]
def cp_error_message(cp, label, pos)
  codepoint = cp.ord
  one_based_position = pos + 1
  format("Codepoint U+%04X at position %d of %p not allowed", codepoint, one_based_position, label)
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
data/lib/uri/idna.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "idna/version"
|
4
|
+
require_relative "idna/process"
|
5
|
+
|
6
|
+
module URI
|
7
|
+
module IDNA
|
8
|
+
# Prefix string "xn--" exposed as a shared constant.
ALABEL_PREFIX = "xn--"

# Root of the error hierarchy; every error class below inherits from it.
class Error < StandardError
end

# Raised when bidirectional requirements are not satisfied
class BidiError < Error
end

# Raised when a disallowed or unallocated codepoint is used
class InvalidCodepointError < Error
end

# Raised when the codepoint is not valid in the context it is used
class InvalidCodepointContextError < Error
end

# Raised when an error occurs during a punycode operation
class PunycodeError < Error
end
|
23
|
+
|
24
|
+
class << self
  # Options merged underneath caller-supplied params by to_unicode and
  # to_ascii; a key passed by the caller overrides the value given here.
  UTS46_PARAMS = {
    check_dot: true,
    idna_validity: false,
    uts46: true,
    uts46_std3: true,
    uts46_transitional: false,
    contexto: false,
  }.freeze

  # Options merged underneath caller-supplied params by lookup.
  LOOKUP_PARAMS = {
    hyphen_sides: false,
    leading_combining: false,
  }.freeze

  # Builds a Process from LOOKUP_PARAMS overlaid with +params+ and calls
  # its #lookup with +s+.
  def lookup(s, **params)
    options = { **LOOKUP_PARAMS, **params }
    Process.new(**options).lookup(s)
  end

  # Builds a Process from +params+ only and calls its #register with the
  # given alabel/ulabel pair.
  def register(alabel: nil, ulabel: nil, **params)
    processor = Process.new(**params)
    processor.register(alabel: alabel, ulabel: ulabel)
  end

  # UTS46 ToUnicode process
  # https://unicode.org/reports/tr46/#ToUnicode
  def to_unicode(s, **params)
    options = { **UTS46_PARAMS, **params }
    Process.new(**options).decode(s)
  end

  # UTS46 ToASCII process
  # https://unicode.org/reports/tr46/#ToASCII
  def to_ascii(s, **params)
    options = { **UTS46_PARAMS, **params }
    Process.new(**options).encode(s)
  end
end
|
59
|
+
end
|
60
|
+
end
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: uri-idna
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Svyatoslav Kryukov
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-08-05 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Internationalized Domain Names in Applications (IDNA)
|
14
|
+
email:
|
15
|
+
- s.g.kryukov@yandex.ru
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- CHANGELOG.md
|
21
|
+
- LICENSE.txt
|
22
|
+
- README.md
|
23
|
+
- lib/uri/idna.rb
|
24
|
+
- lib/uri/idna/data/idna.rb
|
25
|
+
- lib/uri/idna/data/uts46.rb
|
26
|
+
- lib/uri/idna/intranges.rb
|
27
|
+
- lib/uri/idna/process.rb
|
28
|
+
- lib/uri/idna/punycode.rb
|
29
|
+
- lib/uri/idna/uts46.rb
|
30
|
+
- lib/uri/idna/validation.rb
|
31
|
+
- lib/uri/idna/validation/bidi.rb
|
32
|
+
- lib/uri/idna/version.rb
|
33
|
+
homepage: https://github.com/skryukov/uri-idna
|
34
|
+
licenses:
|
35
|
+
- MIT
|
36
|
+
metadata:
|
37
|
+
bug_tracker_uri: https://github.com/skryukov/uri-idna/issues
|
38
|
+
changelog_uri: https://github.com/skryukov/uri-idna/blob/main/CHANGELOG.md
|
39
|
+
documentation_uri: https://github.com/skryukov/uri-idna/blob/main/README.md
|
40
|
+
homepage_uri: https://github.com/skryukov/uri-idna
|
41
|
+
source_code_uri: https://github.com/skryukov/uri-idna
|
42
|
+
rubygems_mfa_required: 'true'
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
require_paths:
|
46
|
+
- lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: 2.7.0
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
requirements: []
|
58
|
+
rubygems_version: 3.3.7
|
59
|
+
signing_key:
|
60
|
+
specification_version: 4
|
61
|
+
summary: 'Internationalized Domain Names for Ruby (IDNA 2008 and UTS #46)'
|
62
|
+
test_files: []
|