uri-idna 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "intranges"
4
+ require_relative "data/idna"
5
+ require_relative "validation/bidi"
6
+
7
module URI
  module IDNA
    # U-label domain validation for IDNA and UTS46.
    #
    # An instance stores a set of boolean switches selecting which checks are
    # applied; {#call} runs the enabled checks against a single label and
    # raises on the first violation it finds.
    class Validation
      # Joining_Type codes compared against the entries of JOINING_TYPES in the
      # ZERO WIDTH NON-JOINER rule (ASCII codes of the letters T, L, D and R).
      JOINING_TYPE_TRANSPARENT = 0x54
      JOINING_TYPES_BEFORE_ZWNJ = [0x4c, 0x44].freeze
      JOINING_TYPES_AFTER_ZWNJ = [0x52, 0x44].freeze
      private_constant :JOINING_TYPE_TRANSPARENT, :JOINING_TYPES_BEFORE_ZWNJ, :JOINING_TYPES_AFTER_ZWNJ

      # Keys that are not listed below are ignored.
      #
      # @param [Hash] params
      # @option params [Boolean] :nfc Require Normalization Form C (default: true)
      # @option params [Boolean] :hyphen34 Reject "--" in the 3rd and 4th position (default: true)
      # @option params [Boolean] :hyphen_sides Reject a leading or trailing hyphen (default: true)
      # @option params [Boolean] :leading_combining Reject a leading combining mark (default: true)
      # @option params [Boolean] :contextj Apply contextual rules CONTEXTJ (default: true)
      # @option params [Boolean] :contexto Apply contextual rules CONTEXTO (default: true)
      # @option params [Boolean] :bidi Apply Bidi rules (default: true)
      # @option params [Boolean] :idna_validity Apply IDNA2008 validity (default: true)
      # @option params [Boolean] :uts46 Apply UTS46 validity (default: false)
      # @option params [Boolean] :uts46_transitional UTS46 transitional validity (default: false)
      # @option params [Boolean] :check_dot Reject labels containing dots (default: false)
      #
      def initialize(params = {})
        @nfc = params.fetch(:nfc, true)
        @hyphen34 = params.fetch(:hyphen34, true)
        @hyphen_sides = params.fetch(:hyphen_sides, true)

        # Contextual rules
        @leading_combining = params.fetch(:leading_combining, true)
        @contextj = params.fetch(:contextj, true)
        @contexto = params.fetch(:contexto, true)
        @bidi = params.fetch(:bidi, true)
        # IDNA2008 specific
        @idna_validity = params.fetch(:idna_validity, true)

        # UTS46 specific
        @uts46 = params.fetch(:uts46, false)
        @uts46_transitional = params.fetch(:uts46_transitional, false)
        @check_dot = params.fetch(:check_dot, false)
      end

      # Runs every enabled check against +label+.
      #
      # @param [String] label the label to validate
      # @param [Boolean] decoded when true, UTS46 transitional validity is not
      #   applied even if it was enabled in the constructor
      # @return the result of the Bidi check when it is enabled, nil otherwise
      # @raise [Error] when the label is empty or breaks a label-level rule
      # @raise [InvalidCodepointContextError] when a CONTEXTJ/CONTEXTO codepoint
      #   appears in a context that its rule does not allow
      # @raise [InvalidCodepointError] when a codepoint is not permitted
      def call(label, decoded: false)
        raise Error, "Empty label" if label.empty?

        check_nfc(label) if @nfc
        check_hyphen34(label) if @hyphen34
        check_hyphen_sides(label) if @hyphen_sides
        check_leading_combining(label) if @leading_combining
        check_dot(label) if @check_dot
        label.each_char.with_index { |cp, pos| check_codepoint(cp, label, pos, decoded) }
        Bidi.call(label) if @bidi
      end

      private

      # Validates the single character +cp+ found at index +pos+ of +label+.
      def check_codepoint(cp, label, pos, decoded)
        return if codepoint?(cp, "PVALID")

        if @contextj && codepoint?(cp, "CONTEXTJ")
          return if valid_contextj?(label, pos)

          raise InvalidCodepointContextError, cp_error_message(cp, label, pos)
        end

        if @contexto && codepoint?(cp, "CONTEXTO")
          return if valid_contexto?(label, pos)

          raise InvalidCodepointContextError, cp_error_message(cp, label, pos)
        end

        # 4.2.2. Rejection of Characters That Are Not Permitted
        # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
        raise InvalidCodepointError, cp_error_message(cp, label, pos) if @idna_validity

        return unless @uts46
        return if UTS46.valid?(cp, uts46_transitional: @uts46_transitional && !decoded)

        raise InvalidCodepointError, cp_error_message(cp, label, pos)
      end

      # 4.1. Input to IDNA Registration
      # https://datatracker.ietf.org/doc/html/rfc5891#section-4.1
      def check_nfc(label)
        return true if label.unicode_normalized?(:nfc)

        raise Error, "Label must be in Normalization Form C"
      end

      # 4.2.3.1. Hyphen Restrictions
      # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
      def check_hyphen34(label)
        return unless label[2..3] == "--"

        raise Error, "Label has disallowed hyphens in 3rd and 4th position"
      end

      # 4.2.3.1. Hyphen Restrictions
      # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
      def check_hyphen_sides(label)
        return unless label[0] == "-" || label[-1] == "-"

        raise Error, "Label must not start or end with a hyphen"
      end

      # 4.2.3.2. Leading Combining Marks
      # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
      def check_leading_combining(label)
        return unless Intranges.contain?(label[0].ord, INITIAL_COMBINERS)

        raise Error, "Label begins with an illegal combining character"
      end

      def check_dot(label)
        raise Error, "Label must not contain dots" if label.include?(".")
      end

      # Evaluates the CONTEXTO rule for the character at index +pos+.
      # Returns false for codepoints that have no rule here.
      def valid_contexto?(label, pos)
        last = label.length - 1
        case label[pos].ord
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
        when 0x00b7
          pos > 0 && pos < last && label[pos - 1].ord == 0x006c && label[pos + 1].ord == 0x006c
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
        when 0x0375
          pos < last && script?(label[pos + 1], "Greek")
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
        when 0x05f3, 0x05f4
          pos > 0 && script?(label[pos - 1], "Hebrew")
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
        when 0x30fb
          # The middle dot itself is skipped; some other character must
          # belong to one of the three scripts.
          label.each_char.any? do |cp|
            cp.ord != 0x30fb && (script?(cp, "Hiragana") || script?(cp, "Katakana") || script?(cp, "Han"))
          end
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
        when 0x0660..0x0669
          label.each_char.none? { |cp| (0x06f0..0x06f9).cover?(cp.ord) }
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
        when 0x06f0..0x06f9
          label.each_char.none? { |cp| (0x0660..0x0669).cover?(cp.ord) }
        else
          false
        end
      end

      # Evaluates the CONTEXTJ rule for the character at index +pos+.
      # Returns false for codepoints that have no rule here.
      def valid_contextj?(label, pos)
        case label[pos].ord
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
        when 0x200c
          return true if pos > 0 && virama_combining_class?(label[pos - 1])

          joining_context?(label, pos)
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
        when 0x200d
          pos > 0 && virama_combining_class?(label[pos - 1])
        else
          false
        end
      end

      # Second alternative of the rule in RFC 5892 appendix A.1: the nearest
      # non-transparent character before +pos+ must have joining type L or D
      # and the nearest one after +pos+ must have joining type R or D.
      def joining_context?(label, pos)
        before = nearest_joining_type(label, (pos - 1).downto(0))
        return false unless JOINING_TYPES_BEFORE_ZWNJ.include?(before)

        after = nearest_joining_type(label, (pos + 1).upto(label.length - 1))
        JOINING_TYPES_AFTER_ZWNJ.include?(after)
      end

      # Walks +indexes+ and returns the JOINING_TYPES entry of the first
      # character that is not transparent. The scan stops there on purpose:
      # characters further away must not satisfy the rule. Returns nil when
      # that character has no entry or when only transparent ones were seen.
      def nearest_joining_type(label, indexes)
        indexes.each do |i|
          joining_type = JOINING_TYPES[label[i].ord]
          return joining_type unless joining_type == JOINING_TYPE_TRANSPARENT
        end
        nil
      end

      def codepoint?(cp, class_name)
        Intranges.contain?(cp.ord, CODEPOINT_CLASSES[class_name])
      end

      def script?(cp, script)
        Intranges.contain?(cp.ord, SCRIPTS[script])
      end

      def virama_combining_class?(cp)
        Intranges.contain?(cp.ord, VIRAMA_COMBINING_CLASSES)
      end

      # Error text naming the codepoint and its 1-based position in the label.
      def cp_error_message(cp, label, pos)
        format("Codepoint U+%04X at position %d of %p not allowed", cp.ord, pos + 1, label)
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module URI
  module IDNA
    # Version string of the uri-idna gem.
    VERSION = "0.1.0"
  end
end
data/lib/uri/idna.rb ADDED
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "idna/version"
4
+ require_relative "idna/process"
5
+
6
module URI
  # Entry points for the IDNA lookup/registration protocols and for the
  # UTS46 ToASCII/ToUnicode processes, plus the error hierarchy they raise.
  module IDNA
    ALABEL_PREFIX = "xn--"

    # Common ancestor of every error class defined in this module
    class Error < StandardError
    end

    # Raised when bidirectional requirements are not satisfied
    class BidiError < Error
    end

    # Raised when a disallowed or unallocated codepoint is used
    class InvalidCodepointError < Error
    end

    # Raised when the codepoint is not valid in the context it is used
    class InvalidCodepointContextError < Error
    end

    # Raised when an error occurs during a punycode operation
    class PunycodeError < Error
    end

    class << self
      # Options applied by {to_unicode} and {to_ascii} unless overridden
      UTS46_PARAMS = {
        check_dot: true,
        idna_validity: false,
        uts46: true,
        uts46_std3: true,
        uts46_transitional: false,
        contexto: false,
      }.freeze

      # Options applied by {lookup} unless overridden
      LOOKUP_PARAMS = {
        hyphen_sides: false,
        leading_combining: false,
      }.freeze

      # Builds a Process from LOOKUP_PARAMS merged with +params+ (caller
      # values win) and returns the result of its +lookup+ for +s+.
      def lookup(s, **params)
        options = LOOKUP_PARAMS.merge(params)
        processor = Process.new(**options)
        processor.lookup(s)
      end

      # Builds a Process from +params+ alone and returns the result of its
      # +register+ for the given A-label and/or U-label.
      def register(alabel: nil, ulabel: nil, **params)
        processor = Process.new(**params)
        processor.register(alabel: alabel, ulabel: ulabel)
      end

      # UTS46 ToUnicode process
      # https://unicode.org/reports/tr46/#ToUnicode
      def to_unicode(s, **params)
        options = UTS46_PARAMS.merge(params)
        processor = Process.new(**options)
        processor.decode(s)
      end

      # UTS46 ToASCII process
      # https://unicode.org/reports/tr46/#ToASCII
      def to_ascii(s, **params)
        options = UTS46_PARAMS.merge(params)
        processor = Process.new(**options)
        processor.encode(s)
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: uri-idna
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Svyatoslav Kryukov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-08-05 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Internationalized Domain Names in Applications (IDNA)
14
+ email:
15
+ - s.g.kryukov@yandex.ru
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - CHANGELOG.md
21
+ - LICENSE.txt
22
+ - README.md
23
+ - lib/uri/idna.rb
24
+ - lib/uri/idna/data/idna.rb
25
+ - lib/uri/idna/data/uts46.rb
26
+ - lib/uri/idna/intranges.rb
27
+ - lib/uri/idna/process.rb
28
+ - lib/uri/idna/punycode.rb
29
+ - lib/uri/idna/uts46.rb
30
+ - lib/uri/idna/validation.rb
31
+ - lib/uri/idna/validation/bidi.rb
32
+ - lib/uri/idna/version.rb
33
+ homepage: https://github.com/skryukov/uri-idna
34
+ licenses:
35
+ - MIT
36
+ metadata:
37
+ bug_tracker_uri: https://github.com/skryukov/uri-idna/issues
38
+ changelog_uri: https://github.com/skryukov/uri-idna/blob/main/CHANGELOG.md
39
+ documentation_uri: https://github.com/skryukov/uri-idna/blob/main/README.md
40
+ homepage_uri: https://github.com/skryukov/uri-idna
41
+ source_code_uri: https://github.com/skryukov/uri-idna
42
+ rubygems_mfa_required: 'true'
43
+ post_install_message:
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: 2.7.0
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubygems_version: 3.3.7
59
+ signing_key:
60
+ specification_version: 4
61
+ summary: 'Internationalized Domain Names for Ruby (IDNA 2008 and UTS #46)'
62
+ test_files: []