uri-idna 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "intranges"
4
+ require_relative "data/idna"
5
+ require_relative "validation/bidi"
6
+
7
module URI
  module IDNA
    # U-label domain validation for IDNA and UTS46.
    #
    # An instance is configured once with a set of boolean switches and is
    # then applied to single labels through {#call}. Every failed check is
    # reported by raising; nothing is collected or returned.
    class Validation
      # Values stored in JOINING_TYPES are compared against the ASCII codes of
      # the joining type letters used by RFC 5892 Appendix A.1.
      JOINING_TYPE_TRANSPARENT = 0x54 # "T"
      JOINING_TYPES_BEFORE_ZWNJ = [0x4c, 0x44].freeze # "L", "D"
      JOINING_TYPES_AFTER_ZWNJ = [0x52, 0x44].freeze # "R", "D"
      private_constant :JOINING_TYPE_TRANSPARENT, :JOINING_TYPES_BEFORE_ZWNJ, :JOINING_TYPES_AFTER_ZWNJ

      # @param [Hash] params
      # @option params [Boolean] :nfc Normalize to NFC (true by default)
      # @option params [Boolean] :hyphen34 Hyphen restrictions (true by default)
      # @option params [Boolean] :hyphen_sides Hyphen restrictions (true for the register protocol and UTS46)
      # @option params [Boolean] :leading_combining Leading combining marks (true for the register protocol and UTS46)
      # @option params [Boolean] :contextj Contextual rules CONTEXTJ (true by default)
      # @option params [Boolean] :contexto Contextual rules CONTEXTO (true for IDNA2008 protocols)
      # @option params [Boolean] :bidi Bidi rules (true by default)
      # @option params [Boolean] :idna_validity IDNA2008 validity (true for IDNA2008 protocols)
      # @option params [Boolean] :uts46 UTS46 validity (true for UTS46)
      # @option params [Boolean] :uts46_transitional UTS46 transitional validity (false by default)
      # @option params [Boolean] :check_dot Check for dots (true for UTS46)
      #
      # When a key is absent this class falls back to: true for every option
      # except :uts46, :uts46_transitional and :check_dot, which fall back to false.
      def initialize(params)
        @nfc = params.fetch(:nfc, true)
        @hyphen34 = params.fetch(:hyphen34, true)
        @hyphen_sides = params.fetch(:hyphen_sides, true)

        # Contextual rules
        @leading_combining = params.fetch(:leading_combining, true)
        @contextj = params.fetch(:contextj, true)
        @contexto = params.fetch(:contexto, true)
        @bidi = params.fetch(:bidi, true)
        # IDNA2008 specific
        @idna_validity = params.fetch(:idna_validity, true)

        # UTS46 specific
        @uts46 = params.fetch(:uts46, false)
        @uts46_transitional = params.fetch(:uts46_transitional, false)
        @check_dot = params.fetch(:check_dot, false)
      end

      # Runs every enabled check against a single label.
      #
      # @param label [String] the label to validate
      # @param decoded [Boolean] when true, the UTS46 codepoint check is never
      #   run in transitional mode, whatever :uts46_transitional says
      # @return whatever Bidi.call returns when the bidi check is enabled, nil otherwise
      # @raise [Error] for an empty label or a failed structural check
      # @raise [InvalidCodepointContextError] when a CONTEXTJ/CONTEXTO rule fails
      # @raise [InvalidCodepointError] when a codepoint is not permitted
      def call(label, decoded: false)
        raise Error, "Empty label" if label.empty?

        check_nfc(label) if @nfc
        check_hyphen34(label) if @hyphen34
        check_hyphen_sides(label) if @hyphen_sides
        check_leading_combining(label) if @leading_combining
        check_dot(label) if @check_dot
        label.each_char.with_index { |cp, pos| check_codepoint(label, cp, pos, decoded) }
        Bidi.call(label) if @bidi
      end

      private

      # Validates one character of the label; returns silently when it is
      # acceptable and raises otherwise.
      def check_codepoint(label, cp, pos, decoded)
        return if codepoint?(cp, "PVALID")

        if @contextj && codepoint?(cp, "CONTEXTJ")
          return if valid_contextj?(label, pos)

          raise InvalidCodepointContextError, cp_error_message(cp, label, pos)
        end

        if @contexto && codepoint?(cp, "CONTEXTO")
          return if valid_contexto?(label, pos)

          raise InvalidCodepointContextError, cp_error_message(cp, label, pos)
        end

        # 4.2.2. Rejection of Characters That Are Not Permitted
        # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
        raise InvalidCodepointError, cp_error_message(cp, label, pos) if @idna_validity

        return unless @uts46
        return if UTS46.valid?(cp, uts46_transitional: @uts46_transitional && !decoded)

        raise InvalidCodepointError, cp_error_message(cp, label, pos)
      end

      # 4.1. Input to IDNA Registration
      # https://datatracker.ietf.org/doc/html/rfc5891#section-4.1
      def check_nfc(label)
        return true if label.unicode_normalized?(:nfc)

        raise Error, "Label must be in Normalization Form C"
      end

      # 4.2.3.1. Hyphen Restrictions
      # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
      def check_hyphen34(label)
        return unless label[2..3] == "--"

        raise Error, "Label has disallowed hyphens in 3rd and 4th position"
      end

      # 4.2.3.1. Hyphen Restrictions
      # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
      def check_hyphen_sides(label)
        return unless label[0] == "-" || label[-1] == "-"

        raise Error, "Label must not start or end with a hyphen"
      end

      # 4.2.3.2. Leading Combining Marks
      # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
      def check_leading_combining(label)
        return unless Intranges.contain?(label[0].ord, INITIAL_COMBINERS)

        raise Error, "Label begins with an illegal combining character"
      end

      # Raises when the label contains a dot.
      def check_dot(label)
        raise Error, "Label must not contain dots" if label.include?(".")
      end

      # Evaluates the CONTEXTO rule for the character at +pos+.
      # Codepoints without a rule here are rejected.
      def valid_contexto?(label, pos)
        has_next = pos < label.length - 1
        case label[pos].ord
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
        when 0x00b7
          pos > 0 && has_next && label[pos - 1].ord == 0x006c && label[pos + 1].ord == 0x006c
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
        when 0x0375
          has_next && script?(label[pos + 1], "Greek")
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
        when 0x05f3, 0x05f4
          pos > 0 && script?(label[pos - 1], "Hebrew")
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
        when 0x30fb
          # At least one character other than U+30FB itself must be in one of these scripts.
          label.each_char.any? do |cp|
            cp.ord != 0x30fb && (script?(cp, "Hiragana") || script?(cp, "Katakana") || script?(cp, "Han"))
          end
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
        when 0x0660..0x0669
          label.each_char.none? { |cp| (0x06f0..0x06f9).cover?(cp.ord) }
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
        when 0x06f0..0x06f9
          label.each_char.none? { |cp| (0x0660..0x0669).cover?(cp.ord) }
        else
          false
        end
      end

      # Evaluates the CONTEXTJ rule for the character at +pos+.
      def valid_contextj?(label, pos)
        case label[pos].ord
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
        when 0x200c
          return true if pos > 0 && virama_combining_class?(label[pos - 1])

          # (Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*(Joining_Type:{R,D})
          joining_neighbour?(label, (pos - 1).downto(0), JOINING_TYPES_BEFORE_ZWNJ) &&
            joining_neighbour?(label, (pos + 1).upto(label.length - 1), JOINING_TYPES_AFTER_ZWNJ)
        # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
        when 0x200d
          pos > 0 && virama_combining_class?(label[pos - 1])
        else
          false
        end
      end

      # Walks +positions+, skipping transparent characters, and tells whether
      # the first non-transparent character has one of the +accepted+ joining
      # types. The scan stops at that character: anything further away is not
      # adjacent to the joiner and must not satisfy the rule.
      def joining_neighbour?(label, positions, accepted)
        positions.each do |i|
          joining_type = JOINING_TYPES[label[i].ord]
          next if joining_type == JOINING_TYPE_TRANSPARENT

          return accepted.include?(joining_type)
        end
        false
      end

      def codepoint?(cp, class_name)
        Intranges.contain?(cp.ord, CODEPOINT_CLASSES[class_name])
      end

      def script?(cp, script)
        Intranges.contain?(cp.ord, SCRIPTS[script])
      end

      def virama_combining_class?(cp)
        Intranges.contain?(cp.ord, VIRAMA_COMBINING_CLASSES)
      end

      # Builds the error text; the reported position is 1-based.
      def cp_error_message(cp, label, pos)
        format("Codepoint U+%04X at position %d of %p not allowed", cp.ord, pos + 1, label)
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module URI
  module IDNA
    # Release number of the uri-idna gem.
    VERSION = "0.1.0"
  end
end
data/lib/uri/idna.rb ADDED
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "idna/version"
4
+ require_relative "idna/process"
5
+
6
module URI
  # Public entry points of the gem together with its error hierarchy.
  # Every operation builds a Process from the merged parameters and
  # delegates to it.
  module IDNA
    ALABEL_PREFIX = "xn--"

    # Common ancestor of all errors declared in this namespace.
    class Error < StandardError
    end

    # Raised when bidirectional requirements are not satisfied
    class BidiError < Error
    end

    # Raised when a disallowed or unallocated codepoint is used
    class InvalidCodepointError < Error
    end

    # Raised when the codepoint is not valid in the context it is used
    class InvalidCodepointContextError < Error
    end

    # Raised when an error occurs during a punycode operation
    class PunycodeError < Error
    end

    class << self
      # Parameters applied by to_unicode and to_ascii unless overridden by the caller.
      UTS46_PARAMS = {
        check_dot: true,
        idna_validity: false,
        uts46: true,
        uts46_std3: true,
        uts46_transitional: false,
        contexto: false,
      }.freeze

      # Parameters applied by lookup unless overridden by the caller.
      LOOKUP_PARAMS = {
        hyphen_sides: false,
        leading_combining: false,
      }.freeze

      # Runs Process#lookup on +s+; caller params take precedence over LOOKUP_PARAMS.
      def lookup(s, **params)
        options = { **LOOKUP_PARAMS, **params }
        Process.new(**options).lookup(s)
      end

      # Runs Process#register with the given labels; params are passed through untouched.
      def register(alabel: nil, ulabel: nil, **params)
        processor = Process.new(**params)
        processor.register(alabel: alabel, ulabel: ulabel)
      end

      # UTS46 ToUnicode process
      # https://unicode.org/reports/tr46/#ToUnicode
      def to_unicode(s, **params)
        options = { **UTS46_PARAMS, **params }
        Process.new(**options).decode(s)
      end

      # UTS46 ToASCII process
      # https://unicode.org/reports/tr46/#ToASCII
      def to_ascii(s, **params)
        options = { **UTS46_PARAMS, **params }
        Process.new(**options).encode(s)
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: uri-idna
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Svyatoslav Kryukov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-08-05 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Internationalized Domain Names in Applications (IDNA)
14
+ email:
15
+ - s.g.kryukov@yandex.ru
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - CHANGELOG.md
21
+ - LICENSE.txt
22
+ - README.md
23
+ - lib/uri/idna.rb
24
+ - lib/uri/idna/data/idna.rb
25
+ - lib/uri/idna/data/uts46.rb
26
+ - lib/uri/idna/intranges.rb
27
+ - lib/uri/idna/process.rb
28
+ - lib/uri/idna/punycode.rb
29
+ - lib/uri/idna/uts46.rb
30
+ - lib/uri/idna/validation.rb
31
+ - lib/uri/idna/validation/bidi.rb
32
+ - lib/uri/idna/version.rb
33
+ homepage: https://github.com/skryukov/uri-idna
34
+ licenses:
35
+ - MIT
36
+ metadata:
37
+ bug_tracker_uri: https://github.com/skryukov/uri-idna/issues
38
+ changelog_uri: https://github.com/skryukov/uri-idna/blob/main/CHANGELOG.md
39
+ documentation_uri: https://github.com/skryukov/uri-idna/blob/main/README.md
40
+ homepage_uri: https://github.com/skryukov/uri-idna
41
+ source_code_uri: https://github.com/skryukov/uri-idna
42
+ rubygems_mfa_required: 'true'
43
+ post_install_message:
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: 2.7.0
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubygems_version: 3.3.7
59
+ signing_key:
60
+ specification_version: 4
61
+ summary: 'Internationalized Domain Names for Ruby (IDNA 2008 and UTS #46)'
62
+ test_files: []