uri-idna 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -5
- data/README.md +1 -1
- data/lib/uri/idna/base_processing.rb +6 -0
- data/lib/uri/idna/data/uts46.rb +5989 -5989
- data/lib/uri/idna/idna2008/processing.rb +3 -9
- data/lib/uri/idna/uts46/mapping.rb +18 -17
- data/lib/uri/idna/uts46/processing.rb +8 -11
- data/lib/uri/idna/validation/bidi.rb +9 -9
- data/lib/uri/idna/validation/codepoint.rb +6 -0
- data/lib/uri/idna/validation/label.rb +4 -2
- data/lib/uri/idna/version.rb +1 -1
- metadata +2 -2
@@ -45,12 +45,6 @@ module URI
|
|
45
45
|
Validation::Bidi.call(label) if check_bidi?
|
46
46
|
end
|
47
47
|
|
48
|
-
def check_bidi?
|
49
|
-
return @check_bidi if instance_variable_defined?(:@check_bidi)
|
50
|
-
|
51
|
-
@check_bidi = options.check_bidi? && Validation::Bidi.check?(domain_name)
|
52
|
-
end
|
53
|
-
|
54
48
|
def punycode_decode(label)
|
55
49
|
return label unless label.start_with?(ACE_PREFIX)
|
56
50
|
|
@@ -72,8 +66,8 @@ module URI
|
|
72
66
|
end
|
73
67
|
|
74
68
|
def call
|
75
|
-
alabels, alabel_trailing_dot = split_domain(alabel.encode("UTF-8").unicode_normalize(:nfc)) if alabel
|
76
|
-
ulabels, ulabel_trailing_dot = split_domain(ulabel.encode("UTF-8").unicode_normalize(:nfc)) if ulabel
|
69
|
+
alabels, alabel_trailing_dot = split_domain(alabel.encode("UTF-8").unicode_normalize!(:nfc)) if alabel
|
70
|
+
ulabels, ulabel_trailing_dot = split_domain(ulabel.encode("UTF-8").unicode_normalize!(:nfc)) if ulabel
|
77
71
|
|
78
72
|
if alabels && ulabels && (alabels.size != ulabels.size || alabel_trailing_dot != ulabel_trailing_dot)
|
79
73
|
raise Error, "alabel doesn't match ulabel"
|
@@ -128,7 +122,7 @@ module URI
|
|
128
122
|
# # https://datatracker.ietf.org/doc/html/rfc5891#section-5
|
129
123
|
class Lookup < Processing
|
130
124
|
def call
|
131
|
-
domain = domain_name.encode("UTF-8").unicode_normalize(:nfc)
|
125
|
+
domain = domain_name.encode("UTF-8").unicode_normalize!(:nfc)
|
132
126
|
|
133
127
|
result = process_labels(domain) do |label|
|
134
128
|
orig_label = label
|
@@ -9,31 +9,35 @@ module URI
|
|
9
9
|
module Mapping
|
10
10
|
class << self
|
11
11
|
def call(domain_name, transitional_processing: false, use_std3_ascii_rules: true)
|
12
|
-
output =
|
12
|
+
output = []
|
13
13
|
domain_name.each_codepoint do |codepoint|
|
14
14
|
_, status, replacement = status(codepoint)
|
15
15
|
case status
|
16
16
|
when "V", "X" # valid, disallowed
|
17
|
-
output << codepoint
|
17
|
+
output << codepoint
|
18
18
|
when "M" # mapped
|
19
|
-
output
|
20
|
-
|
19
|
+
output += if transitional_processing && codepoint == 7838
|
20
|
+
[115, 115]
|
21
21
|
else
|
22
22
|
replacement
|
23
23
|
end
|
24
24
|
when "D" # deviation
|
25
|
-
|
25
|
+
if transitional_processing
|
26
|
+
output += replacement
|
27
|
+
else
|
28
|
+
output << codepoint
|
29
|
+
end
|
26
30
|
when "3" # disallowed_STD3_valid, disallowed_STD3_mapped
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
31
|
+
if use_std3_ascii_rules || !replacement
|
32
|
+
output << codepoint
|
33
|
+
else
|
34
|
+
output += replacement
|
35
|
+
end
|
32
36
|
when "I" # ignored
|
33
37
|
next
|
34
38
|
end
|
35
39
|
end
|
36
|
-
output.unicode_normalize(:nfc)
|
40
|
+
output.pack("U*").unicode_normalize!(:nfc)
|
37
41
|
end
|
38
42
|
|
39
43
|
def validate_status(label, cp, pos, transitional_processing:, use_std3_ascii_rules:)
|
@@ -46,12 +50,9 @@ module URI
|
|
46
50
|
end
|
47
51
|
|
48
52
|
def status(codepoint)
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
else
|
53
|
-
(UTS46_DATA.bsearch_index { |x| x[0] > codepoint } || UTS46_DATA.length) - 1
|
54
|
-
end
|
53
|
+
return UTS46_DATA[codepoint] if codepoint < 256
|
54
|
+
|
55
|
+
index = (UTS46_DATA.bsearch_index { |x| x[0] > codepoint } || UTS46_DATA.length) - 1
|
55
56
|
UTS46_DATA[index] || []
|
56
57
|
end
|
57
58
|
end
|
@@ -8,14 +8,17 @@ module URI
|
|
8
8
|
module UTS46
|
9
9
|
# https://www.unicode.org/reports/tr46/#Processing
|
10
10
|
class Processing < BaseProcessing
|
11
|
-
def
|
12
|
-
|
11
|
+
def initialize(domain_name, **options)
|
12
|
+
super
|
13
|
+
@domain_name = Mapping.call(
|
13
14
|
domain_name,
|
14
|
-
transitional_processing: options.transitional_processing?,
|
15
|
-
use_std3_ascii_rules: options.use_std3_ascii_rules?,
|
15
|
+
transitional_processing: @options.transitional_processing?,
|
16
|
+
use_std3_ascii_rules: @options.use_std3_ascii_rules?,
|
16
17
|
)
|
18
|
+
end
|
17
19
|
|
18
|
-
|
20
|
+
def call
|
21
|
+
process_labels(domain_name) do |label|
|
19
22
|
if label.start_with?(ACE_PREFIX)
|
20
23
|
begin
|
21
24
|
label = punycode_decode(label)
|
@@ -41,12 +44,6 @@ module URI
|
|
41
44
|
Options
|
42
45
|
end
|
43
46
|
|
44
|
-
def check_bidi?
|
45
|
-
return @check_bidi if instance_variable_defined?(:@check_bidi)
|
46
|
-
|
47
|
-
@check_bidi = options.check_bidi? && Validation::Bidi.check?(domain_name)
|
48
|
-
end
|
49
|
-
|
50
47
|
# https://www.unicode.org/reports/tr46/#Validity_Criteria
|
51
48
|
def validate(label, transitional_processing: options.transitional_processing?)
|
52
49
|
return if label.empty?
|
@@ -61,22 +61,22 @@ module URI
|
|
61
61
|
|
62
62
|
# https://www.rfc-editor.org/rfc/rfc5891.html#section-4.2.3.4
|
63
63
|
def check?(domain)
|
64
|
-
|
65
|
-
domain = labels.map do |label|
|
64
|
+
domain.split(".").each do |label|
|
66
65
|
if label.start_with?(ACE_PREFIX)
|
67
66
|
begin
|
68
|
-
Punycode.decode(label[ACE_PREFIX.length..])
|
67
|
+
label = Punycode.decode(label[ACE_PREFIX.length..])
|
69
68
|
rescue PunycodeError
|
70
|
-
|
69
|
+
next
|
71
70
|
end
|
72
|
-
else
|
73
|
-
label
|
74
71
|
end
|
75
|
-
|
72
|
+
next if label.ascii_only?
|
76
73
|
|
77
|
-
|
78
|
-
|
74
|
+
label.each_codepoint do |cp|
|
75
|
+
next if cp < 256
|
76
|
+
return true if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
|
77
|
+
end
|
79
78
|
end
|
79
|
+
|
80
80
|
false
|
81
81
|
end
|
82
82
|
|
@@ -17,6 +17,7 @@ module URI
|
|
17
17
|
|
18
18
|
# https://datatracker.ietf.org/doc/html/rfc5892
|
19
19
|
def check_contextj(label, cp, pos)
|
20
|
+
return false if cp < 256
|
20
21
|
return false unless codepoint?(cp, "CONTEXTJ")
|
21
22
|
return true if valid_contextj?(label, cp, pos)
|
22
23
|
|
@@ -25,6 +26,7 @@ module URI
|
|
25
26
|
|
26
27
|
# https://datatracker.ietf.org/doc/html/rfc5892
|
27
28
|
def check_contexto(label, cp, pos)
|
29
|
+
return false if cp < 183
|
28
30
|
return false unless codepoint?(cp, "CONTEXTO")
|
29
31
|
return true if valid_contexto?(label, cp, pos)
|
30
32
|
|
@@ -105,10 +107,14 @@ module URI
|
|
105
107
|
end
|
106
108
|
|
107
109
|
def script?(cp, script)
|
110
|
+
return false if cp < 256
|
111
|
+
|
108
112
|
Intranges.contain?(cp, SCRIPTS[script])
|
109
113
|
end
|
110
114
|
|
111
115
|
def virama_combining_class?(cp)
|
116
|
+
return false if cp < 256
|
117
|
+
|
112
118
|
Intranges.contain?(cp, VIRAMA_COMBINING_CLASSES)
|
113
119
|
end
|
114
120
|
|
@@ -27,7 +27,7 @@ module URI
|
|
27
27
|
# 4.2.3.1. Hyphen Restrictions
|
28
28
|
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
|
29
29
|
def check_hyphen_sides(label)
|
30
|
-
return unless label
|
30
|
+
return unless label.start_with?("-") || label.end_with?("-")
|
31
31
|
|
32
32
|
raise Error, "Label must neither begin nor end with a U+002D HYPHEN-MINUS character"
|
33
33
|
end
|
@@ -41,7 +41,9 @@ module URI
|
|
41
41
|
# 4.2.3.2. Leading Combining Marks
|
42
42
|
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
|
43
43
|
def check_leading_combining(label)
|
44
|
-
|
44
|
+
cp = label[0].ord
|
45
|
+
return if cp < 256
|
46
|
+
return unless Intranges.contain?(cp, INITIAL_COMBINERS)
|
45
47
|
|
46
48
|
raise Error, "Label begins with an illegal combining character"
|
47
49
|
end
|
data/lib/uri/idna/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uri-idna
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Svyatoslav Kryukov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-11-
|
11
|
+
date: 2023-11-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Internationalized Domain Names in Applications (IDNA)
|
14
14
|
email:
|