uri-idna 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -5
- data/README.md +1 -1
- data/lib/uri/idna/base_processing.rb +6 -0
- data/lib/uri/idna/data/uts46.rb +5989 -5989
- data/lib/uri/idna/idna2008/processing.rb +3 -9
- data/lib/uri/idna/uts46/mapping.rb +18 -17
- data/lib/uri/idna/uts46/processing.rb +8 -11
- data/lib/uri/idna/validation/bidi.rb +9 -9
- data/lib/uri/idna/validation/codepoint.rb +6 -0
- data/lib/uri/idna/validation/label.rb +4 -2
- data/lib/uri/idna/version.rb +1 -1
- metadata +2 -2
@@ -45,12 +45,6 @@ module URI
|
|
45
45
|
Validation::Bidi.call(label) if check_bidi?
|
46
46
|
end
|
47
47
|
|
48
|
-
def check_bidi?
|
49
|
-
return @check_bidi if instance_variable_defined?(:@check_bidi)
|
50
|
-
|
51
|
-
@check_bidi = options.check_bidi? && Validation::Bidi.check?(domain_name)
|
52
|
-
end
|
53
|
-
|
54
48
|
def punycode_decode(label)
|
55
49
|
return label unless label.start_with?(ACE_PREFIX)
|
56
50
|
|
@@ -72,8 +66,8 @@ module URI
|
|
72
66
|
end
|
73
67
|
|
74
68
|
def call
|
75
|
-
alabels, alabel_trailing_dot = split_domain(alabel.encode("UTF-8").unicode_normalize(:nfc)) if alabel
|
76
|
-
ulabels, ulabel_trailing_dot = split_domain(ulabel.encode("UTF-8").unicode_normalize(:nfc)) if ulabel
|
69
|
+
alabels, alabel_trailing_dot = split_domain(alabel.encode("UTF-8").unicode_normalize!(:nfc)) if alabel
|
70
|
+
ulabels, ulabel_trailing_dot = split_domain(ulabel.encode("UTF-8").unicode_normalize!(:nfc)) if ulabel
|
77
71
|
|
78
72
|
if alabels && ulabels && (alabels.size != ulabels.size || alabel_trailing_dot != ulabel_trailing_dot)
|
79
73
|
raise Error, "alabel doesn't match ulabel"
|
@@ -128,7 +122,7 @@ module URI
|
|
128
122
|
# # https://datatracker.ietf.org/doc/html/rfc5891#section-5
|
129
123
|
class Lookup < Processing
|
130
124
|
def call
|
131
|
-
domain = domain_name.encode("UTF-8").unicode_normalize(:nfc)
|
125
|
+
domain = domain_name.encode("UTF-8").unicode_normalize!(:nfc)
|
132
126
|
|
133
127
|
result = process_labels(domain) do |label|
|
134
128
|
orig_label = label
|
@@ -9,31 +9,35 @@ module URI
|
|
9
9
|
module Mapping
|
10
10
|
class << self
|
11
11
|
def call(domain_name, transitional_processing: false, use_std3_ascii_rules: true)
|
12
|
-
output =
|
12
|
+
output = []
|
13
13
|
domain_name.each_codepoint do |codepoint|
|
14
14
|
_, status, replacement = status(codepoint)
|
15
15
|
case status
|
16
16
|
when "V", "X" # valid, disallowed
|
17
|
-
output << codepoint
|
17
|
+
output << codepoint
|
18
18
|
when "M" # mapped
|
19
|
-
output
|
20
|
-
|
19
|
+
output += if transitional_processing && codepoint == 7838
|
20
|
+
[115, 115]
|
21
21
|
else
|
22
22
|
replacement
|
23
23
|
end
|
24
24
|
when "D" # deviation
|
25
|
-
|
25
|
+
if transitional_processing
|
26
|
+
output += replacement
|
27
|
+
else
|
28
|
+
output << codepoint
|
29
|
+
end
|
26
30
|
when "3" # disallowed_STD3_valid, disallowed_STD3_mapped
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
31
|
+
if use_std3_ascii_rules || !replacement
|
32
|
+
output << codepoint
|
33
|
+
else
|
34
|
+
output += replacement
|
35
|
+
end
|
32
36
|
when "I" # ignored
|
33
37
|
next
|
34
38
|
end
|
35
39
|
end
|
36
|
-
output.unicode_normalize(:nfc)
|
40
|
+
output.pack("U*").unicode_normalize!(:nfc)
|
37
41
|
end
|
38
42
|
|
39
43
|
def validate_status(label, cp, pos, transitional_processing:, use_std3_ascii_rules:)
|
@@ -46,12 +50,9 @@ module URI
|
|
46
50
|
end
|
47
51
|
|
48
52
|
def status(codepoint)
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
else
|
53
|
-
(UTS46_DATA.bsearch_index { |x| x[0] > codepoint } || UTS46_DATA.length) - 1
|
54
|
-
end
|
53
|
+
return UTS46_DATA[codepoint] if codepoint < 256
|
54
|
+
|
55
|
+
index = (UTS46_DATA.bsearch_index { |x| x[0] > codepoint } || UTS46_DATA.length) - 1
|
55
56
|
UTS46_DATA[index] || []
|
56
57
|
end
|
57
58
|
end
|
@@ -8,14 +8,17 @@ module URI
|
|
8
8
|
module UTS46
|
9
9
|
# https://www.unicode.org/reports/tr46/#Processing
|
10
10
|
class Processing < BaseProcessing
|
11
|
-
def
|
12
|
-
|
11
|
+
def initialize(domain_name, **options)
|
12
|
+
super
|
13
|
+
@domain_name = Mapping.call(
|
13
14
|
domain_name,
|
14
|
-
transitional_processing: options.transitional_processing?,
|
15
|
-
use_std3_ascii_rules: options.use_std3_ascii_rules?,
|
15
|
+
transitional_processing: @options.transitional_processing?,
|
16
|
+
use_std3_ascii_rules: @options.use_std3_ascii_rules?,
|
16
17
|
)
|
18
|
+
end
|
17
19
|
|
18
|
-
|
20
|
+
def call
|
21
|
+
process_labels(domain_name) do |label|
|
19
22
|
if label.start_with?(ACE_PREFIX)
|
20
23
|
begin
|
21
24
|
label = punycode_decode(label)
|
@@ -41,12 +44,6 @@ module URI
|
|
41
44
|
Options
|
42
45
|
end
|
43
46
|
|
44
|
-
def check_bidi?
|
45
|
-
return @check_bidi if instance_variable_defined?(:@check_bidi)
|
46
|
-
|
47
|
-
@check_bidi = options.check_bidi? && Validation::Bidi.check?(domain_name)
|
48
|
-
end
|
49
|
-
|
50
47
|
# https://www.unicode.org/reports/tr46/#Validity_Criteria
|
51
48
|
def validate(label, transitional_processing: options.transitional_processing?)
|
52
49
|
return if label.empty?
|
@@ -61,22 +61,22 @@ module URI
|
|
61
61
|
|
62
62
|
# https://www.rfc-editor.org/rfc/rfc5891.html#section-4.2.3.4
|
63
63
|
def check?(domain)
|
64
|
-
|
65
|
-
domain = labels.map do |label|
|
64
|
+
domain.split(".").each do |label|
|
66
65
|
if label.start_with?(ACE_PREFIX)
|
67
66
|
begin
|
68
|
-
Punycode.decode(label[ACE_PREFIX.length..])
|
67
|
+
label = Punycode.decode(label[ACE_PREFIX.length..])
|
69
68
|
rescue PunycodeError
|
70
|
-
|
69
|
+
next
|
71
70
|
end
|
72
|
-
else
|
73
|
-
label
|
74
71
|
end
|
75
|
-
|
72
|
+
next if label.ascii_only?
|
76
73
|
|
77
|
-
|
78
|
-
|
74
|
+
label.each_codepoint do |cp|
|
75
|
+
next if cp < 256
|
76
|
+
return true if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
|
77
|
+
end
|
79
78
|
end
|
79
|
+
|
80
80
|
false
|
81
81
|
end
|
82
82
|
|
@@ -17,6 +17,7 @@ module URI
|
|
17
17
|
|
18
18
|
# https://datatracker.ietf.org/doc/html/rfc5892
|
19
19
|
def check_contextj(label, cp, pos)
|
20
|
+
return false if cp < 256
|
20
21
|
return false unless codepoint?(cp, "CONTEXTJ")
|
21
22
|
return true if valid_contextj?(label, cp, pos)
|
22
23
|
|
@@ -25,6 +26,7 @@ module URI
|
|
25
26
|
|
26
27
|
# https://datatracker.ietf.org/doc/html/rfc5892
|
27
28
|
def check_contexto(label, cp, pos)
|
29
|
+
return false if cp < 183
|
28
30
|
return false unless codepoint?(cp, "CONTEXTO")
|
29
31
|
return true if valid_contexto?(label, cp, pos)
|
30
32
|
|
@@ -105,10 +107,14 @@ module URI
|
|
105
107
|
end
|
106
108
|
|
107
109
|
def script?(cp, script)
|
110
|
+
return false if cp < 256
|
111
|
+
|
108
112
|
Intranges.contain?(cp, SCRIPTS[script])
|
109
113
|
end
|
110
114
|
|
111
115
|
def virama_combining_class?(cp)
|
116
|
+
return false if cp < 256
|
117
|
+
|
112
118
|
Intranges.contain?(cp, VIRAMA_COMBINING_CLASSES)
|
113
119
|
end
|
114
120
|
|
@@ -27,7 +27,7 @@ module URI
|
|
27
27
|
# 4.2.3.1. Hyphen Restrictions
|
28
28
|
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.1
|
29
29
|
def check_hyphen_sides(label)
|
30
|
-
return unless label
|
30
|
+
return unless label.start_with?("-") || label.end_with?("-")
|
31
31
|
|
32
32
|
raise Error, "Label must neither begin nor end with a U+002D HYPHEN-MINUS character"
|
33
33
|
end
|
@@ -41,7 +41,9 @@ module URI
|
|
41
41
|
# 4.2.3.2. Leading Combining Marks
|
42
42
|
# https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.2
|
43
43
|
def check_leading_combining(label)
|
44
|
-
|
44
|
+
cp = label[0].ord
|
45
|
+
return if cp < 256
|
46
|
+
return unless Intranges.contain?(cp, INITIAL_COMBINERS)
|
45
47
|
|
46
48
|
raise Error, "Label begins with an illegal combining character"
|
47
49
|
end
|
data/lib/uri/idna/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uri-idna
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Svyatoslav Kryukov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-11-
|
11
|
+
date: 2023-11-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Internationalized Domain Names in Applications (IDNA)
|
14
14
|
email:
|