uri-idna 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -2
- data/lib/uri/idna/base_processing.rb +12 -6
- data/lib/uri/idna/data/bidi_classes.rb +1973 -0
- data/lib/uri/idna/data/codepoint_classes.rb +1226 -0
- data/lib/uri/idna/data/joining_types.rb +839 -0
- data/lib/uri/idna/data/leading_combiners.rb +321 -0
- data/lib/uri/idna/data/scripts.rb +108 -0
- data/lib/uri/idna/data/unicode_version.rb +10 -0
- data/lib/uri/idna/data/uts46.rb +8459 -8179
- data/lib/uri/idna/data/virama_combining_classes.rb +67 -0
- data/lib/uri/idna/idna2008/processing.rb +13 -28
- data/lib/uri/idna/punycode.rb +11 -9
- data/lib/uri/idna/uts46/mapping.rb +39 -37
- data/lib/uri/idna/uts46/processing.rb +14 -15
- data/lib/uri/idna/validation/bidi.rb +34 -52
- data/lib/uri/idna/validation/contextj.rb +62 -0
- data/lib/uri/idna/validation/contexto.rb +61 -0
- data/lib/uri/idna/validation/idna_permitted.rb +30 -0
- data/lib/uri/idna/validation/label.rb +1 -14
- data/lib/uri/idna/validation/leading_combining.rb +23 -0
- data/lib/uri/idna/version.rb +1 -1
- metadata +15 -7
- data/lib/uri/idna/data/idna.rb +0 -4697
- data/lib/uri/idna/intranges.rb +0 -57
- data/lib/uri/idna/validation/codepoint.rb +0 -128
data/lib/uri/idna/intranges.rb
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module URI
  module IDNA
    # Sets of integers stored as a sorted list of packed half-open ranges.
    #
    # Each range is a single Integer: the inclusive start sits in the bits
    # above bit 31 and the exclusive end in the low 32 bits.
    module Intranges
      class << self
        # Collapses a list of integers into packed ranges.
        #
        # The list is sorted first; every run of consecutive values becomes
        # one encoded (start, last + 1) entry.
        #
        # @param list [Array<Integer>] values in any order
        # @return [Array<Integer>] packed ranges, one per consecutive run
        def from_list(list)
          runs = list.sort.slice_when { |prev, succ| prev + 1 != succ }
          runs.map { |run| encode_range(run.first, run.last + 1) }
        end

        # Tests whether +int+ falls inside one of the packed +ranges+.
        #
        # The lookup is a binary search, so +ranges+ is expected to be in
        # ascending order (as produced by +from_list+).
        #
        # @param int [Integer] value to look up
        # @param ranges [Array<Integer>] packed ranges
        # @return [Boolean]
        def contain?(int, ranges)
          probe = encode_range(int, 0)
          idx = ranges.bsearch_index { |packed| packed > probe } || ranges.length

          # The entry just before the insertion point may span the value:
          # start <= int < end.
          unless idx.zero?
            candidate = ranges[idx - 1]
            return true if int >= decode_left(candidate) && int < decode_right(candidate)
          end

          # Otherwise the entry at the insertion point may begin exactly at the value.
          idx < ranges.length && decode_left(ranges[idx]) == int
        end

        private

        # Packs a (start, finish) pair into one Integer.
        def encode_range(start, finish)
          (start << 32) | finish
        end

        # Selects the low 32 bits of a packed range.
        MASK = ((1 << 32) - 1)

        # Exclusive end of a packed range.
        def decode_right(r)
          r & MASK
        end

        # Inclusive start of a packed range.
        def decode_left(r)
          r >> 32
        end
      end
    end
  end
end
|
data/lib/uri/idna/validation/codepoint.rb
DELETED
@@ -1,128 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative "../intranges"
|
4
|
-
require_relative "../data/idna"
|
5
|
-
|
6
|
-
module URI
  module IDNA
    module Validation
      # Per-codepoint IDNA2008 checks: "PVALID" class membership plus the
      # CONTEXTJ and CONTEXTO contextual rules of RFC 5892 Appendix A.
      module Codepoint
        class << self
          # Requires +cp+ to belong to the "PVALID" codepoint class.
          # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
          #
          # @param label [String] label being validated (used in the error message)
          # @param cp [Integer] codepoint located at +pos+
          # @param pos [Integer] zero-based index of +cp+ in +label+
          # @return [true] when the codepoint is PVALID
          # @raise [InvalidCodepointError] otherwise
          def check_idna_validity(label, cp, pos)
            return true if codepoint?(cp, "PVALID")

            raise InvalidCodepointError, cp_error_message(label, cp, pos)
          end

          # Applies the CONTEXTJ rule for +cp+, if it has one.
          # https://datatracker.ietf.org/doc/html/rfc5892
          #
          # @param label [String] label being validated
          # @param cp [Integer] codepoint located at +pos+
          # @param pos [Integer] zero-based index of +cp+ in +label+
          # @return [Boolean] false when +cp+ is not a CONTEXTJ codepoint,
          #   true when it is and its context is valid
          # @raise [InvalidCodepointContextError] when the context rule fails
          def check_contextj(label, cp, pos)
            return false if cp < 256
            return false unless codepoint?(cp, "CONTEXTJ")
            return true if valid_contextj?(label, cp, pos)

            raise InvalidCodepointContextError, cp_error_message(label, cp, pos)
          end

          # Applies the CONTEXTO rule for +cp+, if it has one.
          # https://datatracker.ietf.org/doc/html/rfc5892
          #
          # @param label [String] label being validated
          # @param cp [Integer] codepoint located at +pos+
          # @param pos [Integer] zero-based index of +cp+ in +label+
          # @return [Boolean] false when +cp+ is not a CONTEXTO codepoint,
          #   true when it is and its context is valid
          # @raise [InvalidCodepointContextError] when the context rule fails
          def check_contexto(label, cp, pos)
            return false if cp < 183 # 0x00b7 is the lowest codepoint handled by valid_contexto?
            return false unless codepoint?(cp, "CONTEXTO")
            return true if valid_contexto?(label, cp, pos)

            raise InvalidCodepointContextError, cp_error_message(label, cp, pos)
          end

          # Builds the error text; the reported position is one-based.
          def cp_error_message(label, cp, pos)
            format("Codepoint U+%04X at position %d of %p not allowed", cp, pos + 1, label)
          end

          private

          # Evaluates the RFC 5892 contextual rule attached to a CONTEXTO codepoint.
          def valid_contexto?(label, cp, pos)
            case cp
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
            when 0x00b7
              # MIDDLE DOT must sit between two "l" (U+006C) characters.
              pos > 0 && pos < label.length - 1 &&
                label[pos - 1].ord == 0x006c && label[pos + 1].ord == 0x006c
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
            when 0x0375
              pos < label.length - 1 && script?(label[pos + 1].ord, "Greek")
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
            when 0x05f3, 0x05f4
              pos > 0 && script?(label[pos - 1].ord, "Hebrew")
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
            when 0x30fb
              # Some other character in the label must be Hiragana, Katakana or Han.
              label.each_codepoint.any? do |codepoint|
                codepoint != 0x30fb &&
                  (script?(codepoint, "Hiragana") || script?(codepoint, "Katakana") || script?(codepoint, "Han"))
              end
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
            when 0x0660..0x0669
              # Digits from this range must not share a label with 0x06f0..0x06f9.
              label.each_codepoint.none? { |codepoint| codepoint >= 0x06f0 && codepoint <= 0x06f9 }
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
            when 0x06f0..0x06f9
              # Digits from this range must not share a label with 0x0660..0x0669.
              label.each_codepoint.none? { |codepoint| codepoint >= 0x0660 && codepoint <= 0x0669 }
            else
              false
            end
          end

          # Evaluates the RFC 5892 contextual rule attached to a CONTEXTJ codepoint.
          #
          # Joining types are compared as the ASCII code of their letter:
          # 0x54 "T", 0x4c "L", 0x44 "D", 0x52 "R".
          def valid_contextj?(label, cp, pos)
            case cp
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
            when 0x200c
              return true if pos > 0 && virama_combining_class?(label[pos - 1].ord)

              # Walking backwards, only T characters may be skipped; the first
              # non-T character decides and must be L or D.
              joined_before = false
              (pos - 1).downto(0) do |i|
                joining_type = JOINING_TYPES[label[i].ord]
                next if joining_type == 0x54

                joined_before = [0x4c, 0x44].include?(joining_type)
                break
              end
              return false unless joined_before

              # Walking forwards, same idea: the first non-T character decides
              # and must be R or D.
              (pos + 1).upto(label.length - 1) do |i|
                joining_type = JOINING_TYPES[label[i].ord]
                next if joining_type == 0x54

                return [0x52, 0x44].include?(joining_type)
              end
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
            when 0x200d
              return virama_combining_class?(label[pos - 1].ord) if pos > 0
            end
            false
          end

          # True when +cp+ is listed under +script+ in SCRIPTS; codepoints
          # below 256 are answered with false without a table lookup.
          def script?(cp, script)
            return false if cp < 256

            Intranges.contain?(cp, SCRIPTS[script])
          end

          # True when +cp+ is listed in VIRAMA_COMBINING_CLASSES; codepoints
          # below 256 are answered with false without a table lookup.
          def virama_combining_class?(cp)
            return false if cp < 256

            Intranges.contain?(cp, VIRAMA_COMBINING_CLASSES)
          end

          # True when +cp+ is listed under +class_name+ in CODEPOINT_CLASSES.
          def codepoint?(cp, class_name)
            Intranges.contain?(cp, CODEPOINT_CLASSES[class_name])
          end
        end
      end
    end
  end
end
|