uri-idna 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -2
- data/lib/uri/idna/base_processing.rb +12 -6
- data/lib/uri/idna/data/bidi_classes.rb +1973 -0
- data/lib/uri/idna/data/codepoint_classes.rb +1226 -0
- data/lib/uri/idna/data/joining_types.rb +839 -0
- data/lib/uri/idna/data/leading_combiners.rb +321 -0
- data/lib/uri/idna/data/scripts.rb +108 -0
- data/lib/uri/idna/data/unicode_version.rb +10 -0
- data/lib/uri/idna/data/uts46.rb +8459 -8179
- data/lib/uri/idna/data/virama_combining_classes.rb +67 -0
- data/lib/uri/idna/idna2008/processing.rb +13 -28
- data/lib/uri/idna/punycode.rb +11 -9
- data/lib/uri/idna/uts46/mapping.rb +39 -37
- data/lib/uri/idna/uts46/processing.rb +14 -15
- data/lib/uri/idna/validation/bidi.rb +34 -52
- data/lib/uri/idna/validation/contextj.rb +62 -0
- data/lib/uri/idna/validation/contexto.rb +61 -0
- data/lib/uri/idna/validation/idna_permitted.rb +30 -0
- data/lib/uri/idna/validation/label.rb +1 -14
- data/lib/uri/idna/validation/leading_combining.rb +23 -0
- data/lib/uri/idna/version.rb +1 -1
- metadata +15 -7
- data/lib/uri/idna/data/idna.rb +0 -4697
- data/lib/uri/idna/intranges.rb +0 -57
- data/lib/uri/idna/validation/codepoint.rb +0 -128
data/lib/uri/idna/intranges.rb
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module URI
  module IDNA
    # Stores a set of integers as a sorted Array of packed half-open ranges.
    #
    # A range [start, finish) is packed into one Integer as
    # (start << 32) | finish. Packed values sort by start first, which lets
    # {contain?} locate the candidate range with a binary search.
    module Intranges
      class << self
        # Selects the low 32 bits of a packed range, i.e. its exclusive end.
        MASK = ((1 << 32) - 1)

        # Builds the packed range list for a collection of integers.
        #
        # Duplicates are dropped first so that every run of consecutive
        # values yields exactly one range (no redundant or overlapping entries).
        #
        # @param list [Array<Integer>] integers in any order
        # @return [Array<Integer>] packed ranges in ascending order
        def from_list(list)
          runs = list.uniq.sort.slice_when { |prev, curr| curr != prev + 1 }
          runs.map { |run| encode_range(run.first, run.last + 1) }
        end

        # Tests whether +int+ falls inside any of the packed +ranges+.
        #
        # @param int [Integer] value to look up
        # @param ranges [Array<Integer>] sorted packed ranges, as built by {from_list}
        # @return [Boolean]
        def contain?(int, ranges)
          # (int, 0) sorts before every real range that starts at int, because
          # a real range always has a finish greater than 0.
          probe = encode_range(int, 0)
          pos = ranges.bsearch_index { |packed| packed > probe } || ranges.length

          # The range just before the insertion point may cover int:
          # start <= int < finish.
          if pos > 0
            previous = ranges[pos - 1]
            return true if decode_left(previous) <= int && int < decode_right(previous)
          end

          # Otherwise the range at the insertion point may start exactly at int.
          pos < ranges.length && decode_left(ranges[pos]) == int
        end

        private

        # Packs the half-open range [start, finish) into a single Integer.
        def encode_range(start, finish)
          (start << 32) | finish
        end

        # Exclusive end of a packed range.
        def decode_right(r)
          r & MASK
        end

        # Inclusive start of a packed range.
        def decode_left(r)
          r >> 32
        end
      end
    end
  end
end
|
data/lib/uri/idna/validation/codepoint.rb
DELETED
@@ -1,128 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative "../intranges"
|
4
|
-
require_relative "../data/idna"
|
5
|
-
|
6
|
-
module URI
  module IDNA
    module Validation
      # Per-codepoint checks for a label: PVALID membership and the
      # CONTEXTJ / CONTEXTO contextual rules of RFC 5892 Appendix A.
      #
      # Relies on CODEPOINT_CLASSES, SCRIPTS, JOINING_TYPES,
      # VIRAMA_COMBINING_CLASSES, Intranges and the error classes being
      # defined elsewhere in the library.
      module Codepoint
        class << self
          # Joining_Type codes compared against JOINING_TYPES entries; the
          # values are the ASCII codes of the letters T, L, D and R.
          JOINING_TYPE_TRANSPARENT = 0x54
          JOINING_TYPES_BEFORE_ZWNJ = [0x4c, 0x44].freeze
          JOINING_TYPES_AFTER_ZWNJ = [0x52, 0x44].freeze

          # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.2
          #
          # @param label [String] label being validated (used for the error message)
          # @param cp [Integer] codepoint at +pos+
          # @param pos [Integer] zero-based index of +cp+ in +label+
          # @return [true] when +cp+ is in the PVALID class
          # @raise [InvalidCodepointError] otherwise
          def check_idna_validity(label, cp, pos)
            return true if codepoint?(cp, "PVALID")

            raise InvalidCodepointError, cp_error_message(label, cp, pos)
          end

          # https://datatracker.ietf.org/doc/html/rfc5892
          #
          # @return [Boolean] false when +cp+ is not a CONTEXTJ codepoint,
          #   true when it is and its contextual rule is satisfied
          # @raise [InvalidCodepointContextError] when the rule is violated
          def check_contextj(label, cp, pos)
            return false if cp < 256
            return false unless codepoint?(cp, "CONTEXTJ")
            return true if valid_contextj?(label, cp, pos)

            raise InvalidCodepointContextError, cp_error_message(label, cp, pos)
          end

          # https://datatracker.ietf.org/doc/html/rfc5892
          #
          # @return [Boolean] false when +cp+ is not a CONTEXTO codepoint,
          #   true when it is and its contextual rule is satisfied
          # @raise [InvalidCodepointContextError] when the rule is violated
          def check_contexto(label, cp, pos)
            # 183 is U+00B7, the smallest codepoint handled by valid_contexto?.
            return false if cp < 183
            return false unless codepoint?(cp, "CONTEXTO")
            return true if valid_contexto?(label, cp, pos)

            raise InvalidCodepointContextError, cp_error_message(label, cp, pos)
          end

          # Builds the error text; the reported position is one-based.
          def cp_error_message(label, cp, pos)
            format("Codepoint U+%04X at position %d of %p not allowed", cp, pos + 1, label)
          end

          private

          # Evaluates the RFC 5892 contextual rule for a CONTEXTO codepoint.
          def valid_contexto?(label, cp, pos)
            case cp
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.3
            when 0x00b7
              pos > 0 && pos < label.length - 1 &&
                label[pos - 1].ord == 0x006c && label[pos + 1].ord == 0x006c
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.4
            when 0x0375
              pos < label.length - 1 && script?(label[pos + 1].ord, "Greek")
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.5
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.6
            when 0x05f3, 0x05f4
              pos > 0 && script?(label[pos - 1].ord, "Hebrew")
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.7
            when 0x30fb
              label.each_codepoint.any? do |codepoint|
                codepoint != 0x30fb &&
                  (script?(codepoint, "Hiragana") || script?(codepoint, "Katakana") || script?(codepoint, "Han"))
              end
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.8
            when 0x0660..0x0669
              label.each_codepoint.none? { |codepoint| codepoint.between?(0x06f0, 0x06f9) }
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.9
            when 0x06f0..0x06f9
              label.each_codepoint.none? { |codepoint| codepoint.between?(0x0660, 0x0669) }
            else
              false
            end
          end

          # Evaluates the RFC 5892 contextual rule for a CONTEXTJ codepoint.
          def valid_contextj?(label, cp, pos)
            case cp
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.1
            when 0x200c
              return true if pos > 0 && virama_combining_class?(label[pos - 1].ord)

              zwnj_between_joining_characters?(label, pos)
            # https://datatracker.ietf.org/doc/html/rfc5892#appendix-A.2
            when 0x200d
              pos > 0 && virama_combining_class?(label[pos - 1].ord)
            else
              false
            end
          end

          # Second clause of RFC 5892 A.1: the nearest non-transparent
          # character before the ZWNJ must have joining type L or D, and the
          # nearest one after it must have joining type R or D.
          def zwnj_between_joining_characters?(label, pos)
            before = nearest_joining_type(label, (pos - 1).downto(0))
            return false unless JOINING_TYPES_BEFORE_ZWNJ.include?(before)

            after = nearest_joining_type(label, (pos + 1).upto(label.length - 1))
            JOINING_TYPES_AFTER_ZWNJ.include?(after)
          end

          # Returns the JOINING_TYPES entry of the first character, in the
          # given index order, that is not transparent. The scan stops at that
          # character whatever its type, so an unrelated character between the
          # ZWNJ and a joining character is not skipped over. Returns nil when
          # every character is transparent or +indexes+ is empty.
          def nearest_joining_type(label, indexes)
            indexes.each do |i|
              joining_type = JOINING_TYPES[label[i].ord]
              return joining_type unless joining_type == JOINING_TYPE_TRANSPARENT
            end
            nil
          end

          # True when +cp+ belongs to the named script; codepoints below 256
          # are rejected without a table lookup.
          def script?(cp, script)
            return false if cp < 256

            Intranges.contain?(cp, SCRIPTS[script])
          end

          # True when +cp+ is listed in VIRAMA_COMBINING_CLASSES; codepoints
          # below 256 are rejected without a table lookup.
          def virama_combining_class?(cp)
            return false if cp < 256

            Intranges.contain?(cp, VIRAMA_COMBINING_CLASSES)
          end

          # True when +cp+ belongs to the named IDNA codepoint class.
          def codepoint?(cp, class_name)
            Intranges.contain?(cp, CODEPOINT_CLASSES[class_name])
          end
        end
      end
    end
  end
end
|