utf8_validator 1.0.13 → 1.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/examples/fullstring.rb +4 -0
- data/lib/utf8_validator.rb +3 -1
- data/lib/validation/errors.rb +3 -1
- data/lib/validation/validator.rb +3 -0
- data/test/helper.rb +5 -0
- data/test/test_code_points.rb +127023 -0
- data/test/test_raise_request.rb +3 -0
- data/test/test_surrogate_half_first_point.rb +2094 -0
- data/test/test_utf8_validator.rb +578 -3
- data/utf8_validator.gemspec +7 -4
- data/utils/gencp.rb +59 -0
- metadata +5 -2
data/utils/gencp.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2016 Guy Allard
|
5
|
+
#--
|
6
|
+
#
|
7
|
+
# Generate UTF8 Code Points
|
8
|
+
#
|
9
|
+
=begin
|
10
|
+
|
11
|
+
From Wikipedia:
|
12
|
+
|
13
|
+
Bits_of_code_point First_code_point Last_code_point Bytes_in_sequence Byte_1 Byte_2 Byte_3 Byte_4 Byte_5 Byte_6
|
14
|
+
7 U+0000 U+007F 1 0xxxxxxx
|
15
|
+
11 U+0080 U+07FF 2 110xxxxx 10xxxxxx
|
16
|
+
16 U+0800 U+FFFF 3 1110xxxx 10xxxxxx 10xxxxxx
|
17
|
+
21 U+10000 U+1FFFFF 4 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
18
|
+
The following sequences are not part of the UTF-8 standard, only part of the original proposal
|
19
|
+
26 U+200000 U+3FFFFFF 5 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
20
|
+
31 U+4000000 U+7FFFFFFF 6 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
21
|
+
|
22
|
+
Note: in the U+ notation the numbers are hexadecimal
|
23
|
+
|
24
|
+
=end
|
25
|
+
|
26
|
+
# Read-only record describing one contiguous range of code points.
#
# name   - label for the range
# ubytes - numeric attribute stored as given
#          (NOTE(review): exact meaning is not shown in this file - confirm)
# first  - first code point of the range
# last   - last code point of the range
class CPData
  attr_reader :name, :ubytes, :first, :last

  # Store the four values exactly as passed in.
  def initialize(name, ubytes, first, last)
    @name = name
    @ubytes = ubytes
    @first = first
    @last = last
  end
end
|
34
|
+
#
|
35
|
+
# Code point ranges, one CPData per range: name, ubytes, first, last.
bits7 = CPData.new("char1", 4, 0x0000, 0x007f)
bits11 = CPData.new("char2", 4, 0x0080, 0x07ff)
# Stop just below the surrogate range, which begins at U+D800 (see bitssp).
# The previous upper bound of 0xcfff left U+D000..U+D7FF out of every range.
bits16a = CPData.new("char3a", 4, 0x0800, 0xd7ff)
# Resume just above the surrogate range.
bits16b = CPData.new("char3b", 4, 0xe000, 0xffff)
bits21 = CPData.new("char4", 5, 0x10000, 0x1ffff)
#
# NOTE(review): bits26 and bits31 carry the same first/last values as bits21;
# they look like placeholders for the 5- and 6-byte forms - confirm before use.
bits26 = CPData.new("char5", 6, 0x10000, 0x1ffff)
bits31 = CPData.new("char6", 7, 0x10000, 0x1ffff)
# The surrogate range U+D800..U+DFFF.
bitssp = CPData.new("surrpair", 4, 0xd800, 0xdfff)
|
44
|
+
###########
|
45
|
+
|
46
|
+
# Wrap the string form of +inp+ in a pair of double quote characters.
#
# inp - value to quote; it is interpolated, so non-strings are converted
#
# Returns a new String.
def dquote(inp)
  %("#{inp}")
end
|
49
|
+
|
50
|
+
# Ranges to emit, in output order.
pa = [bits7, bits11, bits16a, bits16b, bits21, bitssp]

# For every code point in every range, print one line holding a double-quoted
# Ruby unicode escape, a trailing comma, and a comment with the range name and
# the 1-based position of the code point within its range.
pa.each do |na|
  na.first.upto(na.last).with_index(1) do |nn, n|
    # The "\uXXXX" escape consumes exactly four hex digits, so a value above
    # U+FFFF written as "\u10000" would be read as U+1000 followed by "0".
    # Use the braced "\u{...}" form for those; lower values keep "\uXXXX".
    hs = nn > 0xffff ? sprintf("\\u{%x}", nn) : sprintf("\\u%04x", nn)
    puts "#{dquote(hs)}, # CPData Name: #{na.name} n:#{n}"
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: utf8_validator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.13
|
4
|
+
version: 1.0.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Guy Allard
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jeweler
|
@@ -41,9 +41,12 @@ files:
|
|
41
41
|
- lib/validation/errors.rb
|
42
42
|
- lib/validation/validator.rb
|
43
43
|
- test/helper.rb
|
44
|
+
- test/test_code_points.rb
|
44
45
|
- test/test_raise_request.rb
|
46
|
+
- test/test_surrogate_half_first_point.rb
|
45
47
|
- test/test_utf8_validator.rb
|
46
48
|
- utf8_validator.gemspec
|
49
|
+
- utils/gencp.rb
|
47
50
|
homepage: http://github.com/gmallard/utf8_validator
|
48
51
|
licenses:
|
49
52
|
- MIT
|