utf8_validator 1.0.13 → 1.0.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/examples/fullstring.rb +4 -0
- data/lib/utf8_validator.rb +3 -1
- data/lib/validation/errors.rb +3 -1
- data/lib/validation/validator.rb +3 -0
- data/test/helper.rb +5 -0
- data/test/test_code_points.rb +127023 -0
- data/test/test_raise_request.rb +3 -0
- data/test/test_surrogate_half_first_point.rb +2094 -0
- data/test/test_utf8_validator.rb +578 -3
- data/utf8_validator.gemspec +7 -4
- data/utils/gencp.rb +59 -0
- metadata +5 -2
data/utils/gencp.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2016 Guy Allard
|
5
|
+
#--
|
6
|
+
#
|
7
|
+
# Generate UTF8 Code Points
|
8
|
+
#
|
9
|
+
=begin
|
10
|
+
|
11
|
+
From Wikipedia:
|
12
|
+
|
13
|
+
Bits_of_code_point First_code_point Last_code_point Bytes_in_sequence Byte_1 Byte_2 Byte_3 Byte_4 Byte_5 Byte_6
|
14
|
+
7 U+0000 U+007F 1 0xxxxxxx
|
15
|
+
11 U+0080 U+07FF 2 110xxxxx 10xxxxxx
|
16
|
+
16 U+0800 U+FFFF 3 1110xxxx 10xxxxxx 10xxxxxx
|
17
|
+
21 U+10000 U+1FFFFF 4 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
18
|
+
The following sequences are not part of the UTF-8 standard, only part of the original proposal
|
19
|
+
26 U+200000 U+3FFFFFF 5 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
20
|
+
31 U+4000000 U+7FFFFFFF 6 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
21
|
+
|
22
|
+
Note: in the U+ notation the numbers are hexadecimal
|
23
|
+
|
24
|
+
=end
|
25
|
+
|
26
|
+
class CPData
|
27
|
+
|
28
|
+
attr_reader :name, :ubytes, :first, :last
|
29
|
+
|
30
|
+
def initialize(name, ubytes, first, last)
|
31
|
+
@name, @ubytes, @first, @last = name, ubytes, first, last
|
32
|
+
end
|
33
|
+
end
|
34
|
+
#
|
35
|
+
bits7 = CPData.new("char1", 4, 0x0000, 0x007f)
|
36
|
+
bits11 = CPData.new("char2", 4, 0x0080, 0x07ff)
|
37
|
+
bits16a = CPData.new("char3a", 4, 0x0800, 0xcfff)
|
38
|
+
bits16b = CPData.new("char3b", 4, 0xe000, 0xffff)
|
39
|
+
bits21 = CPData.new("char4", 5, 0x10000, 0x1ffff)
|
40
|
+
#
|
41
|
+
bits26 = CPData.new("char5", 6, 0x10000, 0x1ffff)
|
42
|
+
bits31 = CPData.new("char6", 7, 0x10000, 0x1ffff)
|
43
|
+
bitssp = CPData.new("surrpair", 4, 0xd800, 0xdfff)
|
44
|
+
###########
|
45
|
+
|
46
|
+
def dquote(inp)
|
47
|
+
"\"#{inp}\""
|
48
|
+
end
|
49
|
+
|
50
|
+
pa = [bits7, bits11, bits16a, bits16b, bits21, bitssp]
|
51
|
+
|
52
|
+
pa.each do |na|
|
53
|
+
n = 0
|
54
|
+
na.first.upto(na.last) do |nn|
|
55
|
+
n += 1
|
56
|
+
hs = sprintf("\\u%04x", nn)
|
57
|
+
puts "#{dquote(hs)}, # CPData Name: #{na.name} n:#{n}"
|
58
|
+
end
|
59
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: utf8_validator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.13
|
4
|
+
version: 1.0.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Guy Allard
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-07-
|
11
|
+
date: 2016-07-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jeweler
|
@@ -41,9 +41,12 @@ files:
|
|
41
41
|
- lib/validation/errors.rb
|
42
42
|
- lib/validation/validator.rb
|
43
43
|
- test/helper.rb
|
44
|
+
- test/test_code_points.rb
|
44
45
|
- test/test_raise_request.rb
|
46
|
+
- test/test_surrogate_half_first_point.rb
|
45
47
|
- test/test_utf8_validator.rb
|
46
48
|
- utf8_validator.gemspec
|
49
|
+
- utils/gencp.rb
|
47
50
|
homepage: http://github.com/gmallard/utf8_validator
|
48
51
|
licenses:
|
49
52
|
- MIT
|