utf8_validator 1.0.13 → 1.0.14

Sign up to get free protection for your applications and to get access to all the features.
data/utils/gencp.rb ADDED
@@ -0,0 +1,59 @@
1
+ # encoding: utf-8
2
+ #
3
+ #--
4
+ # Copyright (c) 2016 Guy Allard
5
+ #--
6
+ #
7
+ # Generate UTF8 Code Points
8
+ #
9
+ =begin
10
+
11
+ From Wikipedia:
12
+
13
+ Bits_of_code_point First_code_point Last_code_point Bytes_in_sequence Byte_1 Byte_2 Byte_3 Byte_4 Byte_5 Byte_6
14
+ 7 U+0000 U+007F 1 0xxxxxxx
15
+ 11 U+0080 U+07FF 2 110xxxxx 10xxxxxx
16
+ 16 U+0800 U+FFFF 3 1110xxxx 10xxxxxx 10xxxxxx
17
+ 21 U+10000 U+1FFFFF 4 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
18
+ The following sequences are not part of the UTF-8 standard; they were only part of the original proposal
19
+ 26 U+200000 U+3FFFFFF 5 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
20
+ 31 U+4000000 U+7FFFFFFF 6 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
21
+
22
+ Note: in the U+ notation the numbers are hexadecimal
23
+
24
+ =end
25
+
26
+ class CPData
27
+
28
+ attr_reader :name, :ubytes, :first, :last
29
+
30
+ def initialize(name, ubytes, first, last)
31
+ @name, @ubytes, @first, @last = name, ubytes, first, last
32
+ end
33
+ end
34
+ #
35
+ bits7 = CPData.new("char1", 4, 0x0000, 0x007f)
36
+ bits11 = CPData.new("char2", 4, 0x0080, 0x07ff)
37
+ bits16a = CPData.new("char3a", 4, 0x0800, 0xcfff)
38
+ bits16b = CPData.new("char3b", 4, 0xe000, 0xffff)
39
+ bits21 = CPData.new("char4", 5, 0x10000, 0x1ffff)
40
+ #
41
+ bits26 = CPData.new("char5", 6, 0x10000, 0x1ffff)
42
+ bits31 = CPData.new("char6", 7, 0x10000, 0x1ffff)
43
+ bitssp = CPData.new("surrpair", 4, 0xd800, 0xdfff)
44
+ ###########
45
+
46
+ def dquote(inp)
47
+ "\"#{inp}\""
48
+ end
49
+
50
+ pa = [bits7, bits11, bits16a, bits16b, bits21, bitssp]
51
+
52
+ pa.each do |na|
53
+ n = 0
54
+ na.first.upto(na.last) do |nn|
55
+ n += 1
56
+ hs = sprintf("\\u%04x", nn)
57
+ puts "#{dquote(hs)}, # CPData Name: #{na.name} n:#{n}"
58
+ end
59
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: utf8_validator
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.13
4
+ version: 1.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Guy Allard
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-08 00:00:00.000000000 Z
11
+ date: 2016-07-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jeweler
@@ -41,9 +41,12 @@ files:
41
41
  - lib/validation/errors.rb
42
42
  - lib/validation/validator.rb
43
43
  - test/helper.rb
44
+ - test/test_code_points.rb
44
45
  - test/test_raise_request.rb
46
+ - test/test_surrogate_half_first_point.rb
45
47
  - test/test_utf8_validator.rb
46
48
  - utf8_validator.gemspec
49
+ - utils/gencp.rb
47
50
  homepage: http://github.com/gmallard/utf8_validator
48
51
  licenses:
49
52
  - MIT