utf8_validator 1.0.13 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/utils/gencp.rb ADDED
@@ -0,0 +1,59 @@
1
+ # encoding: utf-8
2
+ #
3
+ #--
4
+ # Copyright (c) 2016 Guy Allard
5
+ #--
6
+ #
7
+ # Generate UTF8 Code Points
8
+ #
9
+ =begin
10
+
11
+ From Wikipedia:
12
+
13
+ Bits_of_code_point First_code_point Last_code_point Bytes_in_sequence Byte_1 Byte_2 Byte_3 Byte_4 Byte_5 Byte_6
14
+ 7 U+0000 U+007F 1 0xxxxxxx
15
+ 11 U+0080 U+07FF 2 110xxxxx 10xxxxxx
16
+ 16 U+0800 U+FFFF 3 1110xxxx 10xxxxxx 10xxxxxx
17
+ 21 U+10000 U+1FFFFF 4 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
18
+ The following sequences are not part of the UTF-8 standard, only part of the original proposal
19
+ 26 U+200000 U+3FFFFFF 5 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
20
+ 31 U+4000000 U+7FFFFFFF 6 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
21
+
22
+ Note: in the U+ notation the numbers are hexadecimal
23
+
24
+ =end
25
+
26
# Read-only record describing one named range of code points.
#
# name   - label for the range; the generator loop prints it next to
#          every code point it emits
# ubytes - numeric value stored with the range; nothing in this script
#          reads it back
# first  - lowest code point of the range
# last   - highest code point of the range (the generator loop treats
#          the range as inclusive)
class CPData
  attr_reader :name, :ubytes, :first, :last

  # Store the four values as given; no validation is performed.
  def initialize(name, ubytes, first, last)
    @name = name
    @ubytes = ubytes
    @first = first
    @last = last
  end
end
34
+ #
35
# Code point ranges available to the generator loop.
# CPData arguments are (name, ubytes, first, last).
# NOTE(review): the ubytes values (4..7) do not match the Bytes_in_sequence
# column of the table at the top of this file and are never read; they look
# like hex digit counts -- confirm the intent.
bits7 = CPData.new("char1", 4, 0x0000, 0x007f)
bits11 = CPData.new("char2", 4, 0x0080, 0x07ff)
# The three-byte range is split in two so that the surrogate block
# U+D800..U+DFFF is excluded. The lower half must therefore run up to
# U+D7FF; the previous upper bound of 0xcfff silently skipped
# U+D000..U+D7FF.
bits16a = CPData.new("char3a", 4, 0x0800, 0xd7ff)
bits16b = CPData.new("char3b", 4, 0xe000, 0xffff)
# NOTE(review): stops at U+1FFFF, well short of the end of the four-byte
# range -- presumably to keep the generated output small; confirm.
bits21 = CPData.new("char4", 5, 0x10000, 0x1ffff)
#
# NOTE(review): bits26 and bits31 are not in the list the generator
# iterates, and their ranges duplicate char4 rather than the 5- and 6-byte
# ranges in the table at the top of this file. They look like placeholders.
bits26 = CPData.new("char5", 6, 0x10000, 0x1ffff)
bits31 = CPData.new("char6", 7, 0x10000, 0x1ffff)
# The surrogate block itself, kept as a separate range.
bitssp = CPData.new("surrpair", 4, 0xd800, 0xdfff)
44
+ ###########
45
+
46
# Return +inp+ (converted to a string by interpolation) wrapped in a pair
# of double-quote characters. Nothing inside +inp+ is escaped.
def dquote(inp)
  quote = '"'
  "#{quote}#{inp}#{quote}"
end
49
+
50
# Ranges to emit, in output order.
pa = [bits7, bits11, bits16a, bits16b, bits21, bitssp]

# Print one line per code point of every range in +pa+:
#
#   "<escape>", # CPData Name: <range name> n:<position>
#
# where <position> is the 1-based index of the code point within its range.
# NOTE(review): the output looks intended for pasting into Ruby source as
# string literals -- confirm.
pa.each do |na|
  na.first.upto(na.last).with_index(1) do |nn, n|
    # In a Ruby string literal \uXXXX consumes exactly four hex digits, so
    # "\u10000" would be read as U+1000 followed by "0". Code points above
    # U+FFFF therefore need the braced form \u{...}.
    hs = nn > 0xffff ? sprintf("\\u{%x}", nn) : sprintf("\\u%04x", nn)
    puts "#{dquote(hs)}, # CPData Name: #{na.name} n:#{n}"
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: utf8_validator
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.13
4
+ version: 1.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Guy Allard
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-08 00:00:00.000000000 Z
11
+ date: 2016-07-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jeweler
@@ -41,9 +41,12 @@ files:
41
41
  - lib/validation/errors.rb
42
42
  - lib/validation/validator.rb
43
43
  - test/helper.rb
44
+ - test/test_code_points.rb
44
45
  - test/test_raise_request.rb
46
+ - test/test_surrogate_half_first_point.rb
45
47
  - test/test_utf8_validator.rb
46
48
  - utf8_validator.gemspec
49
+ - utils/gencp.rb
47
50
  homepage: http://github.com/gmallard/utf8_validator
48
51
  licenses:
49
52
  - MIT