characteristics 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: be2da5691ab55cf6657a961f4ad7f4fc880a8b65
4
- data.tar.gz: 71d2b668a4e2383aac937505ebaf8ec5450d3969
3
+ metadata.gz: 382e8a6535fa4152dbdbf7f213a12ba6e5a82830
4
+ data.tar.gz: c92c1bdf4eb32f8ee66c0dce0d72327f95ca0260
5
5
  SHA512:
6
- metadata.gz: da6dad0bb4c897c7145064894c483625f8fb68162cf0e3e8b812a8dfc3cddb5c4aec49f04a7a8452ff7166ba5c74ebdd0600f084e6333e010b11345335bb1ac0
7
- data.tar.gz: 02b16977833dbaa784516e08b9bb65581dad9c8bf2e32207b5f95f3d6b65a83f2fd3557c5abe05e0a878730906a5a193342eba248706a28c00cf37d7741e0901
6
+ metadata.gz: e6e886ba4aa8b3605c5a8918e3e97c2982a1843779e780562757373fe30641e7e56e6e8bb0ea90bcca6b20a1ce752780c587a87902f3f346fa42a56c1cca07bd
7
+ data.tar.gz: 273ee12ec3e4bb2426743ac146790b658bdfc236e396c73d7af369b4cf76d89248fcb074a1515c4280e322a05644feeaf838622782210501fab2c36e7819e384
@@ -1,5 +1,10 @@
1
1
  ## CHANGELOG
2
2
 
3
+ ### 0.2.0
4
+
5
+ * Fix detection of supported Windows encodings and some unassigned codepoints
6
+ * Include unassigned codepoints of ISO-8859-X
7
+
3
8
  ### 0.1.0
4
9
 
5
10
  * Initial release
data/README.md CHANGED
@@ -32,10 +32,10 @@ char_info.blank? # => true / false
32
32
 
33
33
  This library knows of four different kinds of encodings:
34
34
 
35
- - **:unicode** Unicode familiy of multibyte encodings (*UTF-\**)
35
+ - **:unicode** Unicode family of multibyte encodings (*UTF-X*)
36
36
  - **:ascii** 7-Bit ASCII (*US-ASCII*)
37
37
  - **:binary** Arbitrary string (*ASCII-8BIT*)
38
- - **:byte ** Known byte encoding (*ISO-8859-\**, *Windows-\**)
38
+ - **:byte** Known byte encoding (*ISO-8859-X*, *Windows-125X*)
39
39
 
40
40
  Other encodings are not supported yet.
41
41
 
@@ -47,11 +47,11 @@ Validness is determined by Ruby's String#valid_encoding?
47
47
 
48
48
  ### `unicode?`
49
49
 
50
- `true` for Unicode encodings (`UTF-*`)
50
+ `true` for Unicode encodings (`UTF-X`)
51
51
 
52
52
  ### `control?`
53
53
 
54
- Control characters are codepoints in the is C0, delete or C1 control character range.
54
+ Control characters are codepoints in the [C0, delete or C1 control character range](https://en.wikipedia.org/wiki/C0_and_C1_control_codes).
55
55
 
56
56
  ### `assigned?`
57
57
 
@@ -14,7 +14,7 @@ class Characteristics
14
14
  :binary
15
15
  when /^UTF-/
16
16
  :unicode
17
- when /^ISO-8859-/, /^Windows/
17
+ when /^ISO-8859-/, /^Windows-125/
18
18
  :byte
19
19
  else
20
20
  raise ArgumentError, "encoding <#{encoding_name}> not supported"
@@ -2,40 +2,102 @@ class ByteCharacteristics < Characteristics
2
2
  HAS_C1 = /^(ISO-8859-)/
3
3
 
4
4
  UNASSIGNED = {
5
- 0x81 => /^Windows-(1250|1252|1253|1254|1255|1257|1258)/,
6
- 0x83 => /^Windows-(1250|1257)/,
7
- 0x88 => /^Windows-(1250|1253|1257)/,
8
- 0x8A => /^Windows-(1253|1255|1257|1258)/,
9
- 0x8C => /^Windows-(1253|1255|1257)/,
10
- 0x8D => /^Windows-(1252|1253|1254|1255|1258)/,
11
- 0x8E => /^Windows-(1253|1254|1255|1258)/,
12
- 0x8F => /^Windows-(1252|1253|1254|1255|1258)/,
13
-
14
- 0x90 => /^Windows-(1250|1252|1253|1254|1255|1257|1258)/,
15
- 0x98 => /^Windows-(1250|1251|1253|1257)/,
16
- 0x9A => /^Windows-(1253|1255|1257|1258)/,
17
- 0x9B => /^Windows-(1252)/,
18
- 0x9C => /^Windows-(1253|1255|1257)/,
19
- 0x9D => /^Windows-(1253|1254|1255|1258)/,
20
- 0x9E => /^Windows-(1253|1254|1255|1258)/,
21
- 0x9F => /^Windows-(1253|1255|1257)/,
22
-
23
- 0xA1 => /^Windows-(1257)/,
24
- 0xA5 => /^Windows-(1257)/,
25
- 0xAA => /^Windows-(1253)/,
26
-
27
- 0xD2 => /^Windows-(1253)/,
28
- 0xD9 => /^Windows-(1255)/,
29
- 0xDA => /^Windows-(1255)/,
30
- 0xDB => /^Windows-(1255)/,
31
- 0xDC => /^Windows-(1255)/,
32
- 0xDD => /^Windows-(1255)/,
33
- 0xDE => /^Windows-(1255)/,
34
- 0xDF => /^Windows-(1255)/,
35
-
36
- 0xFB => /^Windows-(1255)/,
37
- 0xFC => /^Windows-(1255)/,
38
- 0xFF => /^Windows-(1253|1255)/,
5
+ 0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
6
+ 0x83 => /^(Windows-(1250|1257))/,
7
+ 0x88 => /^(Windows-(1250|1253|1257))/,
8
+ 0x8A => /^(Windows-(1253|1255|1257|1258))/,
9
+ 0x8C => /^(Windows-(1253|1255|1257))/,
10
+ 0x8D => /^(Windows-(1252|1253|1254|1255|1258))/,
11
+ 0x8E => /^(Windows-(1253|1254|1255|1258))/,
12
+ 0x8F => /^(Windows-(1252|1253|1254|1255|1258))/,
13
+
14
+ 0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
15
+ 0x98 => /^(Windows-(1250|1251|1253|1257))/,
16
+ 0x9A => /^(Windows-(1253|1255|1257|1258))/,
17
+ 0x9C => /^(Windows-(1253|1255|1257))/,
18
+ 0x9D => /^(Windows-(1252|1253|1254|1255|1258))/,
19
+ 0x9E => /^(Windows-(1253|1254|1255|1258))/,
20
+ 0x9F => /^(Windows-(1253|1255|1257))/,
21
+
22
+ 0xA1 => /^(ISO-8859-(6|8)|Windows-(1257))/,
23
+ 0xA2 => /^(ISO-8859-(6))/,
24
+ 0xA3 => /^(ISO-8859-(6))/,
25
+ 0xA5 => /^(ISO-8859-(3|6)|Windows-(1257))/,
26
+ 0xA6 => /^(ISO-8859-(6))/,
27
+ 0xA7 => /^(ISO-8859-(6))/,
28
+ 0xA8 => /^(ISO-8859-(6))/,
29
+ 0xA9 => /^(ISO-8859-(6))/,
30
+ 0xAA => /^(ISO-8859-(6)|Windows-(1253))/,
31
+ 0xAB => /^(ISO-8859-(6))/,
32
+ 0xAE => /^(ISO-8859-(3|6|7))/,
33
+ 0xAF => /^(ISO-8859-(6))/,
34
+
35
+ 0xB0 => /^(ISO-8859-(6))/,
36
+ 0xB1 => /^(ISO-8859-(6))/,
37
+ 0xB2 => /^(ISO-8859-(6))/,
38
+ 0xB3 => /^(ISO-8859-(6))/,
39
+ 0xB4 => /^(ISO-8859-(6))/,
40
+ 0xB5 => /^(ISO-8859-(6))/,
41
+ 0xB6 => /^(ISO-8859-(6))/,
42
+ 0xB7 => /^(ISO-8859-(6))/,
43
+ 0xB8 => /^(ISO-8859-(6))/,
44
+ 0xB9 => /^(ISO-8859-(6))/,
45
+ 0xBA => /^(ISO-8859-(6))/,
46
+ 0xBC => /^(ISO-8859-(6))/,
47
+ 0xBD => /^(ISO-8859-(6))/,
48
+ 0xBE => /^(ISO-8859-(3|6))/,
49
+ 0xBF => /^(ISO-8859-(8))/,
50
+
51
+ 0xC0 => /^(ISO-8859-(6|8))/,
52
+ 0xC1 => /^(ISO-8859-(8))/,
53
+ 0xC2 => /^(ISO-8859-(8))/,
54
+ 0xC3 => /^(ISO-8859-(3|8))/,
55
+ 0xC4 => /^(ISO-8859-(8))/,
56
+ 0xC5 => /^(ISO-8859-(8))/,
57
+ 0xC6 => /^(ISO-8859-(8))/,
58
+ 0xC7 => /^(ISO-8859-(8))/,
59
+ 0xC8 => /^(ISO-8859-(8))/,
60
+ 0xC9 => /^(ISO-8859-(8))/,
61
+ 0xCA => /^(ISO-8859-(8))/,
62
+ 0xCB => /^(ISO-8859-(8))/,
63
+ 0xCC => /^(ISO-8859-(8))/,
64
+ 0xCD => /^(ISO-8859-(8))/,
65
+ 0xCE => /^(ISO-8859-(8))/,
66
+ 0xCF => /^(ISO-8859-(8))/,
67
+
68
+ 0xD0 => /^(ISO-8859-(3|8))/,
69
+ 0xD1 => /^(ISO-8859-(8))/,
70
+ 0xD2 => /^(ISO-8859-(7|8)|Windows-(1253))/,
71
+ 0xD3 => /^(ISO-8859-(8))/,
72
+ 0xD4 => /^(ISO-8859-(8))/,
73
+ 0xD5 => /^(ISO-8859-(8))/,
74
+ 0xD6 => /^(ISO-8859-(8))/,
75
+ 0xD7 => /^(ISO-8859-(8))/,
76
+ 0xD8 => /^(ISO-8859-(8))/,
77
+ 0xD9 => /^(ISO-8859-(8)|Windows-(1255))/,
78
+ 0xDA => /^(ISO-8859-(8)|Windows-(1255))/,
79
+ 0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
80
+ 0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
81
+ 0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
82
+ 0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
83
+ 0xDF => /^(ISO-8859-(6)|Windows-(1255))/,
84
+
85
+ 0xE3 => /^(ISO-8859-(3))/,
86
+
87
+ 0xF0 => /^(ISO-8859-(3))/,
88
+ 0xF3 => /^(ISO-8859-(6))/,
89
+ 0xF4 => /^(ISO-8859-(6))/,
90
+ 0xF5 => /^(ISO-8859-(6))/,
91
+ 0xF6 => /^(ISO-8859-(6))/,
92
+ 0xF7 => /^(ISO-8859-(6))/,
93
+ 0xF8 => /^(ISO-8859-(6))/,
94
+ 0xF9 => /^(ISO-8859-(6))/,
95
+ 0xFA => /^(ISO-8859-(6))/,
96
+ 0xFB => /^(ISO-8859-(6|8)|Windows-(1255))/,
97
+ 0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
98
+ 0xFD => /^(ISO-8859-(6|11))/,
99
+ 0xFE => /^(ISO-8859-(6|11))/,
100
+ 0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255))/,
39
101
  }.freeze
40
102
 
41
103
  BLANKS = [
@@ -51,7 +113,7 @@ class ByteCharacteristics < Characteristics
51
113
  ].freeze
52
114
 
53
115
  EXTRA_BLANKS = {
54
- 0xA0 => /^(ISO-8859-|Windows-)/,
116
+ 0xA0 => /^(ISO-8859-|Windows-125)/,
55
117
  0x9D => /^Windows-(1256)/,
56
118
  0x9F => /^Windows-(1256)/,
57
119
  }.freeze
@@ -1,4 +1,4 @@
1
1
  class Characteristics
2
- VERSION = "0.1.0".freeze
2
+ VERSION = "0.2.0".freeze
3
3
  end
4
4
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: characteristics
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-12 00:00:00.000000000 Z
11
+ date: 2017-03-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode-categories