characteristics 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: be2da5691ab55cf6657a961f4ad7f4fc880a8b65
4
- data.tar.gz: 71d2b668a4e2383aac937505ebaf8ec5450d3969
3
+ metadata.gz: 382e8a6535fa4152dbdbf7f213a12ba6e5a82830
4
+ data.tar.gz: c92c1bdf4eb32f8ee66c0dce0d72327f95ca0260
5
5
  SHA512:
6
- metadata.gz: da6dad0bb4c897c7145064894c483625f8fb68162cf0e3e8b812a8dfc3cddb5c4aec49f04a7a8452ff7166ba5c74ebdd0600f084e6333e010b11345335bb1ac0
7
- data.tar.gz: 02b16977833dbaa784516e08b9bb65581dad9c8bf2e32207b5f95f3d6b65a83f2fd3557c5abe05e0a878730906a5a193342eba248706a28c00cf37d7741e0901
6
+ metadata.gz: e6e886ba4aa8b3605c5a8918e3e97c2982a1843779e780562757373fe30641e7e56e6e8bb0ea90bcca6b20a1ce752780c587a87902f3f346fa42a56c1cca07bd
7
+ data.tar.gz: 273ee12ec3e4bb2426743ac146790b658bdfc236e396c73d7af369b4cf76d89248fcb074a1515c4280e322a05644feeaf838622782210501fab2c36e7819e384
@@ -1,5 +1,10 @@
1
1
  ## CHANGELOG
2
2
 
3
+ ### 0.2.0
4
+
5
+ * Fix detection of supported Windows encodings and some unassigned codepoints
6
+ * Include unassigned codepoints of ISO-8859-X
7
+
3
8
  ### 0.1.0
4
9
 
5
10
  * Initial release
data/README.md CHANGED
@@ -32,10 +32,10 @@ char_info.blank? # => true / false
32
32
 
33
33
  This library knows of four different kinds of encodings:
34
34
 
35
- - **:unicode** Unicode familiy of multibyte encodings (*UTF-\**)
35
+ - **:unicode** Unicode family of multibyte encodings (*UTF-X*)
36
36
  - **:ascii** 7-Bit ASCII (*US-ASCII*)
37
37
  - **:binary** Arbitrary string (*ASCII-8BIT*)
38
- - **:byte ** Known byte encoding (*ISO-8859-\**, *Windows-\**)
38
+ - **:byte** Known byte encoding (*ISO-8859-X*, *Windows-125X*)
39
39
 
40
40
  Other encodings are not supported, yet.
41
41
 
@@ -47,11 +47,11 @@ Validness is determined by Ruby's String#valid_encoding?
47
47
 
48
48
  ### `unicode?`
49
49
 
50
- `true` for Unicode encodings (`UTF-*`)
50
+ `true` for Unicode encodings (`UTF-X`)
51
51
 
52
52
  ### `control?`
53
53
 
54
- Control characters are codepoints in the is C0, delete or C1 control character range.
54
+ Control characters are codepoints in the [C0, delete or C1 control character range](https://en.wikipedia.org/wiki/C0_and_C1_control_codes).
55
55
 
56
56
  ### `assigned?`
57
57
 
@@ -14,7 +14,7 @@ class Characteristics
14
14
  :binary
15
15
  when /^UTF-/
16
16
  :unicode
17
- when /^ISO-8859-/, /^Windows/
17
+ when /^ISO-8859-/, /^Windows-125/
18
18
  :byte
19
19
  else
20
20
  raise ArgumentError, "encoding <#{encoding_name}> not supported"
@@ -2,40 +2,102 @@ class ByteCharacteristics < Characteristics
2
2
  HAS_C1 = /^(ISO-8859-)/
3
3
 
4
4
  UNASSIGNED = {
5
- 0x81 => /^Windows-(1250|1252|1253|1254|1255|1257|1258)/,
6
- 0x83 => /^Windows-(1250|1257)/,
7
- 0x88 => /^Windows-(1250|1253|1257)/,
8
- 0x8A => /^Windows-(1253|1255|1257|1258)/,
9
- 0x8C => /^Windows-(1253|1255|1257)/,
10
- 0x8D => /^Windows-(1252|1253|1254|1255|1258)/,
11
- 0x8E => /^Windows-(1253|1254|1255|1258)/,
12
- 0x8F => /^Windows-(1252|1253|1254|1255|1258)/,
13
-
14
- 0x90 => /^Windows-(1250|1252|1253|1254|1255|1257|1258)/,
15
- 0x98 => /^Windows-(1250|1251|1253|1257)/,
16
- 0x9A => /^Windows-(1253|1255|1257|1258)/,
17
- 0x9B => /^Windows-(1252)/,
18
- 0x9C => /^Windows-(1253|1255|1257)/,
19
- 0x9D => /^Windows-(1253|1254|1255|1258)/,
20
- 0x9E => /^Windows-(1253|1254|1255|1258)/,
21
- 0x9F => /^Windows-(1253|1255|1257)/,
22
-
23
- 0xA1 => /^Windows-(1257)/,
24
- 0xA5 => /^Windows-(1257)/,
25
- 0xAA => /^Windows-(1253)/,
26
-
27
- 0xD2 => /^Windows-(1253)/,
28
- 0xD9 => /^Windows-(1255)/,
29
- 0xDA => /^Windows-(1255)/,
30
- 0xDB => /^Windows-(1255)/,
31
- 0xDC => /^Windows-(1255)/,
32
- 0xDD => /^Windows-(1255)/,
33
- 0xDE => /^Windows-(1255)/,
34
- 0xDF => /^Windows-(1255)/,
35
-
36
- 0xFB => /^Windows-(1255)/,
37
- 0xFC => /^Windows-(1255)/,
38
- 0xFF => /^Windows-(1253|1255)/,
5
+ 0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
6
+ 0x83 => /^(Windows-(1250|1257))/,
7
+ 0x88 => /^(Windows-(1250|1253|1257))/,
8
+ 0x8A => /^(Windows-(1253|1255|1257|1258))/,
9
+ 0x8C => /^(Windows-(1253|1255|1257))/,
10
+ 0x8D => /^(Windows-(1252|1253|1254|1255|1258))/,
11
+ 0x8E => /^(Windows-(1253|1254|1255|1258))/,
12
+ 0x8F => /^(Windows-(1252|1253|1254|1255|1258))/,
13
+
14
+ 0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
15
+ 0x98 => /^(Windows-(1250|1251|1253|1257))/,
16
+ 0x9A => /^(Windows-(1253|1255|1257|1258))/,
17
+ 0x9C => /^(Windows-(1253|1255|1257))/,
18
+ 0x9D => /^(Windows-(1252|1253|1254|1255|1258))/,
19
+ 0x9E => /^(Windows-(1253|1254|1255|1258))/,
20
+ 0x9F => /^(Windows-(1253|1255|1257))/,
21
+
22
+ 0xA1 => /^(ISO-8859-(6|8)|Windows-(1257))/,
23
+ 0xA2 => /^(ISO-8859-(6))/,
24
+ 0xA3 => /^(ISO-8859-(6))/,
25
+ 0xA5 => /^(ISO-8859-(3|6)|Windows-(1257))/,
26
+ 0xA6 => /^(ISO-8859-(6))/,
27
+ 0xA7 => /^(ISO-8859-(6))/,
28
+ 0xA8 => /^(ISO-8859-(6))/,
29
+ 0xA9 => /^(ISO-8859-(6))/,
30
+ 0xAA => /^(ISO-8859-(6)|Windows-(1253))/,
31
+ 0xAB => /^(ISO-8859-(6))/,
32
+ 0xAE => /^(ISO-8859-(3|6|7))/,
33
+ 0xAF => /^(ISO-8859-(6))/,
34
+
35
+ 0xB0 => /^(ISO-8859-(6))/,
36
+ 0xB1 => /^(ISO-8859-(6))/,
37
+ 0xB2 => /^(ISO-8859-(6))/,
38
+ 0xB3 => /^(ISO-8859-(6))/,
39
+ 0xB4 => /^(ISO-8859-(6))/,
40
+ 0xB5 => /^(ISO-8859-(6))/,
41
+ 0xB6 => /^(ISO-8859-(6))/,
42
+ 0xB7 => /^(ISO-8859-(6))/,
43
+ 0xB8 => /^(ISO-8859-(6))/,
44
+ 0xB9 => /^(ISO-8859-(6))/,
45
+ 0xBA => /^(ISO-8859-(6))/,
46
+ 0xBC => /^(ISO-8859-(6))/,
47
+ 0xBD => /^(ISO-8859-(6))/,
48
+ 0xBE => /^(ISO-8859-(3|6))/,
49
+ 0xBF => /^(ISO-8859-(8))/,
50
+
51
+ 0xC0 => /^(ISO-8859-(6|8))/,
52
+ 0xC1 => /^(ISO-8859-(8))/,
53
+ 0xC2 => /^(ISO-8859-(8))/,
54
+ 0xC3 => /^(ISO-8859-(3|8))/,
55
+ 0xC4 => /^(ISO-8859-(8))/,
56
+ 0xC5 => /^(ISO-8859-(8))/,
57
+ 0xC6 => /^(ISO-8859-(8))/,
58
+ 0xC7 => /^(ISO-8859-(8))/,
59
+ 0xC8 => /^(ISO-8859-(8))/,
60
+ 0xC9 => /^(ISO-8859-(8))/,
61
+ 0xCA => /^(ISO-8859-(8))/,
62
+ 0xCB => /^(ISO-8859-(8))/,
63
+ 0xCC => /^(ISO-8859-(8))/,
64
+ 0xCD => /^(ISO-8859-(8))/,
65
+ 0xCE => /^(ISO-8859-(8))/,
66
+ 0xCF => /^(ISO-8859-(8))/,
67
+
68
+ 0xD0 => /^(ISO-8859-(3|8))/,
69
+ 0xD1 => /^(ISO-8859-(8))/,
70
+ 0xD2 => /^(ISO-8859-(7|8)|Windows-(1253))/,
71
+ 0xD3 => /^(ISO-8859-(8))/,
72
+ 0xD4 => /^(ISO-8859-(8))/,
73
+ 0xD5 => /^(ISO-8859-(8))/,
74
+ 0xD6 => /^(ISO-8859-(8))/,
75
+ 0xD7 => /^(ISO-8859-(8))/,
76
+ 0xD8 => /^(ISO-8859-(8))/,
77
+ 0xD9 => /^(ISO-8859-(8)|Windows-(1255))/,
78
+ 0xDA => /^(ISO-8859-(8)|Windows-(1255))/,
79
+ 0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
80
+ 0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
81
+ 0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
82
+ 0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
83
+ 0xDF => /^(ISO-8859-(6)|Windows-(1255))/,
84
+
85
+ 0xE3 => /^(ISO-8859-(3))/,
86
+
87
+ 0xF0 => /^(ISO-8859-(3))/,
88
+ 0xF3 => /^(ISO-8859-(6))/,
89
+ 0xF4 => /^(ISO-8859-(6))/,
90
+ 0xF5 => /^(ISO-8859-(6))/,
91
+ 0xF6 => /^(ISO-8859-(6))/,
92
+ 0xF7 => /^(ISO-8859-(6))/,
93
+ 0xF8 => /^(ISO-8859-(6))/,
94
+ 0xF9 => /^(ISO-8859-(6))/,
95
+ 0xFA => /^(ISO-8859-(6))/,
96
+ 0xFB => /^(ISO-8859-(6|8)|Windows-(1255))/,
97
+ 0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
98
+ 0xFD => /^(ISO-8859-(6|11))/,
99
+ 0xFE => /^(ISO-8859-(6|11))/,
100
+ 0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255))/,
39
101
  }.freeze
40
102
 
41
103
  BLANKS = [
@@ -51,7 +113,7 @@ class ByteCharacteristics < Characteristics
51
113
  ].freeze
52
114
 
53
115
  EXTRA_BLANKS = {
54
- 0xA0 => /^(ISO-8859-|Windows-)/,
116
+ 0xA0 => /^(ISO-8859-|Windows-125)/,
55
117
  0x9D => /^Windows-(1256)/,
56
118
  0x9F => /^Windows-(1256)/,
57
119
  }.freeze
@@ -1,4 +1,4 @@
1
1
  class Characteristics
2
- VERSION = "0.1.0".freeze
2
+ VERSION = "0.2.0".freeze
3
3
  end
4
4
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: characteristics
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-12 00:00:00.000000000 Z
11
+ date: 2017-03-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode-categories