characteristics 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +7 -2
- data/lib/characteristics.rb +6 -2
- data/lib/characteristics/ascii.rb +8 -0
- data/lib/characteristics/binary.rb +8 -0
- data/lib/characteristics/byte.rb +68 -34
- data/lib/characteristics/unicode.rb +4 -0
- data/lib/characteristics/version.rb +1 -1
- data/spec/characteristics_spec.rb +220 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fde954a2c2f2028fb7656a57c18a6556416a2f2f
|
4
|
+
data.tar.gz: 1856bc93124425900c7785c2107965d6620ef194
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d2e5f27972d6db2c3164ca9a915fd59ed69e2b99f4ec4fb4084a73be0fe8283b5c619c8b3bc6f06547a46592c3ac74bbaf07c2dd1c1afa1bfb5eb98aa6f0719
|
7
|
+
data.tar.gz: 52c27cda5f0e20b7d7b1a7f99c0fc505e2250d6a8263850bb889fb1841ad95984f0f67c3e1d7203c3efac00284fd3d8f92d7fbb36df8881f16586bf4610f9333
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
+
### 0.3.0
|
4
|
+
|
5
|
+
* Add soft-hyphen to single byte encodings
|
6
|
+
* Add format? property (e.g. RLM)
|
7
|
+
* Support more encodings: IBMX, CP85X, macX, TIS-620, Windows-874, KOI8-X
|
8
|
+
|
3
9
|
### 0.2.0
|
4
10
|
|
5
11
|
* Fix detection of supported Windows encodings and some unassigned codepoints
|
data/README.md
CHANGED
@@ -26,6 +26,7 @@ char_info.unicode? # => true / false
|
|
26
26
|
char_info.assigned? # => true / false
|
27
27
|
char_info.control? # => true / false
|
28
28
|
char_info.blank? # => true / false
|
29
|
+
char_info.format? # => true / false
|
29
30
|
```
|
30
31
|
|
31
32
|
## Types of Encodings
|
@@ -35,7 +36,7 @@ This library knows of four different kinds of encodings:
|
|
35
36
|
- **:unicode** Unicode family of multibyte encodings (*UTF-X*)
|
36
37
|
- **:ascii** 7-Bit ASCII (*US-ASCII*)
|
37
38
|
- **:binary** Arbitrary string (*ASCII-8BIT*)
|
38
|
-
- **:byte** Known byte encoding (*ISO-8859-X*, *Windows-125X*)
|
39
|
+
- **:byte** Known single byte encoding (*ISO-8859-X*, *Windows-125X*, *IBMX*, *CP85X*, *macX*, *TIS-620*, *Windows-874*, *KOI8-X*)
|
39
40
|
|
40
41
|
Other encodings are not supported, yet.
|
41
42
|
|
@@ -51,7 +52,7 @@ Validness is determined by Ruby's String#valid_encoding?
|
|
51
52
|
|
52
53
|
### `control?`
|
53
54
|
|
54
|
-
Control characters are codepoints in the is [C0, delete or C1 control character range](https://en.wikipedia.org/wiki/C0_and_C1_control_codes).
|
55
|
+
Control characters are codepoints in the [C0, delete or C1 control character range](https://en.wikipedia.org/wiki/C0_and_C1_control_codes). Characters in this range of [IBM codepage 437](https://en.wikipedia.org/wiki/Code_page_437) based encodings are always treated as control characters.
|
55
56
|
|
56
57
|
### `assigned?`
|
57
58
|
|
@@ -63,6 +64,10 @@ Control characters are codepoints in the is [C0, delete or C1 control character
|
|
63
64
|
|
64
65
|
The library includes a list of characters that might not be rendered visually. This list does not include unassigned codepoints, control characters (except for `\t`, `\n`, `\v`, `\f`, `\r`), or special formatting characters (right-to-left marker, variation selectors, etc).
|
65
66
|
|
67
|
+
### `format?`
|
68
|
+
|
69
|
+
This flag is `true` only for special formatting characters, which are not control characters, like Right-to-left marks. In Unicode, this means codepoints with the General Category of **Cf**.
|
70
|
+
|
66
71
|
## Todo
|
67
72
|
|
68
73
|
- Support all non-dummy encodings that Ruby supports
|
data/lib/characteristics.rb
CHANGED
@@ -14,7 +14,7 @@ class Characteristics
|
|
14
14
|
:binary
|
15
15
|
when /^UTF-/
|
16
16
|
:unicode
|
17
|
-
when /^ISO-8859-/, /^Windows-125
|
17
|
+
when /^ISO-8859-/, /^Windows-125/, /^(IBM|CP85)/, /^mac/, 'TIS-620', 'Windows-874', /^KOI8-/
|
18
18
|
:byte
|
19
19
|
else
|
20
20
|
raise ArgumentError, "encoding <#{encoding_name}> not supported"
|
@@ -41,6 +41,8 @@ class Characteristics
|
|
41
41
|
attr_reader :encoding
|
42
42
|
|
43
43
|
def initialize(char)
|
44
|
+
raise ArgumentError, "Do not use abstract Characteristics.new(char) directly, please use Characteristics.create(char)" if self.class == Characteristics
|
45
|
+
|
44
46
|
@is_valid = char.valid_encoding?
|
45
47
|
@encoding = char.encoding
|
46
48
|
@encoding_name = @encoding.name
|
@@ -51,7 +53,6 @@ class Characteristics
|
|
51
53
|
end
|
52
54
|
|
53
55
|
def unicode?
|
54
|
-
false
|
55
56
|
end
|
56
57
|
|
57
58
|
def assigned?
|
@@ -62,4 +63,7 @@ class Characteristics
|
|
62
63
|
|
63
64
|
def blank?
|
64
65
|
end
|
66
|
+
|
67
|
+
def format?
|
68
|
+
end
|
65
69
|
end
|
@@ -16,6 +16,10 @@ class AsciiCharacteristics < Characteristics
|
|
16
16
|
@ord = char.ord if @is_valid
|
17
17
|
end
|
18
18
|
|
19
|
+
def unicode?
|
20
|
+
false
|
21
|
+
end
|
22
|
+
|
19
23
|
def assigned?
|
20
24
|
true
|
21
25
|
end
|
@@ -39,4 +43,8 @@ class AsciiCharacteristics < Characteristics
|
|
39
43
|
def blank?
|
40
44
|
@is_valid && ( BLANKS.include?(@ord) || SEPARATORS.include?(@ord) )
|
41
45
|
end
|
46
|
+
|
47
|
+
def format?
|
48
|
+
false
|
49
|
+
end
|
42
50
|
end
|
@@ -21,6 +21,10 @@ class BinaryCharacteristics < Characteristics
|
|
21
21
|
true
|
22
22
|
end
|
23
23
|
|
24
|
+
def unicode?
|
25
|
+
false
|
26
|
+
end
|
27
|
+
|
24
28
|
def assigned?
|
25
29
|
true
|
26
30
|
end
|
@@ -40,4 +44,8 @@ class BinaryCharacteristics < Characteristics
|
|
40
44
|
def blank?
|
41
45
|
BLANKS.include?(@ord) || SEPARATORS.include?(@ord)
|
42
46
|
end
|
47
|
+
|
48
|
+
def format?
|
49
|
+
false
|
50
|
+
end
|
43
51
|
end
|
data/lib/characteristics/byte.rb
CHANGED
@@ -1,29 +1,42 @@
|
|
1
1
|
class ByteCharacteristics < Characteristics
|
2
|
-
HAS_C1 = /^(ISO-8859-)/
|
2
|
+
HAS_C1 = /^(ISO-8859-|TIS-620)/
|
3
3
|
|
4
4
|
UNASSIGNED = {
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
5
|
+
0x80 => /^(IBM869)/,
|
6
|
+
0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|IBM869|Windows-874)/,
|
7
|
+
0x82 => /^(IBM869|Windows-874)/,
|
8
|
+
0x83 => /^(Windows-(1250|1257)|IBM869|Windows-874)/,
|
9
|
+
0x84 => /^(IBM869|Windows-874)/,
|
10
|
+
0x85 => /^(IBM869)/,
|
11
|
+
0x86 => /^(Windows-874)/,
|
12
|
+
0x87 => /^(IBM869|Windows-874)/,
|
13
|
+
0x88 => /^(Windows-(1250|1253|1257)|Windows-874)/,
|
14
|
+
0x89 => /^(Windows-874)/,
|
15
|
+
0x8A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/,
|
16
|
+
0x8B => /^(Windows-874)/,
|
17
|
+
0x8C => /^(Windows-(1253|1255|1257)|Windows-874)/,
|
18
|
+
0x8D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
|
19
|
+
0x8E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/,
|
20
|
+
0x8F => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
|
21
|
+
|
22
|
+
0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|macThai|Windows-874)/,
|
23
|
+
0x93 => /^(IBM869)/,
|
24
|
+
0x94 => /^(IBM869)/,
|
25
|
+
0x98 => /^(Windows-(1250|1251|1253|1257)|Windows-874)/,
|
26
|
+
0x99 => /^(Windows-874)/,
|
27
|
+
0x9A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/,
|
28
|
+
0x9B => /^(IBM864|Windows-874)/,
|
29
|
+
0x9C => /^(Windows-(1253|1255|1257)|IBM864|Windows-874)/,
|
30
|
+
0x9D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
|
31
|
+
0x9E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/,
|
32
|
+
0x9F => /^(Windows-(1253|1255|1257)|IBM864|macThai|Windows-874)/,
|
33
|
+
|
34
|
+
0xA0 => /^(TIS-620)/,
|
22
35
|
0xA1 => /^(ISO-8859-(6|8)|Windows-(1257))/,
|
23
36
|
0xA2 => /^(ISO-8859-(6))/,
|
24
37
|
0xA3 => /^(ISO-8859-(6))/,
|
25
38
|
0xA5 => /^(ISO-8859-(3|6)|Windows-(1257))/,
|
26
|
-
0xA6 => /^(ISO-8859-(6))/,
|
39
|
+
0xA6 => /^(ISO-8859-(6)|IBM864)/,
|
27
40
|
0xA7 => /^(ISO-8859-(6))/,
|
28
41
|
0xA8 => /^(ISO-8859-(6))/,
|
29
42
|
0xA9 => /^(ISO-8859-(6))/,
|
@@ -70,34 +83,36 @@ class ByteCharacteristics < Characteristics
|
|
70
83
|
0xD2 => /^(ISO-8859-(7|8)|Windows-(1253))/,
|
71
84
|
0xD3 => /^(ISO-8859-(8))/,
|
72
85
|
0xD4 => /^(ISO-8859-(8))/,
|
73
|
-
0xD5 => /^(ISO-8859-(8))/,
|
86
|
+
0xD5 => /^(ISO-8859-(8)|IBM857)/, # IBM857: Ruby does not support euro sign
|
74
87
|
0xD6 => /^(ISO-8859-(8))/,
|
75
88
|
0xD7 => /^(ISO-8859-(8))/,
|
76
89
|
0xD8 => /^(ISO-8859-(8))/,
|
77
90
|
0xD9 => /^(ISO-8859-(8)|Windows-(1255))/,
|
78
91
|
0xDA => /^(ISO-8859-(8)|Windows-(1255))/,
|
79
|
-
0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
80
|
-
0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
81
|
-
0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
82
|
-
0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
92
|
+
0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
|
93
|
+
0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
|
94
|
+
0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
|
95
|
+
0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
|
83
96
|
0xDF => /^(ISO-8859-(6)|Windows-(1255))/,
|
84
97
|
|
85
98
|
0xE3 => /^(ISO-8859-(3))/,
|
99
|
+
0xE7 => /^(IBM857)/,
|
86
100
|
|
87
|
-
0xF0 => /^(ISO-8859-(3))/,
|
101
|
+
0xF0 => /^(ISO-8859-(3))/, # mac: Treating F0 as always assigned
|
102
|
+
0xF2 => /^(IBM857)/,
|
88
103
|
0xF3 => /^(ISO-8859-(6))/,
|
89
104
|
0xF4 => /^(ISO-8859-(6))/,
|
90
|
-
0xF5 => /^(ISO-8859-(6))/,
|
105
|
+
0xF5 => /^(ISO-8859-(6)|macTurkish)/,
|
91
106
|
0xF6 => /^(ISO-8859-(6))/,
|
92
107
|
0xF7 => /^(ISO-8859-(6))/,
|
93
108
|
0xF8 => /^(ISO-8859-(6))/,
|
94
109
|
0xF9 => /^(ISO-8859-(6))/,
|
95
110
|
0xFA => /^(ISO-8859-(6))/,
|
96
111
|
0xFB => /^(ISO-8859-(6|8)|Windows-(1255))/,
|
97
|
-
0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
98
|
-
0xFD => /^(ISO-8859-(6|11))/,
|
99
|
-
0xFE => /^(ISO-8859-(6|11))/,
|
100
|
-
0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255))/,
|
112
|
+
0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255)|macThai|TIS-620|Windows-874)/,
|
113
|
+
0xFD => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/,
|
114
|
+
0xFE => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/,
|
115
|
+
0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255)|IBM864|macGreek|macThai|TIS-620|Windows-874)/, # macGreek: Ruby does not know of soft hyphen at FF
|
101
116
|
}.freeze
|
102
117
|
|
103
118
|
BLANKS = [
|
@@ -113,9 +128,22 @@ class ByteCharacteristics < Characteristics
|
|
113
128
|
].freeze
|
114
129
|
|
115
130
|
EXTRA_BLANKS = {
|
116
|
-
0xA0 => /^(ISO-8859-|Windows-125)/,
|
131
|
+
0xA0 => /^(ISO-8859-|Windows-125|macThai|Windows-874)/,
|
132
|
+
0xA1 => /^IBM864/,
|
133
|
+
0xAD => /^(ISO-8859-(?!11)|Windows-125)/,
|
134
|
+
0x9A => /^KOI8-/,
|
117
135
|
0x9D => /^Windows-(1256)/,
|
118
|
-
|
136
|
+
0x9E => /^Windows-(1256)/,
|
137
|
+
0xCA => /^mac(?!Thai)/,
|
138
|
+
0xDB => /^macThai/,
|
139
|
+
0xDC => /^macThai/,
|
140
|
+
0xF0 => /^(IBM(?!437|737|86)|IBM869|CP)/,
|
141
|
+
0xFF => /^(IBM(?!864)|CP)/, # |macGreek, but is unnasigned in Ruby
|
142
|
+
}.freeze
|
143
|
+
|
144
|
+
FORMATS = {
|
145
|
+
0xFD => /^(ISO-8859-8|Windows-(1255|1256))/,
|
146
|
+
0xFE => /^(ISO-8859-8|Windows-(1255|1256))/,
|
119
147
|
}.freeze
|
120
148
|
|
121
149
|
def initialize(char)
|
@@ -124,12 +152,10 @@ class ByteCharacteristics < Characteristics
|
|
124
152
|
end
|
125
153
|
|
126
154
|
def encoding_has_c0?
|
127
|
-
# !!(HAS_C0 =~ @encoding_name)
|
128
155
|
true
|
129
156
|
end
|
130
157
|
|
131
158
|
def encoding_has_delete?
|
132
|
-
# !!(HAS_C0 =~ @encoding_name)
|
133
159
|
true
|
134
160
|
end
|
135
161
|
|
@@ -137,6 +163,10 @@ class ByteCharacteristics < Characteristics
|
|
137
163
|
!!(HAS_C1 =~ @encoding_name)
|
138
164
|
end
|
139
165
|
|
166
|
+
def unicode?
|
167
|
+
false
|
168
|
+
end
|
169
|
+
|
140
170
|
def assigned?
|
141
171
|
control? || UNASSIGNED[@ord] !~ @encoding_name
|
142
172
|
end
|
@@ -162,4 +192,8 @@ class ByteCharacteristics < Characteristics
|
|
162
192
|
SEPARATORS.include?(@ord) ||
|
163
193
|
EXTRA_BLANKS[@ord] =~ @encoding_name
|
164
194
|
end
|
195
|
+
|
196
|
+
def format?
|
197
|
+
FORMATS[@ord] =~ @encoding_name
|
198
|
+
end
|
165
199
|
end
|
@@ -18,6 +18,10 @@ describe Characteristics do
|
|
18
18
|
Characteristics.create(char.force_encoding(encoding)).blank?
|
19
19
|
end
|
20
20
|
|
21
|
+
def format?(char)
|
22
|
+
Characteristics.create(char.force_encoding(encoding)).format?
|
23
|
+
end
|
24
|
+
|
21
25
|
describe UnicodeCharacteristics do
|
22
26
|
describe "UTF-*" do
|
23
27
|
let(:encoding) { "UTF-8" }
|
@@ -43,6 +47,11 @@ describe Characteristics do
|
|
43
47
|
assert blank? "\x20"
|
44
48
|
refute blank? "\x21"
|
45
49
|
end
|
50
|
+
|
51
|
+
it "is format or not" do
|
52
|
+
assert format? "\uFFF9"
|
53
|
+
refute format? "\x21"
|
54
|
+
end
|
46
55
|
end
|
47
56
|
end
|
48
57
|
|
@@ -68,6 +77,10 @@ describe Characteristics do
|
|
68
77
|
assert blank? "\x20"
|
69
78
|
refute blank? "\x21"
|
70
79
|
end
|
80
|
+
|
81
|
+
it "is never format" do
|
82
|
+
refute format? "\x21"
|
83
|
+
end
|
71
84
|
end
|
72
85
|
end
|
73
86
|
|
@@ -94,6 +107,10 @@ describe Characteristics do
|
|
94
107
|
assert blank? "\x20"
|
95
108
|
refute blank? "\x21"
|
96
109
|
end
|
110
|
+
|
111
|
+
it "is never format" do
|
112
|
+
refute format? "\x21"
|
113
|
+
end
|
97
114
|
end
|
98
115
|
end
|
99
116
|
|
@@ -122,6 +139,10 @@ describe Characteristics do
|
|
122
139
|
assert blank? "\x20"
|
123
140
|
refute blank? "\x21"
|
124
141
|
end
|
142
|
+
|
143
|
+
it "is never format" do
|
144
|
+
refute format? "\x21"
|
145
|
+
end
|
125
146
|
end
|
126
147
|
|
127
148
|
# TODO
|
@@ -142,7 +163,7 @@ describe Characteristics do
|
|
142
163
|
# describe "ISO-8859-16" do
|
143
164
|
end
|
144
165
|
|
145
|
-
describe "Windows
|
166
|
+
describe "Windows-125*" do
|
146
167
|
describe "Windows-1252" do
|
147
168
|
let(:encoding) { "Windows-1252" }
|
148
169
|
|
@@ -164,6 +185,10 @@ describe Characteristics do
|
|
164
185
|
assert blank? "\x20"
|
165
186
|
refute blank? "\x21"
|
166
187
|
end
|
188
|
+
|
189
|
+
it "is never format" do
|
190
|
+
refute format? "\x21"
|
191
|
+
end
|
167
192
|
end
|
168
193
|
|
169
194
|
# TODO
|
@@ -177,5 +202,199 @@ describe Characteristics do
|
|
177
202
|
# describe "Windows-1257" do
|
178
203
|
# describe "Windows-1258" do
|
179
204
|
end
|
205
|
+
|
206
|
+
describe "IBM*, CP85*" do
|
207
|
+
describe "IBM869" do
|
208
|
+
let(:encoding) { "IBM869" }
|
209
|
+
|
210
|
+
it "is always valid" do
|
211
|
+
assert valid? "\x80"
|
212
|
+
end
|
213
|
+
|
214
|
+
it "is assigned or not" do
|
215
|
+
assert assigned? "\x21"
|
216
|
+
refute assigned? "\x80"
|
217
|
+
end
|
218
|
+
|
219
|
+
it "is control or not" do
|
220
|
+
assert control? "\x1E"
|
221
|
+
refute control? "\x67"
|
222
|
+
end
|
223
|
+
|
224
|
+
it "is blank or not" do
|
225
|
+
assert blank? "\x20"
|
226
|
+
refute blank? "\x21"
|
227
|
+
end
|
228
|
+
|
229
|
+
it "is never format" do
|
230
|
+
refute format? "\x21"
|
231
|
+
end
|
232
|
+
end
|
233
|
+
|
234
|
+
# describe "IBM437" do
|
235
|
+
# describe "IBM737" do
|
236
|
+
# describe "IBM775" do
|
237
|
+
# describe "CP850" do
|
238
|
+
# describe "IBM852" do
|
239
|
+
# describe "CP852" do
|
240
|
+
# describe "IBM855" do
|
241
|
+
# describe "CP855" do
|
242
|
+
# describe "IBM857" do
|
243
|
+
# describe "IBM860" do
|
244
|
+
# describe "IBM861" do
|
245
|
+
# describe "IBM862" do
|
246
|
+
# describe "IBM863" do
|
247
|
+
# describe "IBM864" do
|
248
|
+
# describe "IBM865" do
|
249
|
+
# describe "IBM866" do
|
250
|
+
end
|
251
|
+
|
252
|
+
describe "mac*" do
|
253
|
+
describe "macRoman" do
|
254
|
+
let(:encoding) { "macRoman" }
|
255
|
+
|
256
|
+
it "is always valid" do
|
257
|
+
assert valid? "\x80"
|
258
|
+
end
|
259
|
+
|
260
|
+
it "is always assigned" do
|
261
|
+
assert assigned? "\x21"
|
262
|
+
end
|
263
|
+
|
264
|
+
it "is control or not" do
|
265
|
+
assert control? "\x1E"
|
266
|
+
refute control? "\x67"
|
267
|
+
end
|
268
|
+
|
269
|
+
it "is blank or not" do
|
270
|
+
assert blank? "\x20"
|
271
|
+
refute blank? "\x21"
|
272
|
+
end
|
273
|
+
|
274
|
+
it "is never format" do
|
275
|
+
refute format? "\x21"
|
276
|
+
end
|
277
|
+
end
|
278
|
+
|
279
|
+
# describe "macCentEuro" do
|
280
|
+
# describe "macCroatian" do
|
281
|
+
# describe "macCyrillic" do
|
282
|
+
# describe "macGreek" do
|
283
|
+
# describe "macIceland" do
|
284
|
+
# describe "macRomania" do
|
285
|
+
# describe "macThai" do
|
286
|
+
# describe "macTurkish" do
|
287
|
+
# describe "macUkraine" do
|
288
|
+
end
|
289
|
+
|
290
|
+
describe "TIS-620/Windows-874" do
|
291
|
+
describe "TIS-620" do
|
292
|
+
let(:encoding) { "TIS-620" }
|
293
|
+
|
294
|
+
it "is always valid" do
|
295
|
+
assert valid? "\x80"
|
296
|
+
end
|
297
|
+
|
298
|
+
it "is assigned or not" do
|
299
|
+
assert assigned? "\x21"
|
300
|
+
refute assigned? "\xA0"
|
301
|
+
end
|
302
|
+
|
303
|
+
it "is control or not" do
|
304
|
+
assert control? "\x1E"
|
305
|
+
refute control? "\x67"
|
306
|
+
end
|
307
|
+
|
308
|
+
it "is blank or not" do
|
309
|
+
assert blank? "\x20"
|
310
|
+
refute blank? "\x21"
|
311
|
+
end
|
312
|
+
|
313
|
+
it "is never format" do
|
314
|
+
refute format? "\x21"
|
315
|
+
end
|
316
|
+
end
|
317
|
+
|
318
|
+
describe "Windows-874" do
|
319
|
+
let(:encoding) { "Windows-874" }
|
320
|
+
|
321
|
+
it "is always valid" do
|
322
|
+
assert valid? "\x80"
|
323
|
+
end
|
324
|
+
|
325
|
+
it "is assigned or not" do
|
326
|
+
assert assigned? "\xA0"
|
327
|
+
refute assigned? "\x99"
|
328
|
+
end
|
329
|
+
|
330
|
+
it "is control or not" do
|
331
|
+
assert control? "\x1E"
|
332
|
+
refute control? "\x67"
|
333
|
+
end
|
334
|
+
|
335
|
+
it "is blank or not" do
|
336
|
+
assert blank? "\x20"
|
337
|
+
refute blank? "\x21"
|
338
|
+
end
|
339
|
+
|
340
|
+
it "is never format" do
|
341
|
+
refute format? "\x21"
|
342
|
+
end
|
343
|
+
end
|
344
|
+
end
|
345
|
+
|
346
|
+
describe "KOI8-*" do
|
347
|
+
describe "KOI8-R" do
|
348
|
+
let(:encoding) { "KOI8-R" }
|
349
|
+
|
350
|
+
it "is always valid" do
|
351
|
+
assert valid? "\x80"
|
352
|
+
end
|
353
|
+
|
354
|
+
it "is always assigned" do
|
355
|
+
assert assigned? "\x21"
|
356
|
+
end
|
357
|
+
|
358
|
+
it "is control or not" do
|
359
|
+
assert control? "\x1E"
|
360
|
+
refute control? "\x67"
|
361
|
+
end
|
362
|
+
|
363
|
+
it "is blank or not" do
|
364
|
+
assert blank? "\x20"
|
365
|
+
refute blank? "\x21"
|
366
|
+
end
|
367
|
+
|
368
|
+
it "is never format" do
|
369
|
+
refute format? "\x21"
|
370
|
+
end
|
371
|
+
end
|
372
|
+
|
373
|
+
describe "KOI8-U" do
|
374
|
+
let(:encoding) { "KOI8-U" }
|
375
|
+
|
376
|
+
it "is always valid" do
|
377
|
+
assert valid? "\x80"
|
378
|
+
end
|
379
|
+
|
380
|
+
it "is always assigned" do
|
381
|
+
assert assigned? "\x21"
|
382
|
+
end
|
383
|
+
|
384
|
+
it "is control or not" do
|
385
|
+
assert control? "\x1E"
|
386
|
+
refute control? "\x67"
|
387
|
+
end
|
388
|
+
|
389
|
+
it "is blank or not" do
|
390
|
+
assert blank? "\x20"
|
391
|
+
refute blank? "\x21"
|
392
|
+
end
|
393
|
+
|
394
|
+
it "is never format" do
|
395
|
+
refute format? "\x21"
|
396
|
+
end
|
397
|
+
end
|
398
|
+
end
|
180
399
|
end
|
181
400
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: characteristics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unicode-categories
|