characteristics 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +7 -2
- data/lib/characteristics.rb +6 -2
- data/lib/characteristics/ascii.rb +8 -0
- data/lib/characteristics/binary.rb +8 -0
- data/lib/characteristics/byte.rb +68 -34
- data/lib/characteristics/unicode.rb +4 -0
- data/lib/characteristics/version.rb +1 -1
- data/spec/characteristics_spec.rb +220 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fde954a2c2f2028fb7656a57c18a6556416a2f2f
|
4
|
+
data.tar.gz: 1856bc93124425900c7785c2107965d6620ef194
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d2e5f27972d6db2c3164ca9a915fd59ed69e2b99f4ec4fb4084a73be0fe8283b5c619c8b3bc6f06547a46592c3ac74bbaf07c2dd1c1afa1bfb5eb98aa6f0719
|
7
|
+
data.tar.gz: 52c27cda5f0e20b7d7b1a7f99c0fc505e2250d6a8263850bb889fb1841ad95984f0f67c3e1d7203c3efac00284fd3d8f92d7fbb36df8881f16586bf4610f9333
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
+
### 0.3.0
|
4
|
+
|
5
|
+
* Add soft-hyphen to single byte encodings
|
6
|
+
* Add format? property (e.g. RLM)
|
7
|
+
* Support more encodings: IBMX, CP85X, macX, TIS-620, Windows-874, KOI8-X
|
8
|
+
|
3
9
|
### 0.2.0
|
4
10
|
|
5
11
|
* Fix detection of supported Windows encodings and some unassigned codepoints
|
data/README.md
CHANGED
@@ -26,6 +26,7 @@ char_info.unicode? # => true / false
|
|
26
26
|
char_info.assigned? # => true / false
|
27
27
|
char_info.control? # => true / false
|
28
28
|
char_info.blank? # => true / false
|
29
|
+
char_info.format? # => true / false
|
29
30
|
```
|
30
31
|
|
31
32
|
## Types of Encodings
|
@@ -35,7 +36,7 @@ This library knows of four different kinds of encodings:
|
|
35
36
|
- **:unicode** Unicode family of multibyte encodings (*UTF-X*)
|
36
37
|
- **:ascii** 7-Bit ASCII (*US-ASCII*)
|
37
38
|
- **:binary** Arbitrary string (*ASCII-8BIT*)
|
38
|
-
- **:byte** Known byte encoding (*ISO-8859-X*, *Windows-125X*)
|
39
|
+
- **:byte** Known single byte encoding (*ISO-8859-X*, *Windows-125X*, *IBMX*, *CP85X*, *macX*, *TIS-620*, *Windows-874*, *KOI8-X*)
|
39
40
|
|
40
41
|
Other encodings are not supported, yet.
|
41
42
|
|
@@ -51,7 +52,7 @@ Validness is determined by Ruby's String#valid_encoding?
|
|
51
52
|
|
52
53
|
### `control?`
|
53
54
|
|
54
|
-
Control characters are codepoints in the is [C0, delete or C1 control character range](https://en.wikipedia.org/wiki/C0_and_C1_control_codes).
|
55
|
+
Control characters are codepoints in the [C0, delete or C1 control character range](https://en.wikipedia.org/wiki/C0_and_C1_control_codes). Characters in this range of [IBM codepage 437](https://en.wikipedia.org/wiki/Code_page_437) based encodings are always treated as control characters.
|
55
56
|
|
56
57
|
### `assigned?`
|
57
58
|
|
@@ -63,6 +64,10 @@ Control characters are codepoints in the is [C0, delete or C1 control character
|
|
63
64
|
|
64
65
|
The library includes a list of characters that might not be rendered visually. This list does not include unassigned codepoints, control characters (except for `\t`, `\n`, `\v`, `\f`, `\r`), or special formatting characters (right-to-left marker, variation selectors, etc).
|
65
66
|
|
67
|
+
### `format?`
|
68
|
+
|
69
|
+
This flag is `true` only for special formatting characters, which are not control characters, like Right-to-left marks. In Unicode, this means codepoints with the General Category of **Cf**.
|
70
|
+
|
66
71
|
## Todo
|
67
72
|
|
68
73
|
- Support all non-dummy encodings that Ruby supports
|
data/lib/characteristics.rb
CHANGED
@@ -14,7 +14,7 @@ class Characteristics
|
|
14
14
|
:binary
|
15
15
|
when /^UTF-/
|
16
16
|
:unicode
|
17
|
-
when /^ISO-8859-/, /^Windows-125
|
17
|
+
when /^ISO-8859-/, /^Windows-125/, /^(IBM|CP85)/, /^mac/, 'TIS-620', 'Windows-874', /^KOI8-/
|
18
18
|
:byte
|
19
19
|
else
|
20
20
|
raise ArgumentError, "encoding <#{encoding_name}> not supported"
|
@@ -41,6 +41,8 @@ class Characteristics
|
|
41
41
|
attr_reader :encoding
|
42
42
|
|
43
43
|
def initialize(char)
|
44
|
+
raise ArgumentError, "Do not use abstract Characteristics.new(char) directly, please use Characteristics.create(char)" if self.class == Characteristics
|
45
|
+
|
44
46
|
@is_valid = char.valid_encoding?
|
45
47
|
@encoding = char.encoding
|
46
48
|
@encoding_name = @encoding.name
|
@@ -51,7 +53,6 @@ class Characteristics
|
|
51
53
|
end
|
52
54
|
|
53
55
|
def unicode?
|
54
|
-
false
|
55
56
|
end
|
56
57
|
|
57
58
|
def assigned?
|
@@ -62,4 +63,7 @@ class Characteristics
|
|
62
63
|
|
63
64
|
def blank?
|
64
65
|
end
|
66
|
+
|
67
|
+
def format?
|
68
|
+
end
|
65
69
|
end
|
@@ -16,6 +16,10 @@ class AsciiCharacteristics < Characteristics
|
|
16
16
|
@ord = char.ord if @is_valid
|
17
17
|
end
|
18
18
|
|
19
|
+
def unicode?
|
20
|
+
false
|
21
|
+
end
|
22
|
+
|
19
23
|
def assigned?
|
20
24
|
true
|
21
25
|
end
|
@@ -39,4 +43,8 @@ class AsciiCharacteristics < Characteristics
|
|
39
43
|
def blank?
|
40
44
|
@is_valid && ( BLANKS.include?(@ord) || SEPARATORS.include?(@ord) )
|
41
45
|
end
|
46
|
+
|
47
|
+
def format?
|
48
|
+
false
|
49
|
+
end
|
42
50
|
end
|
@@ -21,6 +21,10 @@ class BinaryCharacteristics < Characteristics
|
|
21
21
|
true
|
22
22
|
end
|
23
23
|
|
24
|
+
def unicode?
|
25
|
+
false
|
26
|
+
end
|
27
|
+
|
24
28
|
def assigned?
|
25
29
|
true
|
26
30
|
end
|
@@ -40,4 +44,8 @@ class BinaryCharacteristics < Characteristics
|
|
40
44
|
def blank?
|
41
45
|
BLANKS.include?(@ord) || SEPARATORS.include?(@ord)
|
42
46
|
end
|
47
|
+
|
48
|
+
def format?
|
49
|
+
false
|
50
|
+
end
|
43
51
|
end
|
data/lib/characteristics/byte.rb
CHANGED
@@ -1,29 +1,42 @@
|
|
1
1
|
class ByteCharacteristics < Characteristics
|
2
|
-
HAS_C1 = /^(ISO-8859-)/
|
2
|
+
HAS_C1 = /^(ISO-8859-|TIS-620)/
|
3
3
|
|
4
4
|
UNASSIGNED = {
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
5
|
+
0x80 => /^(IBM869)/,
|
6
|
+
0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|IBM869|Windows-874)/,
|
7
|
+
0x82 => /^(IBM869|Windows-874)/,
|
8
|
+
0x83 => /^(Windows-(1250|1257)|IBM869|Windows-874)/,
|
9
|
+
0x84 => /^(IBM869|Windows-874)/,
|
10
|
+
0x85 => /^(IBM869)/,
|
11
|
+
0x86 => /^(Windows-874)/,
|
12
|
+
0x87 => /^(IBM869|Windows-874)/,
|
13
|
+
0x88 => /^(Windows-(1250|1253|1257)|Windows-874)/,
|
14
|
+
0x89 => /^(Windows-874)/,
|
15
|
+
0x8A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/,
|
16
|
+
0x8B => /^(Windows-874)/,
|
17
|
+
0x8C => /^(Windows-(1253|1255|1257)|Windows-874)/,
|
18
|
+
0x8D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
|
19
|
+
0x8E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/,
|
20
|
+
0x8F => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
|
21
|
+
|
22
|
+
0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|macThai|Windows-874)/,
|
23
|
+
0x93 => /^(IBM869)/,
|
24
|
+
0x94 => /^(IBM869)/,
|
25
|
+
0x98 => /^(Windows-(1250|1251|1253|1257)|Windows-874)/,
|
26
|
+
0x99 => /^(Windows-874)/,
|
27
|
+
0x9A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/,
|
28
|
+
0x9B => /^(IBM864|Windows-874)/,
|
29
|
+
0x9C => /^(Windows-(1253|1255|1257)|IBM864|Windows-874)/,
|
30
|
+
0x9D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
|
31
|
+
0x9E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/,
|
32
|
+
0x9F => /^(Windows-(1253|1255|1257)|IBM864|macThai|Windows-874)/,
|
33
|
+
|
34
|
+
0xA0 => /^(TIS-620)/,
|
22
35
|
0xA1 => /^(ISO-8859-(6|8)|Windows-(1257))/,
|
23
36
|
0xA2 => /^(ISO-8859-(6))/,
|
24
37
|
0xA3 => /^(ISO-8859-(6))/,
|
25
38
|
0xA5 => /^(ISO-8859-(3|6)|Windows-(1257))/,
|
26
|
-
0xA6 => /^(ISO-8859-(6))/,
|
39
|
+
0xA6 => /^(ISO-8859-(6)|IBM864)/,
|
27
40
|
0xA7 => /^(ISO-8859-(6))/,
|
28
41
|
0xA8 => /^(ISO-8859-(6))/,
|
29
42
|
0xA9 => /^(ISO-8859-(6))/,
|
@@ -70,34 +83,36 @@ class ByteCharacteristics < Characteristics
|
|
70
83
|
0xD2 => /^(ISO-8859-(7|8)|Windows-(1253))/,
|
71
84
|
0xD3 => /^(ISO-8859-(8))/,
|
72
85
|
0xD4 => /^(ISO-8859-(8))/,
|
73
|
-
0xD5 => /^(ISO-8859-(8))/,
|
86
|
+
0xD5 => /^(ISO-8859-(8)|IBM857)/, # IBM857: Ruby does not support euro sign
|
74
87
|
0xD6 => /^(ISO-8859-(8))/,
|
75
88
|
0xD7 => /^(ISO-8859-(8))/,
|
76
89
|
0xD8 => /^(ISO-8859-(8))/,
|
77
90
|
0xD9 => /^(ISO-8859-(8)|Windows-(1255))/,
|
78
91
|
0xDA => /^(ISO-8859-(8)|Windows-(1255))/,
|
79
|
-
0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
80
|
-
0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
81
|
-
0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
82
|
-
0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
92
|
+
0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
|
93
|
+
0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
|
94
|
+
0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
|
95
|
+
0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
|
83
96
|
0xDF => /^(ISO-8859-(6)|Windows-(1255))/,
|
84
97
|
|
85
98
|
0xE3 => /^(ISO-8859-(3))/,
|
99
|
+
0xE7 => /^(IBM857)/,
|
86
100
|
|
87
|
-
0xF0 => /^(ISO-8859-(3))/,
|
101
|
+
0xF0 => /^(ISO-8859-(3))/, # mac: Treating F0 as always assigned
|
102
|
+
0xF2 => /^(IBM857)/,
|
88
103
|
0xF3 => /^(ISO-8859-(6))/,
|
89
104
|
0xF4 => /^(ISO-8859-(6))/,
|
90
|
-
0xF5 => /^(ISO-8859-(6))/,
|
105
|
+
0xF5 => /^(ISO-8859-(6)|macTurkish)/,
|
91
106
|
0xF6 => /^(ISO-8859-(6))/,
|
92
107
|
0xF7 => /^(ISO-8859-(6))/,
|
93
108
|
0xF8 => /^(ISO-8859-(6))/,
|
94
109
|
0xF9 => /^(ISO-8859-(6))/,
|
95
110
|
0xFA => /^(ISO-8859-(6))/,
|
96
111
|
0xFB => /^(ISO-8859-(6|8)|Windows-(1255))/,
|
97
|
-
0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
|
98
|
-
0xFD => /^(ISO-8859-(6|11))/,
|
99
|
-
0xFE => /^(ISO-8859-(6|11))/,
|
100
|
-
0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255))/,
|
112
|
+
0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255)|macThai|TIS-620|Windows-874)/,
|
113
|
+
0xFD => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/,
|
114
|
+
0xFE => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/,
|
115
|
+
0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255)|IBM864|macGreek|macThai|TIS-620|Windows-874)/, # macGreek: Ruby does not know of soft hyphen at FF
|
101
116
|
}.freeze
|
102
117
|
|
103
118
|
BLANKS = [
|
@@ -113,9 +128,22 @@ class ByteCharacteristics < Characteristics
|
|
113
128
|
].freeze
|
114
129
|
|
115
130
|
EXTRA_BLANKS = {
|
116
|
-
0xA0 => /^(ISO-8859-|Windows-125)/,
|
131
|
+
0xA0 => /^(ISO-8859-|Windows-125|macThai|Windows-874)/,
|
132
|
+
0xA1 => /^IBM864/,
|
133
|
+
0xAD => /^(ISO-8859-(?!11)|Windows-125)/,
|
134
|
+
0x9A => /^KOI8-/,
|
117
135
|
0x9D => /^Windows-(1256)/,
|
118
|
-
|
136
|
+
0x9E => /^Windows-(1256)/,
|
137
|
+
0xCA => /^mac(?!Thai)/,
|
138
|
+
0xDB => /^macThai/,
|
139
|
+
0xDC => /^macThai/,
|
140
|
+
0xF0 => /^(IBM(?!437|737|86)|IBM869|CP)/,
|
141
|
+
0xFF => /^(IBM(?!864)|CP)/, # |macGreek, but is unassigned in Ruby
|
142
|
+
}.freeze
|
143
|
+
|
144
|
+
FORMATS = {
|
145
|
+
0xFD => /^(ISO-8859-8|Windows-(1255|1256))/,
|
146
|
+
0xFE => /^(ISO-8859-8|Windows-(1255|1256))/,
|
119
147
|
}.freeze
|
120
148
|
|
121
149
|
def initialize(char)
|
@@ -124,12 +152,10 @@ class ByteCharacteristics < Characteristics
|
|
124
152
|
end
|
125
153
|
|
126
154
|
def encoding_has_c0?
|
127
|
-
# !!(HAS_C0 =~ @encoding_name)
|
128
155
|
true
|
129
156
|
end
|
130
157
|
|
131
158
|
def encoding_has_delete?
|
132
|
-
# !!(HAS_C0 =~ @encoding_name)
|
133
159
|
true
|
134
160
|
end
|
135
161
|
|
@@ -137,6 +163,10 @@ class ByteCharacteristics < Characteristics
|
|
137
163
|
!!(HAS_C1 =~ @encoding_name)
|
138
164
|
end
|
139
165
|
|
166
|
+
def unicode?
|
167
|
+
false
|
168
|
+
end
|
169
|
+
|
140
170
|
def assigned?
|
141
171
|
control? || UNASSIGNED[@ord] !~ @encoding_name
|
142
172
|
end
|
@@ -162,4 +192,8 @@ class ByteCharacteristics < Characteristics
|
|
162
192
|
SEPARATORS.include?(@ord) ||
|
163
193
|
EXTRA_BLANKS[@ord] =~ @encoding_name
|
164
194
|
end
|
195
|
+
|
196
|
+
def format?
|
197
|
+
FORMATS[@ord] =~ @encoding_name
|
198
|
+
end
|
165
199
|
end
|
@@ -18,6 +18,10 @@ describe Characteristics do
|
|
18
18
|
Characteristics.create(char.force_encoding(encoding)).blank?
|
19
19
|
end
|
20
20
|
|
21
|
+
def format?(char)
|
22
|
+
Characteristics.create(char.force_encoding(encoding)).format?
|
23
|
+
end
|
24
|
+
|
21
25
|
describe UnicodeCharacteristics do
|
22
26
|
describe "UTF-*" do
|
23
27
|
let(:encoding) { "UTF-8" }
|
@@ -43,6 +47,11 @@ describe Characteristics do
|
|
43
47
|
assert blank? "\x20"
|
44
48
|
refute blank? "\x21"
|
45
49
|
end
|
50
|
+
|
51
|
+
it "is format or not" do
|
52
|
+
assert format? "\uFFF9"
|
53
|
+
refute format? "\x21"
|
54
|
+
end
|
46
55
|
end
|
47
56
|
end
|
48
57
|
|
@@ -68,6 +77,10 @@ describe Characteristics do
|
|
68
77
|
assert blank? "\x20"
|
69
78
|
refute blank? "\x21"
|
70
79
|
end
|
80
|
+
|
81
|
+
it "is never format" do
|
82
|
+
refute format? "\x21"
|
83
|
+
end
|
71
84
|
end
|
72
85
|
end
|
73
86
|
|
@@ -94,6 +107,10 @@ describe Characteristics do
|
|
94
107
|
assert blank? "\x20"
|
95
108
|
refute blank? "\x21"
|
96
109
|
end
|
110
|
+
|
111
|
+
it "is never format" do
|
112
|
+
refute format? "\x21"
|
113
|
+
end
|
97
114
|
end
|
98
115
|
end
|
99
116
|
|
@@ -122,6 +139,10 @@ describe Characteristics do
|
|
122
139
|
assert blank? "\x20"
|
123
140
|
refute blank? "\x21"
|
124
141
|
end
|
142
|
+
|
143
|
+
it "is never format" do
|
144
|
+
refute format? "\x21"
|
145
|
+
end
|
125
146
|
end
|
126
147
|
|
127
148
|
# TODO
|
@@ -142,7 +163,7 @@ describe Characteristics do
|
|
142
163
|
# describe "ISO-8859-16" do
|
143
164
|
end
|
144
165
|
|
145
|
-
describe "Windows
|
166
|
+
describe "Windows-125*" do
|
146
167
|
describe "Windows-1252" do
|
147
168
|
let(:encoding) { "Windows-1252" }
|
148
169
|
|
@@ -164,6 +185,10 @@ describe Characteristics do
|
|
164
185
|
assert blank? "\x20"
|
165
186
|
refute blank? "\x21"
|
166
187
|
end
|
188
|
+
|
189
|
+
it "is never format" do
|
190
|
+
refute format? "\x21"
|
191
|
+
end
|
167
192
|
end
|
168
193
|
|
169
194
|
# TODO
|
@@ -177,5 +202,199 @@ describe Characteristics do
|
|
177
202
|
# describe "Windows-1257" do
|
178
203
|
# describe "Windows-1258" do
|
179
204
|
end
|
205
|
+
|
206
|
+
describe "IBM*, CP85*" do
|
207
|
+
describe "IBM869" do
|
208
|
+
let(:encoding) { "IBM869" }
|
209
|
+
|
210
|
+
it "is always valid" do
|
211
|
+
assert valid? "\x80"
|
212
|
+
end
|
213
|
+
|
214
|
+
it "is assigned or not" do
|
215
|
+
assert assigned? "\x21"
|
216
|
+
refute assigned? "\x80"
|
217
|
+
end
|
218
|
+
|
219
|
+
it "is control or not" do
|
220
|
+
assert control? "\x1E"
|
221
|
+
refute control? "\x67"
|
222
|
+
end
|
223
|
+
|
224
|
+
it "is blank or not" do
|
225
|
+
assert blank? "\x20"
|
226
|
+
refute blank? "\x21"
|
227
|
+
end
|
228
|
+
|
229
|
+
it "is never format" do
|
230
|
+
refute format? "\x21"
|
231
|
+
end
|
232
|
+
end
|
233
|
+
|
234
|
+
# describe "IBM437" do
|
235
|
+
# describe "IBM737" do
|
236
|
+
# describe "IBM775" do
|
237
|
+
# describe "CP850" do
|
238
|
+
# describe "IBM852" do
|
239
|
+
# describe "CP852" do
|
240
|
+
# describe "IBM855" do
|
241
|
+
# describe "CP855" do
|
242
|
+
# describe "IBM857" do
|
243
|
+
# describe "IBM860" do
|
244
|
+
# describe "IBM861" do
|
245
|
+
# describe "IBM862" do
|
246
|
+
# describe "IBM863" do
|
247
|
+
# describe "IBM864" do
|
248
|
+
# describe "IBM865" do
|
249
|
+
# describe "IBM866" do
|
250
|
+
end
|
251
|
+
|
252
|
+
describe "mac*" do
|
253
|
+
describe "macRoman" do
|
254
|
+
let(:encoding) { "macRoman" }
|
255
|
+
|
256
|
+
it "is always valid" do
|
257
|
+
assert valid? "\x80"
|
258
|
+
end
|
259
|
+
|
260
|
+
it "is always assigned" do
|
261
|
+
assert assigned? "\x21"
|
262
|
+
end
|
263
|
+
|
264
|
+
it "is control or not" do
|
265
|
+
assert control? "\x1E"
|
266
|
+
refute control? "\x67"
|
267
|
+
end
|
268
|
+
|
269
|
+
it "is blank or not" do
|
270
|
+
assert blank? "\x20"
|
271
|
+
refute blank? "\x21"
|
272
|
+
end
|
273
|
+
|
274
|
+
it "is never format" do
|
275
|
+
refute format? "\x21"
|
276
|
+
end
|
277
|
+
end
|
278
|
+
|
279
|
+
# describe "macCentEuro" do
|
280
|
+
# describe "macCroatian" do
|
281
|
+
# describe "macCyrillic" do
|
282
|
+
# describe "macGreek" do
|
283
|
+
# describe "macIceland" do
|
284
|
+
# describe "macRomania" do
|
285
|
+
# describe "macThai" do
|
286
|
+
# describe "macTurkish" do
|
287
|
+
# describe "macUkraine" do
|
288
|
+
end
|
289
|
+
|
290
|
+
describe "TIS-620/Windows-874" do
|
291
|
+
describe "TIS-620" do
|
292
|
+
let(:encoding) { "TIS-620" }
|
293
|
+
|
294
|
+
it "is always valid" do
|
295
|
+
assert valid? "\x80"
|
296
|
+
end
|
297
|
+
|
298
|
+
it "is assigned or not" do
|
299
|
+
assert assigned? "\x21"
|
300
|
+
refute assigned? "\xA0"
|
301
|
+
end
|
302
|
+
|
303
|
+
it "is control or not" do
|
304
|
+
assert control? "\x1E"
|
305
|
+
refute control? "\x67"
|
306
|
+
end
|
307
|
+
|
308
|
+
it "is blank or not" do
|
309
|
+
assert blank? "\x20"
|
310
|
+
refute blank? "\x21"
|
311
|
+
end
|
312
|
+
|
313
|
+
it "is never format" do
|
314
|
+
refute format? "\x21"
|
315
|
+
end
|
316
|
+
end
|
317
|
+
|
318
|
+
describe "Windows-874" do
|
319
|
+
let(:encoding) { "Windows-874" }
|
320
|
+
|
321
|
+
it "is always valid" do
|
322
|
+
assert valid? "\x80"
|
323
|
+
end
|
324
|
+
|
325
|
+
it "is assigned or not" do
|
326
|
+
assert assigned? "\xA0"
|
327
|
+
refute assigned? "\x99"
|
328
|
+
end
|
329
|
+
|
330
|
+
it "is control or not" do
|
331
|
+
assert control? "\x1E"
|
332
|
+
refute control? "\x67"
|
333
|
+
end
|
334
|
+
|
335
|
+
it "is blank or not" do
|
336
|
+
assert blank? "\x20"
|
337
|
+
refute blank? "\x21"
|
338
|
+
end
|
339
|
+
|
340
|
+
it "is never format" do
|
341
|
+
refute format? "\x21"
|
342
|
+
end
|
343
|
+
end
|
344
|
+
end
|
345
|
+
|
346
|
+
describe "KOI8-*" do
|
347
|
+
describe "KOI8-R" do
|
348
|
+
let(:encoding) { "KOI8-R" }
|
349
|
+
|
350
|
+
it "is always valid" do
|
351
|
+
assert valid? "\x80"
|
352
|
+
end
|
353
|
+
|
354
|
+
it "is always assigned" do
|
355
|
+
assert assigned? "\x21"
|
356
|
+
end
|
357
|
+
|
358
|
+
it "is control or not" do
|
359
|
+
assert control? "\x1E"
|
360
|
+
refute control? "\x67"
|
361
|
+
end
|
362
|
+
|
363
|
+
it "is blank or not" do
|
364
|
+
assert blank? "\x20"
|
365
|
+
refute blank? "\x21"
|
366
|
+
end
|
367
|
+
|
368
|
+
it "is never format" do
|
369
|
+
refute format? "\x21"
|
370
|
+
end
|
371
|
+
end
|
372
|
+
|
373
|
+
describe "KOI8-U" do
|
374
|
+
let(:encoding) { "KOI8-U" }
|
375
|
+
|
376
|
+
it "is always valid" do
|
377
|
+
assert valid? "\x80"
|
378
|
+
end
|
379
|
+
|
380
|
+
it "is always assigned" do
|
381
|
+
assert assigned? "\x21"
|
382
|
+
end
|
383
|
+
|
384
|
+
it "is control or not" do
|
385
|
+
assert control? "\x1E"
|
386
|
+
refute control? "\x67"
|
387
|
+
end
|
388
|
+
|
389
|
+
it "is blank or not" do
|
390
|
+
assert blank? "\x20"
|
391
|
+
refute blank? "\x21"
|
392
|
+
end
|
393
|
+
|
394
|
+
it "is never format" do
|
395
|
+
refute format? "\x21"
|
396
|
+
end
|
397
|
+
end
|
398
|
+
end
|
180
399
|
end
|
181
400
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: characteristics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unicode-categories
|