characteristics 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 382e8a6535fa4152dbdbf7f213a12ba6e5a82830
4
- data.tar.gz: c92c1bdf4eb32f8ee66c0dce0d72327f95ca0260
3
+ metadata.gz: fde954a2c2f2028fb7656a57c18a6556416a2f2f
4
+ data.tar.gz: 1856bc93124425900c7785c2107965d6620ef194
5
5
  SHA512:
6
- metadata.gz: e6e886ba4aa8b3605c5a8918e3e97c2982a1843779e780562757373fe30641e7e56e6e8bb0ea90bcca6b20a1ce752780c587a87902f3f346fa42a56c1cca07bd
7
- data.tar.gz: 273ee12ec3e4bb2426743ac146790b658bdfc236e396c73d7af369b4cf76d89248fcb074a1515c4280e322a05644feeaf838622782210501fab2c36e7819e384
6
+ metadata.gz: 7d2e5f27972d6db2c3164ca9a915fd59ed69e2b99f4ec4fb4084a73be0fe8283b5c619c8b3bc6f06547a46592c3ac74bbaf07c2dd1c1afa1bfb5eb98aa6f0719
7
+ data.tar.gz: 52c27cda5f0e20b7d7b1a7f99c0fc505e2250d6a8263850bb889fb1841ad95984f0f67c3e1d7203c3efac00284fd3d8f92d7fbb36df8881f16586bf4610f9333
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  ## CHANGELOG
2
2
 
3
+ ### 0.3.0
4
+
5
+ * Add soft-hyphen to single byte encodings
6
+ * Add format? property (e.g. RLM)
7
+ * Support more encodings: IBMX, CP85X, macX, TIS-620, Windows-874, KOI8-X
8
+
3
9
  ### 0.2.0
4
10
 
5
11
  * Fix detection of supported Windows encodings and some unassigned codepoints
data/README.md CHANGED
@@ -26,6 +26,7 @@ char_info.unicode? # => true / false
26
26
  char_info.assigned? # => true / false
27
27
  char_info.control? # => true / false
28
28
  char_info.blank? # => true / false
29
+ char_info.format? # => true / false
29
30
  ```
30
31
 
31
32
  ## Types of Encodings
@@ -35,7 +36,7 @@ This library knows of four different kinds of encodings:
35
36
  - **:unicode** Unicode family of multibyte encodings (*UTF-X*)
36
37
  - **:ascii** 7-Bit ASCII (*US-ASCII*)
37
38
  - **:binary** Arbitrary string (*ASCII-8BIT*)
38
- - **:byte** Known byte encoding (*ISO-8859-X*, *Windows-125X*)
39
+ - **:byte** Known single byte encoding (*ISO-8859-X*, *Windows-125X*, *IBMX*, *CP85X*, *macX*, *TIS-620*, *Windows-874*, *KOI8-X*)
39
40
 
40
41
  Other encodings are not supported, yet.
41
42
 
@@ -51,7 +52,7 @@ Validness is determined by Ruby's String#valid_encoding?
51
52
 
52
53
  ### `control?`
53
54
 
54
- Control characters are codepoints in the is [C0, delete or C1 control character range](https://en.wikipedia.org/wiki/C0_and_C1_control_codes).
55
+ Control characters are codepoints in the [C0, delete or C1 control character range](https://en.wikipedia.org/wiki/C0_and_C1_control_codes). Characters in this range of [IBM codepage 437](https://en.wikipedia.org/wiki/Code_page_437) based encodings are always treated as control characters.
55
56
 
56
57
  ### `assigned?`
57
58
 
@@ -63,6 +64,10 @@ Control characters are codepoints in the is [C0, delete or C1 control character
63
64
 
64
65
  The library includes a list of characters that might not be rendered visually. This list does not include unassigned codepoints, control characters (except for `\t`, `\n`, `\v`, `\f`, `\r`), or special formatting characters (right-to-left marker, variation selectors, etc).
65
66
 
67
+ ### `format?`
68
+
69
+ This flag is `true` only for special formatting characters, which are not control characters, like Right-to-left marks. In Unicode, this means codepoints with the General Category of **Cf**.
70
+
66
71
  ## Todo
67
72
 
68
73
  - Support all non-dummy encodings that Ruby supports
@@ -14,7 +14,7 @@ class Characteristics
14
14
  :binary
15
15
  when /^UTF-/
16
16
  :unicode
17
- when /^ISO-8859-/, /^Windows-125/
17
+ when /^ISO-8859-/, /^Windows-125/, /^(IBM|CP85)/, /^mac/, 'TIS-620', 'Windows-874', /^KOI8-/
18
18
  :byte
19
19
  else
20
20
  raise ArgumentError, "encoding <#{encoding_name}> not supported"
@@ -41,6 +41,8 @@ class Characteristics
41
41
  attr_reader :encoding
42
42
 
43
43
  def initialize(char)
44
+ raise ArgumentError, "Do not use abstract Characteristics.new(char) directly, please use Characteristics.create(char)" if self.class == Characteristics
45
+
44
46
  @is_valid = char.valid_encoding?
45
47
  @encoding = char.encoding
46
48
  @encoding_name = @encoding.name
@@ -51,7 +53,6 @@ class Characteristics
51
53
  end
52
54
 
53
55
  def unicode?
54
- false
55
56
  end
56
57
 
57
58
  def assigned?
@@ -62,4 +63,7 @@ class Characteristics
62
63
 
63
64
  def blank?
64
65
  end
66
+
67
+ def format?
68
+ end
65
69
  end
@@ -16,6 +16,10 @@ class AsciiCharacteristics < Characteristics
16
16
  @ord = char.ord if @is_valid
17
17
  end
18
18
 
19
+ def unicode?
20
+ false
21
+ end
22
+
19
23
  def assigned?
20
24
  true
21
25
  end
@@ -39,4 +43,8 @@ class AsciiCharacteristics < Characteristics
39
43
  def blank?
40
44
  @is_valid && ( BLANKS.include?(@ord) || SEPARATORS.include?(@ord) )
41
45
  end
46
+
47
+ def format?
48
+ false
49
+ end
42
50
  end
@@ -21,6 +21,10 @@ class BinaryCharacteristics < Characteristics
21
21
  true
22
22
  end
23
23
 
24
+ def unicode?
25
+ false
26
+ end
27
+
24
28
  def assigned?
25
29
  true
26
30
  end
@@ -40,4 +44,8 @@ class BinaryCharacteristics < Characteristics
40
44
  def blank?
41
45
  BLANKS.include?(@ord) || SEPARATORS.include?(@ord)
42
46
  end
47
+
48
+ def format?
49
+ false
50
+ end
43
51
  end
@@ -1,29 +1,42 @@
1
1
  class ByteCharacteristics < Characteristics
2
- HAS_C1 = /^(ISO-8859-)/
2
+ HAS_C1 = /^(ISO-8859-|TIS-620)/
3
3
 
4
4
  UNASSIGNED = {
5
- 0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
6
- 0x83 => /^(Windows-(1250|1257))/,
7
- 0x88 => /^(Windows-(1250|1253|1257))/,
8
- 0x8A => /^(Windows-(1253|1255|1257|1258))/,
9
- 0x8C => /^(Windows-(1253|1255|1257))/,
10
- 0x8D => /^(Windows-(1252|1253|1254|1255|1258))/,
11
- 0x8E => /^(Windows-(1253|1254|1255|1258))/,
12
- 0x8F => /^(Windows-(1252|1253|1254|1255|1258))/,
13
-
14
- 0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
15
- 0x98 => /^(Windows-(1250|1251|1253|1257))/,
16
- 0x9A => /^(Windows-(1253|1255|1257|1258))/,
17
- 0x9C => /^(Windows-(1253|1255|1257))/,
18
- 0x9D => /^(Windows-(1252|1253|1254|1255|1258))/,
19
- 0x9E => /^(Windows-(1253|1254|1255|1258))/,
20
- 0x9F => /^(Windows-(1253|1255|1257))/,
21
-
5
+ 0x80 => /^(IBM869)/,
6
+ 0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|IBM869|Windows-874)/,
7
+ 0x82 => /^(IBM869|Windows-874)/,
8
+ 0x83 => /^(Windows-(1250|1257)|IBM869|Windows-874)/,
9
+ 0x84 => /^(IBM869|Windows-874)/,
10
+ 0x85 => /^(IBM869)/,
11
+ 0x86 => /^(Windows-874)/,
12
+ 0x87 => /^(IBM869|Windows-874)/,
13
+ 0x88 => /^(Windows-(1250|1253|1257)|Windows-874)/,
14
+ 0x89 => /^(Windows-874)/,
15
+ 0x8A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/,
16
+ 0x8B => /^(Windows-874)/,
17
+ 0x8C => /^(Windows-(1253|1255|1257)|Windows-874)/,
18
+ 0x8D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
19
+ 0x8E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/,
20
+ 0x8F => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
21
+
22
+ 0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|macThai|Windows-874)/,
23
+ 0x93 => /^(IBM869)/,
24
+ 0x94 => /^(IBM869)/,
25
+ 0x98 => /^(Windows-(1250|1251|1253|1257)|Windows-874)/,
26
+ 0x99 => /^(Windows-874)/,
27
+ 0x9A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/,
28
+ 0x9B => /^(IBM864|Windows-874)/,
29
+ 0x9C => /^(Windows-(1253|1255|1257)|IBM864|Windows-874)/,
30
+ 0x9D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
31
+ 0x9E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/,
32
+ 0x9F => /^(Windows-(1253|1255|1257)|IBM864|macThai|Windows-874)/,
33
+
34
+ 0xA0 => /^(TIS-620)/,
22
35
  0xA1 => /^(ISO-8859-(6|8)|Windows-(1257))/,
23
36
  0xA2 => /^(ISO-8859-(6))/,
24
37
  0xA3 => /^(ISO-8859-(6))/,
25
38
  0xA5 => /^(ISO-8859-(3|6)|Windows-(1257))/,
26
- 0xA6 => /^(ISO-8859-(6))/,
39
+ 0xA6 => /^(ISO-8859-(6)|IBM864)/,
27
40
  0xA7 => /^(ISO-8859-(6))/,
28
41
  0xA8 => /^(ISO-8859-(6))/,
29
42
  0xA9 => /^(ISO-8859-(6))/,
@@ -70,34 +83,36 @@ class ByteCharacteristics < Characteristics
70
83
  0xD2 => /^(ISO-8859-(7|8)|Windows-(1253))/,
71
84
  0xD3 => /^(ISO-8859-(8))/,
72
85
  0xD4 => /^(ISO-8859-(8))/,
73
- 0xD5 => /^(ISO-8859-(8))/,
86
+ 0xD5 => /^(ISO-8859-(8)|IBM857)/, # IBM857: Ruby does not support euro sign
74
87
  0xD6 => /^(ISO-8859-(8))/,
75
88
  0xD7 => /^(ISO-8859-(8))/,
76
89
  0xD8 => /^(ISO-8859-(8))/,
77
90
  0xD9 => /^(ISO-8859-(8)|Windows-(1255))/,
78
91
  0xDA => /^(ISO-8859-(8)|Windows-(1255))/,
79
- 0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
80
- 0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
81
- 0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
82
- 0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
92
+ 0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
93
+ 0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
94
+ 0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
95
+ 0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
83
96
  0xDF => /^(ISO-8859-(6)|Windows-(1255))/,
84
97
 
85
98
  0xE3 => /^(ISO-8859-(3))/,
99
+ 0xE7 => /^(IBM857)/,
86
100
 
87
- 0xF0 => /^(ISO-8859-(3))/,
101
+ 0xF0 => /^(ISO-8859-(3))/, # mac: Treating F0 as always assigned
102
+ 0xF2 => /^(IBM857)/,
88
103
  0xF3 => /^(ISO-8859-(6))/,
89
104
  0xF4 => /^(ISO-8859-(6))/,
90
- 0xF5 => /^(ISO-8859-(6))/,
105
+ 0xF5 => /^(ISO-8859-(6)|macTurkish)/,
91
106
  0xF6 => /^(ISO-8859-(6))/,
92
107
  0xF7 => /^(ISO-8859-(6))/,
93
108
  0xF8 => /^(ISO-8859-(6))/,
94
109
  0xF9 => /^(ISO-8859-(6))/,
95
110
  0xFA => /^(ISO-8859-(6))/,
96
111
  0xFB => /^(ISO-8859-(6|8)|Windows-(1255))/,
97
- 0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
98
- 0xFD => /^(ISO-8859-(6|11))/,
99
- 0xFE => /^(ISO-8859-(6|11))/,
100
- 0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255))/,
112
+ 0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255)|macThai|TIS-620|Windows-874)/,
113
+ 0xFD => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/,
114
+ 0xFE => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/,
115
+ 0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255)|IBM864|macGreek|macThai|TIS-620|Windows-874)/, # macGreek: Ruby does not know of soft hyphen at FF
101
116
  }.freeze
102
117
 
103
118
  BLANKS = [
@@ -113,9 +128,22 @@ class ByteCharacteristics < Characteristics
113
128
  ].freeze
114
129
 
115
130
  EXTRA_BLANKS = {
116
- 0xA0 => /^(ISO-8859-|Windows-125)/,
131
+ 0xA0 => /^(ISO-8859-|Windows-125|macThai|Windows-874)/,
132
+ 0xA1 => /^IBM864/,
133
+ 0xAD => /^(ISO-8859-(?!11)|Windows-125)/,
134
+ 0x9A => /^KOI8-/,
117
135
  0x9D => /^Windows-(1256)/,
118
- 0x9F => /^Windows-(1256)/,
136
+ 0x9E => /^Windows-(1256)/,
137
+ 0xCA => /^mac(?!Thai)/,
138
+ 0xDB => /^macThai/,
139
+ 0xDC => /^macThai/,
140
+ 0xF0 => /^(IBM(?!437|737|86)|IBM869|CP)/,
141
+ 0xFF => /^(IBM(?!864)|CP)/, # |macGreek, but is unassigned in Ruby
142
+ }.freeze
143
+
144
+ FORMATS = {
145
+ 0xFD => /^(ISO-8859-8|Windows-(1255|1256))/,
146
+ 0xFE => /^(ISO-8859-8|Windows-(1255|1256))/,
119
147
  }.freeze
120
148
 
121
149
  def initialize(char)
@@ -124,12 +152,10 @@ class ByteCharacteristics < Characteristics
124
152
  end
125
153
 
126
154
  def encoding_has_c0?
127
- # !!(HAS_C0 =~ @encoding_name)
128
155
  true
129
156
  end
130
157
 
131
158
  def encoding_has_delete?
132
- # !!(HAS_C0 =~ @encoding_name)
133
159
  true
134
160
  end
135
161
 
@@ -137,6 +163,10 @@ class ByteCharacteristics < Characteristics
137
163
  !!(HAS_C1 =~ @encoding_name)
138
164
  end
139
165
 
166
+ def unicode?
167
+ false
168
+ end
169
+
140
170
  def assigned?
141
171
  control? || UNASSIGNED[@ord] !~ @encoding_name
142
172
  end
@@ -162,4 +192,8 @@ class ByteCharacteristics < Characteristics
162
192
  SEPARATORS.include?(@ord) ||
163
193
  EXTRA_BLANKS[@ord] =~ @encoding_name
164
194
  end
195
+
196
+ def format?
197
+ FORMATS[@ord] =~ @encoding_name
198
+ end
165
199
  end
@@ -100,4 +100,8 @@ class UnicodeCharacteristics < Characteristics
100
100
  def blank?
101
101
  @is_valid && ( BLANKS.include?(@ord) || SEPARATORS.include?(@ord) )
102
102
  end
103
+
104
+ def format?
105
+ @is_valid && @category == "Cf"
106
+ end
103
107
  end
@@ -1,4 +1,4 @@
1
1
  class Characteristics
2
- VERSION = "0.2.0".freeze
2
+ VERSION = "0.3.0".freeze
3
3
  end
4
4
 
@@ -18,6 +18,10 @@ describe Characteristics do
18
18
  Characteristics.create(char.force_encoding(encoding)).blank?
19
19
  end
20
20
 
21
+ def format?(char)
22
+ Characteristics.create(char.force_encoding(encoding)).format?
23
+ end
24
+
21
25
  describe UnicodeCharacteristics do
22
26
  describe "UTF-*" do
23
27
  let(:encoding) { "UTF-8" }
@@ -43,6 +47,11 @@ describe Characteristics do
43
47
  assert blank? "\x20"
44
48
  refute blank? "\x21"
45
49
  end
50
+
51
+ it "is format or not" do
52
+ assert format? "\uFFF9"
53
+ refute format? "\x21"
54
+ end
46
55
  end
47
56
  end
48
57
 
@@ -68,6 +77,10 @@ describe Characteristics do
68
77
  assert blank? "\x20"
69
78
  refute blank? "\x21"
70
79
  end
80
+
81
+ it "is never format" do
82
+ refute format? "\x21"
83
+ end
71
84
  end
72
85
  end
73
86
 
@@ -94,6 +107,10 @@ describe Characteristics do
94
107
  assert blank? "\x20"
95
108
  refute blank? "\x21"
96
109
  end
110
+
111
+ it "is never format" do
112
+ refute format? "\x21"
113
+ end
97
114
  end
98
115
  end
99
116
 
@@ -122,6 +139,10 @@ describe Characteristics do
122
139
  assert blank? "\x20"
123
140
  refute blank? "\x21"
124
141
  end
142
+
143
+ it "is never format" do
144
+ refute format? "\x21"
145
+ end
125
146
  end
126
147
 
127
148
  # TODO
@@ -142,7 +163,7 @@ describe Characteristics do
142
163
  # describe "ISO-8859-16" do
143
164
  end
144
165
 
145
- describe "Windows-*" do
166
+ describe "Windows-125*" do
146
167
  describe "Windows-1252" do
147
168
  let(:encoding) { "Windows-1252" }
148
169
 
@@ -164,6 +185,10 @@ describe Characteristics do
164
185
  assert blank? "\x20"
165
186
  refute blank? "\x21"
166
187
  end
188
+
189
+ it "is never format" do
190
+ refute format? "\x21"
191
+ end
167
192
  end
168
193
 
169
194
  # TODO
@@ -177,5 +202,199 @@ describe Characteristics do
177
202
  # describe "Windows-1257" do
178
203
  # describe "Windows-1258" do
179
204
  end
205
+
206
+ describe "IBM*, CP85*" do
207
+ describe "IBM869" do
208
+ let(:encoding) { "IBM869" }
209
+
210
+ it "is always valid" do
211
+ assert valid? "\x80"
212
+ end
213
+
214
+ it "is assigned or not" do
215
+ assert assigned? "\x21"
216
+ refute assigned? "\x80"
217
+ end
218
+
219
+ it "is control or not" do
220
+ assert control? "\x1E"
221
+ refute control? "\x67"
222
+ end
223
+
224
+ it "is blank or not" do
225
+ assert blank? "\x20"
226
+ refute blank? "\x21"
227
+ end
228
+
229
+ it "is never format" do
230
+ refute format? "\x21"
231
+ end
232
+ end
233
+
234
+ # describe "IBM437" do
235
+ # describe "IBM737" do
236
+ # describe "IBM775" do
237
+ # describe "CP850" do
238
+ # describe "IBM852" do
239
+ # describe "CP852" do
240
+ # describe "IBM855" do
241
+ # describe "CP855" do
242
+ # describe "IBM857" do
243
+ # describe "IBM860" do
244
+ # describe "IBM861" do
245
+ # describe "IBM862" do
246
+ # describe "IBM863" do
247
+ # describe "IBM864" do
248
+ # describe "IBM865" do
249
+ # describe "IBM866" do
250
+ end
251
+
252
+ describe "mac*" do
253
+ describe "macRoman" do
254
+ let(:encoding) { "macRoman" }
255
+
256
+ it "is always valid" do
257
+ assert valid? "\x80"
258
+ end
259
+
260
+ it "is always assigned" do
261
+ assert assigned? "\x21"
262
+ end
263
+
264
+ it "is control or not" do
265
+ assert control? "\x1E"
266
+ refute control? "\x67"
267
+ end
268
+
269
+ it "is blank or not" do
270
+ assert blank? "\x20"
271
+ refute blank? "\x21"
272
+ end
273
+
274
+ it "is never format" do
275
+ refute format? "\x21"
276
+ end
277
+ end
278
+
279
+ # describe "macCentEuro" do
280
+ # describe "macCroatian" do
281
+ # describe "macCyrillic" do
282
+ # describe "macGreek" do
283
+ # describe "macIceland" do
284
+ # describe "macRomania" do
285
+ # describe "macThai" do
286
+ # describe "macTurkish" do
287
+ # describe "macUkraine" do
288
+ end
289
+
290
+ describe "TIS-620/Windows-874" do
291
+ describe "TIS-620" do
292
+ let(:encoding) { "TIS-620" }
293
+
294
+ it "is always valid" do
295
+ assert valid? "\x80"
296
+ end
297
+
298
+ it "is assigned or not" do
299
+ assert assigned? "\x21"
300
+ refute assigned? "\xA0"
301
+ end
302
+
303
+ it "is control or not" do
304
+ assert control? "\x1E"
305
+ refute control? "\x67"
306
+ end
307
+
308
+ it "is blank or not" do
309
+ assert blank? "\x20"
310
+ refute blank? "\x21"
311
+ end
312
+
313
+ it "is never format" do
314
+ refute format? "\x21"
315
+ end
316
+ end
317
+
318
+ describe "Windows-874" do
319
+ let(:encoding) { "Windows-874" }
320
+
321
+ it "is always valid" do
322
+ assert valid? "\x80"
323
+ end
324
+
325
+ it "is assigned or not" do
326
+ assert assigned? "\xA0"
327
+ refute assigned? "\x99"
328
+ end
329
+
330
+ it "is control or not" do
331
+ assert control? "\x1E"
332
+ refute control? "\x67"
333
+ end
334
+
335
+ it "is blank or not" do
336
+ assert blank? "\x20"
337
+ refute blank? "\x21"
338
+ end
339
+
340
+ it "is never format" do
341
+ refute format? "\x21"
342
+ end
343
+ end
344
+ end
345
+
346
+ describe "KOI8-*" do
347
+ describe "KOI8-R" do
348
+ let(:encoding) { "KOI8-R" }
349
+
350
+ it "is always valid" do
351
+ assert valid? "\x80"
352
+ end
353
+
354
+ it "is always assigned" do
355
+ assert assigned? "\x21"
356
+ end
357
+
358
+ it "is control or not" do
359
+ assert control? "\x1E"
360
+ refute control? "\x67"
361
+ end
362
+
363
+ it "is blank or not" do
364
+ assert blank? "\x20"
365
+ refute blank? "\x21"
366
+ end
367
+
368
+ it "is never format" do
369
+ refute format? "\x21"
370
+ end
371
+ end
372
+
373
+ describe "KOI8-U" do
374
+ let(:encoding) { "KOI8-U" }
375
+
376
+ it "is always valid" do
377
+ assert valid? "\x80"
378
+ end
379
+
380
+ it "is always assigned" do
381
+ assert assigned? "\x21"
382
+ end
383
+
384
+ it "is control or not" do
385
+ assert control? "\x1E"
386
+ refute control? "\x67"
387
+ end
388
+
389
+ it "is blank or not" do
390
+ assert blank? "\x20"
391
+ refute blank? "\x21"
392
+ end
393
+
394
+ it "is never format" do
395
+ refute format? "\x21"
396
+ end
397
+ end
398
+ end
180
399
  end
181
400
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: characteristics
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-13 00:00:00.000000000 Z
11
+ date: 2017-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode-categories