characteristics 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 382e8a6535fa4152dbdbf7f213a12ba6e5a82830
4
- data.tar.gz: c92c1bdf4eb32f8ee66c0dce0d72327f95ca0260
3
+ metadata.gz: fde954a2c2f2028fb7656a57c18a6556416a2f2f
4
+ data.tar.gz: 1856bc93124425900c7785c2107965d6620ef194
5
5
  SHA512:
6
- metadata.gz: e6e886ba4aa8b3605c5a8918e3e97c2982a1843779e780562757373fe30641e7e56e6e8bb0ea90bcca6b20a1ce752780c587a87902f3f346fa42a56c1cca07bd
7
- data.tar.gz: 273ee12ec3e4bb2426743ac146790b658bdfc236e396c73d7af369b4cf76d89248fcb074a1515c4280e322a05644feeaf838622782210501fab2c36e7819e384
6
+ metadata.gz: 7d2e5f27972d6db2c3164ca9a915fd59ed69e2b99f4ec4fb4084a73be0fe8283b5c619c8b3bc6f06547a46592c3ac74bbaf07c2dd1c1afa1bfb5eb98aa6f0719
7
+ data.tar.gz: 52c27cda5f0e20b7d7b1a7f99c0fc505e2250d6a8263850bb889fb1841ad95984f0f67c3e1d7203c3efac00284fd3d8f92d7fbb36df8881f16586bf4610f9333
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  ## CHANGELOG
2
2
 
3
+ ### 0.3.0
4
+
5
+ * Add soft-hyphen to single byte encodings
6
+ * Add format? property (e.g. RLM)
7
+ * Support more encodings: IBMX, CP85X, macX, TIS-620, Windows-874, KOI8-X
8
+
3
9
  ### 0.2.0
4
10
 
5
11
  * Fix detection of supported Windows encodings and some unassigned codepoints
data/README.md CHANGED
@@ -26,6 +26,7 @@ char_info.unicode? # => true / false
26
26
  char_info.assigned? # => true / false
27
27
  char_info.control? # => true / false
28
28
  char_info.blank? # => true / false
29
+ char_info.format? # => true / false
29
30
  ```
30
31
 
31
32
  ## Types of Encodings
@@ -35,7 +36,7 @@ This library knows of four different kinds of encodings:
35
36
  - **:unicode** Unicode family of multibyte encodings (*UTF-X*)
36
37
  - **:ascii** 7-Bit ASCII (*US-ASCII*)
37
38
  - **:binary** Arbitrary string (*ASCII-8BIT*)
38
- - **:byte** Known byte encoding (*ISO-8859-X*, *Windows-125X*)
39
+ - **:byte** Known single byte encoding (*ISO-8859-X*, *Windows-125X*, *IBMX*, *CP85X*, *macX*, *TIS-620*, *Windows-874*, *KOI8-X*)
39
40
 
40
41
  Other encodings are not supported, yet.
41
42
 
@@ -51,7 +52,7 @@ Validness is determined by Ruby's String#valid_encoding?
51
52
 
52
53
  ### `control?`
53
54
 
54
- Control characters are codepoints in the is [C0, delete or C1 control character range](https://en.wikipedia.org/wiki/C0_and_C1_control_codes).
55
+ Control characters are codepoints in the [C0, delete or C1 control character range](https://en.wikipedia.org/wiki/C0_and_C1_control_codes). Characters in this range of [IBM codepage 437](https://en.wikipedia.org/wiki/Code_page_437) based encodings are always treated as control characters.
55
56
 
56
57
  ### `assigned?`
57
58
 
@@ -63,6 +64,10 @@ Control characters are codepoints in the is [C0, delete or C1 control character
63
64
 
64
65
  The library includes a list of characters that might not be rendered visually. This list does not include unassigned codepoints, control characters (except for `\t`, `\n`, `\v`, `\f`, `\r`), or special formatting characters (right-to-left marker, variation selectors, etc).
65
66
 
67
+ ### `format?`
68
+
69
+ This flag is `true` only for special formatting characters, which are not control characters, like Right-to-left marks. In Unicode, this means codepoints with the General Category of **Cf**.
70
+
66
71
  ## Todo
67
72
 
68
73
  - Support all non-dummy encodings that Ruby supports
@@ -14,7 +14,7 @@ class Characteristics
14
14
  :binary
15
15
  when /^UTF-/
16
16
  :unicode
17
- when /^ISO-8859-/, /^Windows-125/
17
+ when /^ISO-8859-/, /^Windows-125/, /^(IBM|CP85)/, /^mac/, 'TIS-620', 'Windows-874', /^KOI8-/
18
18
  :byte
19
19
  else
20
20
  raise ArgumentError, "encoding <#{encoding_name}> not supported"
@@ -41,6 +41,8 @@ class Characteristics
41
41
  attr_reader :encoding
42
42
 
43
43
  def initialize(char)
44
+ raise ArgumentError, "Do not use abstract Characteristics.new(char) directly, please use Characteristics.create(char)" if self.class == Characteristics
45
+
44
46
  @is_valid = char.valid_encoding?
45
47
  @encoding = char.encoding
46
48
  @encoding_name = @encoding.name
@@ -51,7 +53,6 @@ class Characteristics
51
53
  end
52
54
 
53
55
  def unicode?
54
- false
55
56
  end
56
57
 
57
58
  def assigned?
@@ -62,4 +63,7 @@ class Characteristics
62
63
 
63
64
  def blank?
64
65
  end
66
+
67
+ def format?
68
+ end
65
69
  end
@@ -16,6 +16,10 @@ class AsciiCharacteristics < Characteristics
16
16
  @ord = char.ord if @is_valid
17
17
  end
18
18
 
19
+ def unicode?
20
+ false
21
+ end
22
+
19
23
  def assigned?
20
24
  true
21
25
  end
@@ -39,4 +43,8 @@ class AsciiCharacteristics < Characteristics
39
43
  def blank?
40
44
  @is_valid && ( BLANKS.include?(@ord) || SEPARATORS.include?(@ord) )
41
45
  end
46
+
47
+ def format?
48
+ false
49
+ end
42
50
  end
@@ -21,6 +21,10 @@ class BinaryCharacteristics < Characteristics
21
21
  true
22
22
  end
23
23
 
24
+ def unicode?
25
+ false
26
+ end
27
+
24
28
  def assigned?
25
29
  true
26
30
  end
@@ -40,4 +44,8 @@ class BinaryCharacteristics < Characteristics
40
44
  def blank?
41
45
  BLANKS.include?(@ord) || SEPARATORS.include?(@ord)
42
46
  end
47
+
48
+ def format?
49
+ false
50
+ end
43
51
  end
@@ -1,29 +1,42 @@
1
1
  class ByteCharacteristics < Characteristics
2
- HAS_C1 = /^(ISO-8859-)/
2
+ HAS_C1 = /^(ISO-8859-|TIS-620)/
3
3
 
4
4
  UNASSIGNED = {
5
- 0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
6
- 0x83 => /^(Windows-(1250|1257))/,
7
- 0x88 => /^(Windows-(1250|1253|1257))/,
8
- 0x8A => /^(Windows-(1253|1255|1257|1258))/,
9
- 0x8C => /^(Windows-(1253|1255|1257))/,
10
- 0x8D => /^(Windows-(1252|1253|1254|1255|1258))/,
11
- 0x8E => /^(Windows-(1253|1254|1255|1258))/,
12
- 0x8F => /^(Windows-(1252|1253|1254|1255|1258))/,
13
-
14
- 0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
15
- 0x98 => /^(Windows-(1250|1251|1253|1257))/,
16
- 0x9A => /^(Windows-(1253|1255|1257|1258))/,
17
- 0x9C => /^(Windows-(1253|1255|1257))/,
18
- 0x9D => /^(Windows-(1252|1253|1254|1255|1258))/,
19
- 0x9E => /^(Windows-(1253|1254|1255|1258))/,
20
- 0x9F => /^(Windows-(1253|1255|1257))/,
21
-
5
+ 0x80 => /^(IBM869)/,
6
+ 0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|IBM869|Windows-874)/,
7
+ 0x82 => /^(IBM869|Windows-874)/,
8
+ 0x83 => /^(Windows-(1250|1257)|IBM869|Windows-874)/,
9
+ 0x84 => /^(IBM869|Windows-874)/,
10
+ 0x85 => /^(IBM869)/,
11
+ 0x86 => /^(Windows-874)/,
12
+ 0x87 => /^(IBM869|Windows-874)/,
13
+ 0x88 => /^(Windows-(1250|1253|1257)|Windows-874)/,
14
+ 0x89 => /^(Windows-874)/,
15
+ 0x8A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/,
16
+ 0x8B => /^(Windows-874)/,
17
+ 0x8C => /^(Windows-(1253|1255|1257)|Windows-874)/,
18
+ 0x8D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
19
+ 0x8E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/,
20
+ 0x8F => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
21
+
22
+ 0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|macThai|Windows-874)/,
23
+ 0x93 => /^(IBM869)/,
24
+ 0x94 => /^(IBM869)/,
25
+ 0x98 => /^(Windows-(1250|1251|1253|1257)|Windows-874)/,
26
+ 0x99 => /^(Windows-874)/,
27
+ 0x9A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/,
28
+ 0x9B => /^(IBM864|Windows-874)/,
29
+ 0x9C => /^(Windows-(1253|1255|1257)|IBM864|Windows-874)/,
30
+ 0x9D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/,
31
+ 0x9E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/,
32
+ 0x9F => /^(Windows-(1253|1255|1257)|IBM864|macThai|Windows-874)/,
33
+
34
+ 0xA0 => /^(TIS-620)/,
22
35
  0xA1 => /^(ISO-8859-(6|8)|Windows-(1257))/,
23
36
  0xA2 => /^(ISO-8859-(6))/,
24
37
  0xA3 => /^(ISO-8859-(6))/,
25
38
  0xA5 => /^(ISO-8859-(3|6)|Windows-(1257))/,
26
- 0xA6 => /^(ISO-8859-(6))/,
39
+ 0xA6 => /^(ISO-8859-(6)|IBM864)/,
27
40
  0xA7 => /^(ISO-8859-(6))/,
28
41
  0xA8 => /^(ISO-8859-(6))/,
29
42
  0xA9 => /^(ISO-8859-(6))/,
@@ -70,34 +83,36 @@ class ByteCharacteristics < Characteristics
70
83
  0xD2 => /^(ISO-8859-(7|8)|Windows-(1253))/,
71
84
  0xD3 => /^(ISO-8859-(8))/,
72
85
  0xD4 => /^(ISO-8859-(8))/,
73
- 0xD5 => /^(ISO-8859-(8))/,
86
+ 0xD5 => /^(ISO-8859-(8)|IBM857)/, # IBM857: Ruby does not support euro sign
74
87
  0xD6 => /^(ISO-8859-(8))/,
75
88
  0xD7 => /^(ISO-8859-(8))/,
76
89
  0xD8 => /^(ISO-8859-(8))/,
77
90
  0xD9 => /^(ISO-8859-(8)|Windows-(1255))/,
78
91
  0xDA => /^(ISO-8859-(8)|Windows-(1255))/,
79
- 0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
80
- 0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
81
- 0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
82
- 0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
92
+ 0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
93
+ 0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
94
+ 0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
95
+ 0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/,
83
96
  0xDF => /^(ISO-8859-(6)|Windows-(1255))/,
84
97
 
85
98
  0xE3 => /^(ISO-8859-(3))/,
99
+ 0xE7 => /^(IBM857)/,
86
100
 
87
- 0xF0 => /^(ISO-8859-(3))/,
101
+ 0xF0 => /^(ISO-8859-(3))/, # mac: Treating F0 as always assigned
102
+ 0xF2 => /^(IBM857)/,
88
103
  0xF3 => /^(ISO-8859-(6))/,
89
104
  0xF4 => /^(ISO-8859-(6))/,
90
- 0xF5 => /^(ISO-8859-(6))/,
105
+ 0xF5 => /^(ISO-8859-(6)|macTurkish)/,
91
106
  0xF6 => /^(ISO-8859-(6))/,
92
107
  0xF7 => /^(ISO-8859-(6))/,
93
108
  0xF8 => /^(ISO-8859-(6))/,
94
109
  0xF9 => /^(ISO-8859-(6))/,
95
110
  0xFA => /^(ISO-8859-(6))/,
96
111
  0xFB => /^(ISO-8859-(6|8)|Windows-(1255))/,
97
- 0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
98
- 0xFD => /^(ISO-8859-(6|11))/,
99
- 0xFE => /^(ISO-8859-(6|11))/,
100
- 0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255))/,
112
+ 0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255)|macThai|TIS-620|Windows-874)/,
113
+ 0xFD => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/,
114
+ 0xFE => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/,
115
+ 0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255)|IBM864|macGreek|macThai|TIS-620|Windows-874)/, # macGreek: Ruby does not know of soft hyphen at FF
101
116
  }.freeze
102
117
 
103
118
  BLANKS = [
@@ -113,9 +128,22 @@ class ByteCharacteristics < Characteristics
113
128
  ].freeze
114
129
 
115
130
  EXTRA_BLANKS = {
116
- 0xA0 => /^(ISO-8859-|Windows-125)/,
131
+ 0xA0 => /^(ISO-8859-|Windows-125|macThai|Windows-874)/,
132
+ 0xA1 => /^IBM864/,
133
+ 0xAD => /^(ISO-8859-(?!11)|Windows-125)/,
134
+ 0x9A => /^KOI8-/,
117
135
  0x9D => /^Windows-(1256)/,
118
- 0x9F => /^Windows-(1256)/,
136
+ 0x9E => /^Windows-(1256)/,
137
+ 0xCA => /^mac(?!Thai)/,
138
+ 0xDB => /^macThai/,
139
+ 0xDC => /^macThai/,
140
+ 0xF0 => /^(IBM(?!437|737|86)|IBM869|CP)/,
141
+ 0xFF => /^(IBM(?!864)|CP)/, # |macGreek, but is unnasigned in Ruby
142
+ }.freeze
143
+
144
+ FORMATS = {
145
+ 0xFD => /^(ISO-8859-8|Windows-(1255|1256))/,
146
+ 0xFE => /^(ISO-8859-8|Windows-(1255|1256))/,
119
147
  }.freeze
120
148
 
121
149
  def initialize(char)
@@ -124,12 +152,10 @@ class ByteCharacteristics < Characteristics
124
152
  end
125
153
 
126
154
  def encoding_has_c0?
127
- # !!(HAS_C0 =~ @encoding_name)
128
155
  true
129
156
  end
130
157
 
131
158
  def encoding_has_delete?
132
- # !!(HAS_C0 =~ @encoding_name)
133
159
  true
134
160
  end
135
161
 
@@ -137,6 +163,10 @@ class ByteCharacteristics < Characteristics
137
163
  !!(HAS_C1 =~ @encoding_name)
138
164
  end
139
165
 
166
+ def unicode?
167
+ false
168
+ end
169
+
140
170
  def assigned?
141
171
  control? || UNASSIGNED[@ord] !~ @encoding_name
142
172
  end
@@ -162,4 +192,8 @@ class ByteCharacteristics < Characteristics
162
192
  SEPARATORS.include?(@ord) ||
163
193
  EXTRA_BLANKS[@ord] =~ @encoding_name
164
194
  end
195
+
196
+ def format?
197
+ FORMATS[@ord] =~ @encoding_name
198
+ end
165
199
  end
@@ -100,4 +100,8 @@ class UnicodeCharacteristics < Characteristics
100
100
  def blank?
101
101
  @is_valid && ( BLANKS.include?(@ord) || SEPARATORS.include?(@ord) )
102
102
  end
103
+
104
+ def format?
105
+ @is_valid && @category == "Cf"
106
+ end
103
107
  end
@@ -1,4 +1,4 @@
1
1
  class Characteristics
2
- VERSION = "0.2.0".freeze
2
+ VERSION = "0.3.0".freeze
3
3
  end
4
4
 
@@ -18,6 +18,10 @@ describe Characteristics do
18
18
  Characteristics.create(char.force_encoding(encoding)).blank?
19
19
  end
20
20
 
21
+ def format?(char)
22
+ Characteristics.create(char.force_encoding(encoding)).format?
23
+ end
24
+
21
25
  describe UnicodeCharacteristics do
22
26
  describe "UTF-*" do
23
27
  let(:encoding) { "UTF-8" }
@@ -43,6 +47,11 @@ describe Characteristics do
43
47
  assert blank? "\x20"
44
48
  refute blank? "\x21"
45
49
  end
50
+
51
+ it "is format or not" do
52
+ assert format? "\uFFF9"
53
+ refute format? "\x21"
54
+ end
46
55
  end
47
56
  end
48
57
 
@@ -68,6 +77,10 @@ describe Characteristics do
68
77
  assert blank? "\x20"
69
78
  refute blank? "\x21"
70
79
  end
80
+
81
+ it "is never format" do
82
+ refute format? "\x21"
83
+ end
71
84
  end
72
85
  end
73
86
 
@@ -94,6 +107,10 @@ describe Characteristics do
94
107
  assert blank? "\x20"
95
108
  refute blank? "\x21"
96
109
  end
110
+
111
+ it "is never format" do
112
+ refute format? "\x21"
113
+ end
97
114
  end
98
115
  end
99
116
 
@@ -122,6 +139,10 @@ describe Characteristics do
122
139
  assert blank? "\x20"
123
140
  refute blank? "\x21"
124
141
  end
142
+
143
+ it "is never format" do
144
+ refute format? "\x21"
145
+ end
125
146
  end
126
147
 
127
148
  # TODO
@@ -142,7 +163,7 @@ describe Characteristics do
142
163
  # describe "ISO-8859-16" do
143
164
  end
144
165
 
145
- describe "Windows-*" do
166
+ describe "Windows-125*" do
146
167
  describe "Windows-1252" do
147
168
  let(:encoding) { "Windows-1252" }
148
169
 
@@ -164,6 +185,10 @@ describe Characteristics do
164
185
  assert blank? "\x20"
165
186
  refute blank? "\x21"
166
187
  end
188
+
189
+ it "is never format" do
190
+ refute format? "\x21"
191
+ end
167
192
  end
168
193
 
169
194
  # TODO
@@ -177,5 +202,199 @@ describe Characteristics do
177
202
  # describe "Windows-1257" do
178
203
  # describe "Windows-1258" do
179
204
  end
205
+
206
+ describe "IBM*, CP85*" do
207
+ describe "IBM869" do
208
+ let(:encoding) { "IBM869" }
209
+
210
+ it "is always valid" do
211
+ assert valid? "\x80"
212
+ end
213
+
214
+ it "is assigned or not" do
215
+ assert assigned? "\x21"
216
+ refute assigned? "\x80"
217
+ end
218
+
219
+ it "is control or not" do
220
+ assert control? "\x1E"
221
+ refute control? "\x67"
222
+ end
223
+
224
+ it "is blank or not" do
225
+ assert blank? "\x20"
226
+ refute blank? "\x21"
227
+ end
228
+
229
+ it "is never format" do
230
+ refute format? "\x21"
231
+ end
232
+ end
233
+
234
+ # describe "IBM437" do
235
+ # describe "IBM737" do
236
+ # describe "IBM775" do
237
+ # describe "CP850" do
238
+ # describe "IBM852" do
239
+ # describe "CP852" do
240
+ # describe "IBM855" do
241
+ # describe "CP855" do
242
+ # describe "IBM857" do
243
+ # describe "IBM860" do
244
+ # describe "IBM861" do
245
+ # describe "IBM862" do
246
+ # describe "IBM863" do
247
+ # describe "IBM864" do
248
+ # describe "IBM865" do
249
+ # describe "IBM866" do
250
+ end
251
+
252
+ describe "mac*" do
253
+ describe "macRoman" do
254
+ let(:encoding) { "macRoman" }
255
+
256
+ it "is always valid" do
257
+ assert valid? "\x80"
258
+ end
259
+
260
+ it "is always assigned" do
261
+ assert assigned? "\x21"
262
+ end
263
+
264
+ it "is control or not" do
265
+ assert control? "\x1E"
266
+ refute control? "\x67"
267
+ end
268
+
269
+ it "is blank or not" do
270
+ assert blank? "\x20"
271
+ refute blank? "\x21"
272
+ end
273
+
274
+ it "is never format" do
275
+ refute format? "\x21"
276
+ end
277
+ end
278
+
279
+ # describe "macCentEuro" do
280
+ # describe "macCroatian" do
281
+ # describe "macCyrillic" do
282
+ # describe "macGreek" do
283
+ # describe "macIceland" do
284
+ # describe "macRomania" do
285
+ # describe "macThai" do
286
+ # describe "macTurkish" do
287
+ # describe "macUkraine" do
288
+ end
289
+
290
+ describe "TIS-620/Windows-874" do
291
+ describe "TIS-620" do
292
+ let(:encoding) { "TIS-620" }
293
+
294
+ it "is always valid" do
295
+ assert valid? "\x80"
296
+ end
297
+
298
+ it "is assigned or not" do
299
+ assert assigned? "\x21"
300
+ refute assigned? "\xA0"
301
+ end
302
+
303
+ it "is control or not" do
304
+ assert control? "\x1E"
305
+ refute control? "\x67"
306
+ end
307
+
308
+ it "is blank or not" do
309
+ assert blank? "\x20"
310
+ refute blank? "\x21"
311
+ end
312
+
313
+ it "is never format" do
314
+ refute format? "\x21"
315
+ end
316
+ end
317
+
318
+ describe "Windows-874" do
319
+ let(:encoding) { "Windows-874" }
320
+
321
+ it "is always valid" do
322
+ assert valid? "\x80"
323
+ end
324
+
325
+ it "is assigned or not" do
326
+ assert assigned? "\xA0"
327
+ refute assigned? "\x99"
328
+ end
329
+
330
+ it "is control or not" do
331
+ assert control? "\x1E"
332
+ refute control? "\x67"
333
+ end
334
+
335
+ it "is blank or not" do
336
+ assert blank? "\x20"
337
+ refute blank? "\x21"
338
+ end
339
+
340
+ it "is never format" do
341
+ refute format? "\x21"
342
+ end
343
+ end
344
+ end
345
+
346
+ describe "KOI8-*" do
347
+ describe "KOI8-R" do
348
+ let(:encoding) { "KOI8-R" }
349
+
350
+ it "is always valid" do
351
+ assert valid? "\x80"
352
+ end
353
+
354
+ it "is always assigned" do
355
+ assert assigned? "\x21"
356
+ end
357
+
358
+ it "is control or not" do
359
+ assert control? "\x1E"
360
+ refute control? "\x67"
361
+ end
362
+
363
+ it "is blank or not" do
364
+ assert blank? "\x20"
365
+ refute blank? "\x21"
366
+ end
367
+
368
+ it "is never format" do
369
+ refute format? "\x21"
370
+ end
371
+ end
372
+
373
+ describe "KOI8-U" do
374
+ let(:encoding) { "KOI8-U" }
375
+
376
+ it "is always valid" do
377
+ assert valid? "\x80"
378
+ end
379
+
380
+ it "is always assigned" do
381
+ assert assigned? "\x21"
382
+ end
383
+
384
+ it "is control or not" do
385
+ assert control? "\x1E"
386
+ refute control? "\x67"
387
+ end
388
+
389
+ it "is blank or not" do
390
+ assert blank? "\x20"
391
+ refute blank? "\x21"
392
+ end
393
+
394
+ it "is never format" do
395
+ refute format? "\x21"
396
+ end
397
+ end
398
+ end
180
399
  end
181
400
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: characteristics
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-13 00:00:00.000000000 Z
11
+ date: 2017-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode-categories