unibits 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/Gemfile.lock +3 -3
- data/README.md +5 -4
- data/bin/unibits +9 -2
- data/lib/unibits/symbolify.rb +28 -1
- data/lib/unibits/version.rb +1 -1
- data/lib/unibits.rb +94 -87
- data/spec/unibits_spec.rb +58 -2
- data/unibits.gemspec +2 -2
- metadata +4 -13
- data/screenshots/ascii.invalid.png +0 -0
- data/screenshots/ascii.png +0 -0
- data/screenshots/binary.png +0 -0
- data/screenshots/utf-16be.png +0 -0
- data/screenshots/utf-16le.png +0 -0
- data/screenshots/utf-32be.png +0 -0
- data/screenshots/utf-32le.png +0 -0
- data/screenshots/utf-8.invalid.png +0 -0
- data/screenshots/utf-8.png +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 93d43c363159fed623abd9acd021e0aa6ed90061
|
4
|
+
data.tar.gz: 7fe58c18ca55c1a595345235fa3cf6fa9b0d3635
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b580cbfc775274bd9790e0b000a039ed304ffe4e5f41349f5e445118154b9351d5084dfe204c77203b0f4dd4b3e777fa3119e3b47bbba0a422df89bdfec6bad
|
7
|
+
data.tar.gz: 54da485c62dcf7390dde62460d977472130e1c3c6d091e3fa3a71be07c3854260c06c2eb067f7ac2abc96d934e69c6393861c06197a519d1b432f541bc9cf0f8
|
data/CHANGELOG.md
CHANGED
@@ -1,10 +1,16 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
+
### 2.1.0
|
4
|
+
|
5
|
+
* Support more encodings: IBMX, CP85X, macX, TIS-620/Windows-874, and KOI8-X
|
6
|
+
* Highlight non-control formatting characters in pink
|
7
|
+
* Improve `unibits --help` command
|
8
|
+
|
3
9
|
### 2.0.0
|
4
10
|
|
5
11
|
* Support more encodings: ISO-8859-X and Windows-125X
|
6
12
|
* Add three HANGUL characters (U+115F, U+1160, U+3164) to list of possible white spaces
|
7
|
-
* Move character handling to separate gem. It is called
|
13
|
+
* Move character handling to separate gem. It is called [characteristics](https://github.com/janlelis/characteristics).
|
8
14
|
* Highlight control chars in blue and blanks in light blue
|
9
15
|
* Handle encodings that are not convertible to UTF-8
|
10
16
|
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
unibits (2.0.0)
|
5
|
-
characteristics (~> 0.
|
4
|
+
unibits (2.1.0)
|
5
|
+
characteristics (~> 0.3.0)
|
6
6
|
paint (>= 0.9, < 3.0)
|
7
7
|
rationalist (~> 2.0)
|
8
8
|
unicode-display_width (~> 1.1)
|
@@ -17,7 +17,7 @@ GEM
|
|
17
17
|
boson-more (0.2.2)
|
18
18
|
boson (>= 1.2.0)
|
19
19
|
cd (1.0.1)
|
20
|
-
characteristics (0.
|
20
|
+
characteristics (0.3.0)
|
21
21
|
unicode-categories (~> 1.1, >= 1.1.2)
|
22
22
|
clipboard (1.0.6)
|
23
23
|
coderay (1.1.1)
|
data/README.md
CHANGED
@@ -1,20 +1,21 @@
|
|
1
1
|
# unibits | Reveal the Unicode [![[version]](https://badge.fury.io/rb/unibits.svg)](http://badge.fury.io/rb/unibits) [![[travis]](https://travis-ci.org/janlelis/unibits.svg)](https://travis-ci.org/janlelis/unibits)
|
2
2
|
|
3
|
-
Ruby library and CLI command that visualizes various Unicode and ASCII encodings in the terminal:
|
3
|
+
Ruby library and CLI command that visualizes various Unicode and ASCII/single byte encodings in the terminal:
|
4
4
|
|
5
5
|
- Makes analyzing encodings easier
|
6
6
|
- Helps you with debugging strings
|
7
|
-
- Supports **UTF-8**, **UTF-16LE**/**UTF-16BE**, **UTF-32LE**/**UTF-32BE**, **ISO-8859-X**, **Windows-125X** arbitrary **BINARY** data, and **ASCII**
|
8
|
-
- Highlights invalid
|
7
|
+
- Supports **UTF-8**, **UTF-16LE**/**UTF-16BE**, **UTF-32LE**/**UTF-32BE**, **ISO-8859-X**, **Windows-125X**, **IBMX**, **CP85X**, **macX**, **TIS-620**/**Windows-874**, **KOI8-R**/**KOI8-U**, arbitrary **BINARY** data, and 7-Bit **ASCII**
|
8
|
+
- Highlights invalid/special/blank bytes/characters/codepoints
|
9
9
|
|
10
10
|
## Color Coding
|
11
11
|
|
12
|
-
Each byte of the given string is highlighted using the following mechanism:
|
12
|
+
Each byte of the given string is highlighted using the following mechanism (characters -> codepoints):
|
13
13
|
|
14
14
|
- Red for invalid bytes
|
15
15
|
- Orange for unassigned bytes/characters
|
16
16
|
- Blue for control characters
|
17
17
|
- Light blue for blanks
|
18
|
+
- Pink for non-control formatting characters
|
18
19
|
- Random color for all other characters
|
19
20
|
|
20
21
|
## Setup
|
data/bin/unibits
CHANGED
@@ -33,7 +33,7 @@ if argv[:help]
|
|
33
33
|
|
34
34
|
#{Paint["DESCRIPTION", :underline]}
|
35
35
|
|
36
|
-
Visualizes
|
36
|
+
Visualizes Unicode and ASCII/single byte encodings in the terminal.
|
37
37
|
|
38
38
|
#{Paint["USAGE", :underline]}
|
39
39
|
|
@@ -48,7 +48,14 @@ if argv[:help]
|
|
48
48
|
|
49
49
|
#{Paint["ENCODINGS", :underline]}
|
50
50
|
|
51
|
-
#{Unibits::SUPPORTED_ENCODINGS.join(', ')}
|
51
|
+
#{Unibits::SUPPORTED_ENCODINGS.join(', ').scan(/.{,80}(?> |\z)/).join("\n ")}
|
52
|
+
#{Paint["COLOR CODING", :underline]}
|
53
|
+
|
54
|
+
#{Paint["invalid", Unibits::COLORS[:invalid]]}
|
55
|
+
#{Paint["unassigned", Unibits::COLORS[:unassigned]]}
|
56
|
+
#{Paint["control", Unibits::COLORS[:control]]}
|
57
|
+
#{Paint["blank", Unibits::COLORS[:blank]]}
|
58
|
+
#{Paint["format", Unibits::COLORS[:format]]}
|
52
59
|
|
53
60
|
#{Paint["STATS", :underline]}
|
54
61
|
|
data/lib/unibits/symbolify.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module Unibits
|
2
2
|
module Symbolify
|
3
|
-
NO_UTF8_CONVERTER = /^Windows-1258/
|
3
|
+
NO_UTF8_CONVERTER = /^(Windows-1258|IBM864|macCentEuro|macThai)/
|
4
4
|
ASCII_CHARS = "\x20-\x7E".freeze
|
5
5
|
ASCII_CONTROL_CODEPOINTS = "\x00-\x1F\x7F".freeze
|
6
6
|
ASCII_CONTROL_SYMBOLS = "\u{2400}-\u{241F}\u{2421}".freeze
|
@@ -354,6 +354,27 @@ module Unibits
|
|
354
354
|
0xE01EF => "VS256",
|
355
355
|
}.freeze
|
356
356
|
|
357
|
+
INTERESTING_BYTES_ENCODINGS = {
|
358
|
+
0xD8 => /^macCroatian/,
|
359
|
+
0xF0 => /^mac(Iceland|Roman|Turkish)/,
|
360
|
+
0xFD => /^(ISO-8859-8|Windows-(1255|1256))/,
|
361
|
+
0xFE => /^(ISO-8859-8|Windows-(1255|1256))/,
|
362
|
+
}.freeze
|
363
|
+
|
364
|
+
INTERESTING_BYTES_VALUES = {
|
365
|
+
0xD8 => "Logo",
|
366
|
+
0xF0 => "Logo",
|
367
|
+
0xFD => "LRM",
|
368
|
+
0xFE => "RLM",
|
369
|
+
}.freeze
|
370
|
+
|
371
|
+
MAC_KEY_SYMBOLS = {
|
372
|
+
0x11 => "⌘",
|
373
|
+
0x12 => "⇧",
|
374
|
+
0x13 => "⌥",
|
375
|
+
0x14 => "⌃",
|
376
|
+
}
|
377
|
+
|
357
378
|
def self.symbolify(char, char_info)
|
358
379
|
if !char_info.valid?
|
359
380
|
"�"
|
@@ -407,13 +428,19 @@ module Unibits
|
|
407
428
|
if char_info.delete?
|
408
429
|
char = CONTROL_DELETE_SYMBOL
|
409
430
|
elsif char_info.c0?
|
431
|
+
if ord >= 0x11 && ord <= 0x14 && encoding.name =~ /^mac/
|
432
|
+
char = MAC_KEY_SYMBOLS[ord]
|
433
|
+
else
|
410
434
|
char = CONTROL_C0_SYMBOLS[ord]
|
435
|
+
end
|
411
436
|
elsif char_info.c1?
|
412
437
|
char = CONTROL_C1_NAMES[ord]
|
413
438
|
elsif no_converter
|
414
439
|
treat_char_unconverted = true
|
415
440
|
elsif char_info.blank?
|
416
441
|
char = "]".encode(encoding) + char + "[".encode(encoding)
|
442
|
+
elsif INTERESTING_BYTES_ENCODINGS[ord] =~ encoding.name
|
443
|
+
char = INTERESTING_BYTES_VALUES[ord]
|
417
444
|
end
|
418
445
|
|
419
446
|
if no_converter && treat_char_unconverted
|
data/lib/unibits/version.rb
CHANGED
data/lib/unibits.rb
CHANGED
@@ -7,17 +7,31 @@ require "unicode/display_width"
|
|
7
7
|
require "characteristics"
|
8
8
|
|
9
9
|
module Unibits
|
10
|
-
SUPPORTED_ENCODINGS =
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
10
|
+
SUPPORTED_ENCODINGS = Encoding.name_list.grep(
|
11
|
+
Regexp.union(
|
12
|
+
/^UTF-8$/,
|
13
|
+
/^UTF-...E$/,
|
14
|
+
/^ASCII-8BIT$/,
|
15
|
+
/^US-ASCII$/,
|
16
|
+
/^ISO-8859-/,
|
17
|
+
/^Windows-125/,
|
18
|
+
/^IBM/,
|
19
|
+
/^CP85/,
|
20
|
+
/^mac/,
|
21
|
+
/^TIS-620$/,
|
22
|
+
/^Windows-874$/,
|
23
|
+
/^KOI8/,
|
24
|
+
)
|
25
|
+
).sort.freeze
|
26
|
+
|
27
|
+
COLORS = {
|
28
|
+
invalid: "#FF0000",
|
29
|
+
unassigned: "#FF5500",
|
30
|
+
control: "#0000FF",
|
31
|
+
blank: "#33AADD",
|
32
|
+
format: "#FF00FF",
|
33
|
+
}
|
34
|
+
|
21
35
|
DEFAULT_TERMINAL_WIDTH = 80
|
22
36
|
|
23
37
|
def self.of(string, encoding: nil, convert: nil, stats: true, wide_ambiguous: false, width: nil)
|
@@ -69,7 +83,7 @@ module Unibits
|
|
69
83
|
|
70
84
|
current_encoding_error = nil if char_info.valid?
|
71
85
|
|
72
|
-
char.each_byte.with_index{ |byte,
|
86
|
+
char.each_byte.with_index{ |byte, byteindex|
|
73
87
|
if Paint.unpaint(hex_buffer[-1]).bytesize > cols - 12
|
74
88
|
cp_buffer << " "
|
75
89
|
enc_buffer << " "
|
@@ -78,7 +92,7 @@ module Unibits
|
|
78
92
|
separator << " "
|
79
93
|
end
|
80
94
|
|
81
|
-
if
|
95
|
+
if byteindex == 0
|
82
96
|
if char_info.valid?
|
83
97
|
codepoint = "U+%04X" % char.ord
|
84
98
|
else
|
@@ -178,9 +192,7 @@ module Unibits
|
|
178
192
|
end
|
179
193
|
end
|
180
194
|
|
181
|
-
cp_buffer[-1] << Paint[
|
182
|
-
codepoint.ljust(10), current_color, :bold
|
183
|
-
]
|
195
|
+
cp_buffer[-1] << Paint[ codepoint.ljust(10), current_color, :bold ]
|
184
196
|
|
185
197
|
symbolified_char = Symbolify.symbolify(char, char_info)
|
186
198
|
|
@@ -190,80 +202,16 @@ module Unibits
|
|
190
202
|
padding = 10 - symbolified_char.size
|
191
203
|
end
|
192
204
|
|
193
|
-
enc_buffer[-1] << Paint[
|
194
|
-
symbolified_char, current_color
|
195
|
-
]
|
205
|
+
enc_buffer[-1] << Paint[ symbolified_char, current_color ]
|
196
206
|
enc_buffer[-1] << " " * padding if padding > 0
|
197
207
|
else
|
198
208
|
cp_buffer[-1] << " " * 10
|
199
209
|
enc_buffer[-1] << " " * 10
|
200
210
|
end
|
201
211
|
|
202
|
-
hex_buffer[-1] << Paint[
|
203
|
-
("%02X" % byte).ljust(10, " "), current_color
|
204
|
-
]
|
205
|
-
|
206
|
-
bin_byte_complete = byte.to_s(2).rjust(8, "0")
|
207
|
-
|
208
|
-
if !char_info.valid?
|
209
|
-
bin_byte_1 = bin_byte_complete
|
210
|
-
bin_byte_2 = ""
|
211
|
-
else
|
212
|
-
case encoding_name
|
213
|
-
when 'US-ASCII'
|
214
|
-
bin_byte_1 = bin_byte_complete[0...1]
|
215
|
-
bin_byte_2 = bin_byte_complete[1...8]
|
216
|
-
when 'ASCII-8BIT'
|
217
|
-
bin_byte_1 = ""
|
218
|
-
bin_byte_2 = bin_byte_complete
|
219
|
-
when 'UTF-8'
|
220
|
-
if index == 0
|
221
|
-
if bin_byte_complete =~ /^(0|1{2,4}0)([01]+)$/
|
222
|
-
bin_byte_1 = $1
|
223
|
-
bin_byte_2 = $2
|
224
|
-
else
|
225
|
-
bin_byte_1 = ""
|
226
|
-
bin_byte_2 = bin_byte_complete
|
227
|
-
end
|
228
|
-
else
|
229
|
-
bin_byte_1 = bin_byte_complete[0...2]
|
230
|
-
bin_byte_2 = bin_byte_complete[2...8]
|
231
|
-
end
|
232
|
-
when 'UTF-16LE'
|
233
|
-
if char.ord <= 0xFFFF || index == 0 || index == 2
|
234
|
-
bin_byte_1 = ""
|
235
|
-
bin_byte_2 = bin_byte_complete
|
236
|
-
else
|
237
|
-
bin_byte_complete =~ /^(11011[01])([01]+)$/
|
238
|
-
bin_byte_1 = $1
|
239
|
-
bin_byte_2 = $2
|
240
|
-
end
|
241
|
-
when 'UTF-16BE'
|
242
|
-
if char.ord <= 0xFFFF || index == 1 || index == 3
|
243
|
-
bin_byte_1 = ""
|
244
|
-
bin_byte_2 = bin_byte_complete
|
245
|
-
else
|
246
|
-
bin_byte_complete =~ /^(11011[01])([01]+)$/
|
247
|
-
bin_byte_1 = $1
|
248
|
-
bin_byte_2 = $2
|
249
|
-
end
|
250
|
-
when 'UTF-32LE', 'UTF-32BE'
|
251
|
-
bin_byte_1 = ""
|
252
|
-
bin_byte_2 = bin_byte_complete
|
253
|
-
when /^(ISO-8859-|Windows-125)/
|
254
|
-
bin_byte_1 = ""
|
255
|
-
bin_byte_2 = bin_byte_complete
|
256
|
-
end
|
257
|
-
end
|
258
|
-
|
259
|
-
bin_buffer[-1] << Paint[
|
260
|
-
bin_byte_1, current_color
|
261
|
-
] unless !bin_byte_1 || bin_byte_1.empty?
|
262
|
-
|
263
|
-
bin_buffer[-1] << Paint[
|
264
|
-
bin_byte_2, current_color, :underline
|
265
|
-
] unless !bin_byte_2 || bin_byte_2.empty?
|
212
|
+
hex_buffer[-1] << Paint[ ("%02X" % byte).ljust(10, " "), current_color ]
|
266
213
|
|
214
|
+
bin_buffer[-1] << highlight_bits(byte, char, char_info, current_color, byteindex)
|
267
215
|
bin_buffer[-1] << " "
|
268
216
|
}
|
269
217
|
}
|
@@ -283,13 +231,15 @@ module Unibits
|
|
283
231
|
|
284
232
|
def self.determine_char_color(char_info)
|
285
233
|
if !char_info.valid?
|
286
|
-
|
234
|
+
COLORS[:invalid]
|
287
235
|
elsif !char_info.assigned?
|
288
|
-
|
236
|
+
COLORS[:unassigned]
|
289
237
|
elsif char_info.control?
|
290
|
-
|
238
|
+
COLORS[:control]
|
291
239
|
elsif char_info.blank?
|
292
|
-
|
240
|
+
COLORS[:blank]
|
241
|
+
elsif char_info.format?
|
242
|
+
COLORS[:format]
|
293
243
|
else
|
294
244
|
random_color
|
295
245
|
end
|
@@ -298,4 +248,61 @@ module Unibits
|
|
298
248
|
def self.random_color
|
299
249
|
"%.2x%.2x%.2x" % [rand(90) + 60, rand(90) + 60, rand(90) + 60]
|
300
250
|
end
|
251
|
+
|
252
|
+
def self.highlight_bits(byte, char, char_info, current_color, byteindex)
|
253
|
+
bin_byte_complete = byte.to_s(2).rjust(8, "0")
|
254
|
+
|
255
|
+
if !char_info.valid?
|
256
|
+
bin_byte_1 = bin_byte_complete
|
257
|
+
bin_byte_2 = ""
|
258
|
+
else
|
259
|
+
case char_info.encoding.name
|
260
|
+
when 'US-ASCII'
|
261
|
+
bin_byte_1 = bin_byte_complete[0...1]
|
262
|
+
bin_byte_2 = bin_byte_complete[1...8]
|
263
|
+
when 'ASCII-8BIT'
|
264
|
+
bin_byte_1 = ""
|
265
|
+
bin_byte_2 = bin_byte_complete
|
266
|
+
when 'UTF-8'
|
267
|
+
if byteindex == 0
|
268
|
+
if bin_byte_complete =~ /^(0|1{2,4}0)([01]+)$/
|
269
|
+
bin_byte_1 = $1
|
270
|
+
bin_byte_2 = $2
|
271
|
+
else
|
272
|
+
bin_byte_1 = ""
|
273
|
+
bin_byte_2 = bin_byte_complete
|
274
|
+
end
|
275
|
+
else
|
276
|
+
bin_byte_1 = bin_byte_complete[0...2]
|
277
|
+
bin_byte_2 = bin_byte_complete[2...8]
|
278
|
+
end
|
279
|
+
when 'UTF-16LE'
|
280
|
+
if char.ord <= 0xFFFF || byteindex == 0 || byteindex == 2
|
281
|
+
bin_byte_1 = ""
|
282
|
+
bin_byte_2 = bin_byte_complete
|
283
|
+
else
|
284
|
+
bin_byte_complete =~ /^(11011[01])([01]+)$/
|
285
|
+
bin_byte_1 = $1
|
286
|
+
bin_byte_2 = $2
|
287
|
+
end
|
288
|
+
when 'UTF-16BE'
|
289
|
+
if char.ord <= 0xFFFF || byteindex == 1 || byteindex == 3
|
290
|
+
bin_byte_1 = ""
|
291
|
+
bin_byte_2 = bin_byte_complete
|
292
|
+
else
|
293
|
+
bin_byte_complete =~ /^(11011[01])([01]+)$/
|
294
|
+
bin_byte_1 = $1
|
295
|
+
bin_byte_2 = $2
|
296
|
+
end
|
297
|
+
else
|
298
|
+
bin_byte_1 = ""
|
299
|
+
bin_byte_2 = bin_byte_complete
|
300
|
+
end
|
301
|
+
end
|
302
|
+
|
303
|
+
res = ""
|
304
|
+
res << Paint[ bin_byte_1, current_color ] unless !bin_byte_1 || bin_byte_1.empty?
|
305
|
+
res << Paint[ bin_byte_2, current_color, :underline ] unless !bin_byte_2 || bin_byte_2.empty?
|
306
|
+
res
|
307
|
+
end
|
301
308
|
end
|
data/spec/unibits_spec.rb
CHANGED
@@ -67,7 +67,7 @@ describe Unibits do
|
|
67
67
|
result.must_match "01000011"
|
68
68
|
end
|
69
69
|
|
70
|
-
it "works with 'ISO-8859-' encodings" do
|
70
|
+
it "works with 'ISO-8859-X' encodings" do
|
71
71
|
string = "\xBC Idiosyncr\xE4tic\n\x91".force_encoding("ISO-8859-1")
|
72
72
|
result = Paint.unpaint(Unibits.visualize(string))
|
73
73
|
result.must_match "BC" # ¼
|
@@ -76,7 +76,7 @@ describe Unibits do
|
|
76
76
|
result.must_match "PU1" # C1 name for \x91
|
77
77
|
end
|
78
78
|
|
79
|
-
it "works with 'Windows-
|
79
|
+
it "works with 'Windows-125X' encodings" do
|
80
80
|
string = "\xBC Idiosyncr\xE4tic\n\x81".force_encoding("Windows-1252")
|
81
81
|
result = Paint.unpaint(Unibits.visualize(string))
|
82
82
|
result.must_match "BC" # ¼
|
@@ -85,6 +85,46 @@ describe Unibits do
|
|
85
85
|
result.must_match "n/a" # \x81 is not assigned
|
86
86
|
end
|
87
87
|
|
88
|
+
it "works with 'IBMX' encodings" do
|
89
|
+
string = "\xFE Idiosyncr\x84tic\n".force_encoding("IBM437")
|
90
|
+
result = Paint.unpaint(Unibits.visualize(string))
|
91
|
+
result.must_match "FE" # ■
|
92
|
+
result.must_match "84" # ä
|
93
|
+
result.must_match "␊" # \n
|
94
|
+
end
|
95
|
+
|
96
|
+
it "works with 'CP85X' encodings" do
|
97
|
+
string = "\xFE Idiosyncr\x84tic\n".force_encoding("CP850")
|
98
|
+
result = Paint.unpaint(Unibits.visualize(string))
|
99
|
+
result.must_match "FE" # ■
|
100
|
+
result.must_match "84" # ä
|
101
|
+
result.must_match "␊" # \n
|
102
|
+
end
|
103
|
+
|
104
|
+
it "works with 'macX' encodings" do
|
105
|
+
string = "\xBD Idiosyncr\x8Atic \x11 \xF0\n".force_encoding("macRoman")
|
106
|
+
result = Paint.unpaint(Unibits.visualize(string))
|
107
|
+
result.must_match "BD" # Ω
|
108
|
+
result.must_match "8A" # ä
|
109
|
+
result.must_match "Logo" # \xF0
|
110
|
+
result.must_match "⌘" # \x11
|
111
|
+
result.must_match "␊" # \n
|
112
|
+
end
|
113
|
+
|
114
|
+
it "works with 'TIS-620/Windows-874' encodings" do
|
115
|
+
string = "\xA4 Idiosyncratic\n".force_encoding("TIS-620")
|
116
|
+
result = Paint.unpaint(Unibits.visualize(string))
|
117
|
+
result.must_match "A4" # ค
|
118
|
+
result.must_match "␊" # \n
|
119
|
+
end
|
120
|
+
|
121
|
+
it "works with 'KOI8-X' encodings" do
|
122
|
+
string = "\xE9\xE4\xE9\xEF\xF3\xF9\xEE\xE3\xF2\xE1\xF4\xE9\xE3\n".force_encoding("KOI8-R")
|
123
|
+
result = Paint.unpaint(Unibits.visualize(string))
|
124
|
+
result.must_match "F9" # Ы
|
125
|
+
result.must_match "␊" # \n
|
126
|
+
end
|
127
|
+
|
88
128
|
describe "invalid UTF-8 encodings" do
|
89
129
|
it "- unexpected continuation byte (1/2)" do
|
90
130
|
string = "abc\x80efg"
|
@@ -218,6 +258,22 @@ describe Unibits do
|
|
218
258
|
result.must_match "incompl."
|
219
259
|
result.must_match "�"
|
220
260
|
end
|
261
|
+
|
262
|
+
# TODO implement when https://bugs.ruby-lang.org/issues/13292 is released
|
263
|
+
|
264
|
+
# it "- too large codepoint" do
|
265
|
+
# string = "\x00\x00\x11\x00".force_encoding("UTF-32LE")
|
266
|
+
# result = Paint.unpaint(Unibits.visualize(string))
|
267
|
+
# result.must_match "�"
|
268
|
+
# result.must_match /toolarge.*toolarge.*toolarge.*toolarge/m
|
269
|
+
# end
|
270
|
+
|
271
|
+
# it "- has surrogate" do
|
272
|
+
# string = "\x00\xD8\x00\x00".force_encoding("UTF-32LE")
|
273
|
+
# result = Paint.unpaint(Unibits.visualize(string))
|
274
|
+
# result.must_match "�"
|
275
|
+
# result.must_match "sur.gate"
|
276
|
+
# end
|
221
277
|
end
|
222
278
|
|
223
279
|
describe "invalid ASCII encodings" do
|
data/unibits.gemspec
CHANGED
@@ -12,14 +12,14 @@ Gem::Specification.new do |gem|
|
|
12
12
|
gem.homepage = "https://github.com/janlelis/unibits"
|
13
13
|
gem.license = "MIT"
|
14
14
|
|
15
|
-
gem.files = Dir["{**/}{.*,*}"].select{ |path| File.file?(path) && path !~ /^pkg/ }
|
15
|
+
gem.files = Dir["{**/}{.*,*}"].select{ |path| File.file?(path) && path !~ /^(pkg|screenshots)/ }
|
16
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
19
|
|
20
20
|
gem.add_dependency 'paint', '>= 0.9', '< 3.0'
|
21
21
|
gem.add_dependency 'unicode-display_width', '~> 1.1'
|
22
|
-
gem.add_dependency 'characteristics', '~> 0.
|
22
|
+
gem.add_dependency 'characteristics', '~> 0.3.0'
|
23
23
|
gem.add_dependency 'rationalist', '~> 2.0'
|
24
24
|
|
25
25
|
gem.required_ruby_version = "~> 2.0"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unibits
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: paint
|
@@ -50,14 +50,14 @@ dependencies:
|
|
50
50
|
requirements:
|
51
51
|
- - "~>"
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: 0.
|
53
|
+
version: 0.3.0
|
54
54
|
type: :runtime
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
57
|
requirements:
|
58
58
|
- - "~>"
|
59
59
|
- !ruby/object:Gem::Version
|
60
|
-
version: 0.
|
60
|
+
version: 0.3.0
|
61
61
|
- !ruby/object:Gem::Dependency
|
62
62
|
name: rationalist
|
63
63
|
requirement: !ruby/object:Gem::Requirement
|
@@ -96,15 +96,6 @@ files:
|
|
96
96
|
- lib/unibits/kernel_method.rb
|
97
97
|
- lib/unibits/symbolify.rb
|
98
98
|
- lib/unibits/version.rb
|
99
|
-
- screenshots/ascii.invalid.png
|
100
|
-
- screenshots/ascii.png
|
101
|
-
- screenshots/binary.png
|
102
|
-
- screenshots/utf-16be.png
|
103
|
-
- screenshots/utf-16le.png
|
104
|
-
- screenshots/utf-32be.png
|
105
|
-
- screenshots/utf-32le.png
|
106
|
-
- screenshots/utf-8.invalid.png
|
107
|
-
- screenshots/utf-8.png
|
108
99
|
- spec/unibits_spec.rb
|
109
100
|
- unibits.gemspec
|
110
101
|
homepage: https://github.com/janlelis/unibits
|
Binary file
|
data/screenshots/ascii.png
DELETED
Binary file
|
data/screenshots/binary.png
DELETED
Binary file
|
data/screenshots/utf-16be.png
DELETED
Binary file
|
data/screenshots/utf-16le.png
DELETED
Binary file
|
data/screenshots/utf-32be.png
DELETED
Binary file
|
data/screenshots/utf-32le.png
DELETED
Binary file
|
Binary file
|
data/screenshots/utf-8.png
DELETED
Binary file
|