unibits 2.1.0 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +4 -24
- data/lib/unibits.rb +17 -2
- data/lib/unibits/version.rb +1 -1
- data/spec/unibits_spec.rb +20 -15
- data/unibits.gemspec +2 -2
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 62770bc741ec5a7693759d4e888d46cf7b3860f2
|
4
|
+
data.tar.gz: 171c479f1f4ebdcf11bc97c4e4e0b94bac713abb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 793cdfeca34183b1b1f6cc219ddd822b9faac5542e92f1ec1065c3fd6ff07c38762becc5312d9bd29fbe3f976bd6eabb3e4c5e228bcfbe8e6340e90b5407234e
|
7
|
+
data.tar.gz: 9691e9072ff3084706e5dfe1ed51b1e6c206d2b11a9eaf710a37ee7b5f1e09b15a6eb3d85e3cb4517a64ef9e5b6aa0bb6bfadee72285487a4a1d0f3f66fa32bd
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -4,8 +4,8 @@ Ruby library and CLI command that visualizes various Unicode and ASCII/single by
|
|
4
4
|
|
5
5
|
- Makes analyzing encodings easier
|
6
6
|
- Helps you with debugging strings
|
7
|
-
- Supports **UTF-8**, **UTF-16LE**/**UTF-16BE**, **UTF-32LE**/**UTF-32BE**, **ISO-8859-X**, **Windows-125X**, **IBMX**, **CP85X**, **macX**, **TIS-620**/**Windows-874**, **KOI8-R**/**KOI8-U**, arbitrary **BINARY** data, and 7-Bit **ASCII**
|
8
7
|
- Highlights invalid/special/blank bytes/characters/codepoints
|
8
|
+
- Supports *UTF-8*, *UTF-16LE*/*UTF-16BE*, *UTF-32LE*/*UTF-32BE*, *ISO-8859-X*, *Windows-125X*, *IBMX*, *CP85X*, *macX*, *TIS-620*/*Windows-874*, *KOI8-R*/*KOI8-U*, 7-Bit *ASCII*, and arbitrary *BINARY* data
|
9
9
|
|
10
10
|
## Color Coding
|
11
11
|
|
@@ -53,7 +53,7 @@ unibits "🌫 Idiosyncrätic ℜսᖯʏ"
|
|
53
53
|
- *wide-ambiguous*: Treat characters of ambiguous width as 2 spaces instead of 1 ([more info](https://github.com/janlelis/unicode-display_width))
|
54
54
|
- *width (w)*: Set a custom column width, if not set, *unibits* will retrieve it from the terminal or just use 80
|
55
55
|
|
56
|
-
##
|
56
|
+
## Examples of Valid Encodings
|
57
57
|
### UTF-8
|
58
58
|
|
59
59
|
CLI: `$ unibits -e utf-8 -c utf-8 "🌫 Idiosyncrätic ℜսᖯʏ"`
|
@@ -70,22 +70,6 @@ Ruby: `unibits "🌫 Idiosyncrätic ℜսᖯʏ", encoding: 'utf-8', convert:
|
|
70
70
|
|
71
71
|
![Screenshot UTF-16LE](/screenshots/utf-16le.png?raw=true "UTF-16LE")
|
72
72
|
|
73
|
-
### UTF-16BE
|
74
|
-
|
75
|
-
CLI: `$ unibits -e utf-8 -c utf-16be "🌫 Idiosyncrätic ℜսᖯʏ"`
|
76
|
-
|
77
|
-
Ruby: `unibits "🌫 Idiosyncrätic ℜսᖯʏ", encoding: 'utf-8', convert: 'utf-16be'`
|
78
|
-
|
79
|
-
![Screenshot UTF-16BE](/screenshots/utf-16be.png?raw=true "UTF-16BE")
|
80
|
-
|
81
|
-
### UTF-32LE
|
82
|
-
|
83
|
-
CLI: `$ unibits -e utf-8 -c utf-32le "🌫 Idiosyncrätic ℜսᖯʏ"`
|
84
|
-
|
85
|
-
Ruby: `unibits "🌫 Idiosyncrätic ℜսᖯʏ", encoding: 'utf-8', convert: 'utf-32le'`
|
86
|
-
|
87
|
-
![Screenshot UTF-32LE](/screenshots/utf-32le.png?raw=true "UTF-32LE")
|
88
|
-
|
89
73
|
### UTF-32BE
|
90
74
|
|
91
75
|
CLI: `$ unibits -e utf-8 -c utf-32be "🌫 Idiosyncrätic ℜսᖯʏ"`
|
@@ -106,11 +90,11 @@ Ruby: `unibits "🌫 Idiosyncrätic ℜսᖯʏ", encoding: 'binary'`
|
|
106
90
|
|
107
91
|
CLI: `$ unibits -e utf-8 -c ascii "ascii"`
|
108
92
|
|
109
|
-
Ruby: `unibits "
|
93
|
+
Ruby: `unibits "ascii", encoding: 'utf-8', convert: 'ascii'`
|
110
94
|
|
111
95
|
![Screenshot ASCII](/screenshots/ascii.png?raw=true "ASCII")
|
112
96
|
|
113
|
-
## Invalid Encodings
|
97
|
+
## Examples of Invalid Encodings
|
114
98
|
### UTF-8
|
115
99
|
|
116
100
|
Example in Ruby: `unibits "unexpected \x80 | not enough \xF0\x9F\x8C | overlong \xE0\x81\x81 | surrogate \xED\xA0\x80 | too large \xF5\x8F\xBF\xBF"`
|
@@ -123,10 +107,6 @@ Example in Ruby: `unibits "🌫 Idiosyncrätic ℜսᖯʏ", encoding: 'ascii'
|
|
123
107
|
|
124
108
|
![Screenshot invalid ASCII](/screenshots/ascii.invalid.png?raw=true "Invalid ASCII")
|
125
109
|
|
126
|
-
### BINARY
|
127
|
-
|
128
|
-
Not possible to produce invalid binary strings
|
129
|
-
|
130
110
|
## Notes
|
131
111
|
|
132
112
|
Also see
|
data/lib/unibits.rb
CHANGED
@@ -79,6 +79,7 @@ module Unibits
|
|
79
79
|
puts
|
80
80
|
string.each_char{ |char|
|
81
81
|
char_info = Characteristics.create_for_type(char, type)
|
82
|
+
double_check_utf32_validness!(char, char_info)
|
82
83
|
current_color = determine_char_color(char_info)
|
83
84
|
|
84
85
|
current_encoding_error = nil if char_info.valid?
|
@@ -184,10 +185,13 @@ module Unibits
|
|
184
185
|
codepoint = "invalid"
|
185
186
|
end
|
186
187
|
when 'UTF-32LE', 'UTF-32BE'
|
187
|
-
if char.bytesize !=
|
188
|
+
if char.bytesize % 4 != 0
|
188
189
|
codepoint = "incompl."
|
190
|
+
elsif char.b.unpack("C*")[encoding_name == 'UTF-32LE' ? 2 : 1] > 16 ||
|
191
|
+
char.b.unpack("C*")[encoding_name == 'UTF-32LE' ? 3 : 0] > 0
|
192
|
+
codepoint = "toolarge"
|
189
193
|
else
|
190
|
-
codepoint = "
|
194
|
+
codepoint = "sur.gate"
|
191
195
|
end
|
192
196
|
end
|
193
197
|
end
|
@@ -305,4 +309,15 @@ module Unibits
|
|
305
309
|
res << Paint[ bin_byte_2, current_color, :underline ] unless !bin_byte_2 || bin_byte_2.empty?
|
306
310
|
res
|
307
311
|
end
|
312
|
+
|
313
|
+
def self.double_check_utf32_validness!(char, char_info)
|
314
|
+
return if RUBY_VERSION > "2.4.0" || char_info.encoding.name[0, 6] != "UTF-32" || !char_info.valid?
|
315
|
+
byte_values = char.b.unpack("C*")
|
316
|
+
le = char_info.encoding.name == 'UTF-32LE'
|
317
|
+
if byte_values[le ? 2 : 1] > 16 ||
|
318
|
+
byte_values[le ? 3 : 0] > 0 ||
|
319
|
+
byte_values[le ? 1 : 2] >= 216 && byte_values[le ? 1 : 2] <= 223
|
320
|
+
char_info.instance_variable_set(:@is_valid, false)
|
321
|
+
end
|
322
|
+
end
|
308
323
|
end
|
data/lib/unibits/version.rb
CHANGED
data/spec/unibits_spec.rb
CHANGED
@@ -259,21 +259,26 @@ describe Unibits do
|
|
259
259
|
result.must_match "�"
|
260
260
|
end
|
261
261
|
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
262
|
+
it "- too large codepoint (1/2)" do
|
263
|
+
string = "\x00\x00\x11\x00".force_encoding("UTF-32LE")
|
264
|
+
result = Paint.unpaint(Unibits.visualize(string))
|
265
|
+
result.must_match "�"
|
266
|
+
result.must_match "toolarge"
|
267
|
+
end
|
268
|
+
|
269
|
+
it "- too large codepoint (2/2)" do
|
270
|
+
string = "\x00\x00\x00\x01".force_encoding("UTF-32LE")
|
271
|
+
result = Paint.unpaint(Unibits.visualize(string))
|
272
|
+
result.must_match "�"
|
273
|
+
result.must_match "toolarge"
|
274
|
+
end
|
275
|
+
|
276
|
+
it "- has surrogate" do
|
277
|
+
string = "\x00\xD8\x00\x00".force_encoding("UTF-32LE")
|
278
|
+
result = Paint.unpaint(Unibits.visualize(string))
|
279
|
+
result.must_match "�"
|
280
|
+
result.must_match "sur.gate"
|
281
|
+
end
|
277
282
|
end
|
278
283
|
|
279
284
|
describe "invalid ASCII encodings" do
|
data/unibits.gemspec
CHANGED
@@ -5,8 +5,8 @@ require File.dirname(__FILE__) + "/lib/unibits/version"
|
|
5
5
|
Gem::Specification.new do |gem|
|
6
6
|
gem.name = "unibits"
|
7
7
|
gem.version = Unibits::VERSION
|
8
|
-
gem.summary = "Visualizes
|
9
|
-
gem.description = "Visualizes
|
8
|
+
gem.summary = "Visualizes encodings."
|
9
|
+
gem.description = "Visualizes encodings in the terminal. Supports UTF-8, UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE, US-ASCII, ASCII-8BIT, and most of Rubies single-byte encodings. Comes as CLI command and as Ruby Kernel method."
|
10
10
|
gem.authors = ["Jan Lelis"]
|
11
11
|
gem.email = ["mail@janlelis.de"]
|
12
12
|
gem.homepage = "https://github.com/janlelis/unibits"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unibits
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.
|
4
|
+
version: 2.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: paint
|
@@ -72,9 +72,9 @@ dependencies:
|
|
72
72
|
- - "~>"
|
73
73
|
- !ruby/object:Gem::Version
|
74
74
|
version: '2.0'
|
75
|
-
description: Visualizes
|
76
|
-
UTF-
|
77
|
-
as Ruby Kernel method.
|
75
|
+
description: Visualizes encodings in the terminal. Supports UTF-8, UTF-16LE, UTF-16BE,
|
76
|
+
UTF-32LE, UTF-32BE, US-ASCII, ASCII-8BIT, and most of Rubies single-byte encodings.
|
77
|
+
Comes as CLI command and as Ruby Kernel method.
|
78
78
|
email:
|
79
79
|
- mail@janlelis.de
|
80
80
|
executables:
|
@@ -121,6 +121,6 @@ rubyforge_project:
|
|
121
121
|
rubygems_version: 2.6.8
|
122
122
|
signing_key:
|
123
123
|
specification_version: 4
|
124
|
-
summary: Visualizes
|
124
|
+
summary: Visualizes encodings.
|
125
125
|
test_files:
|
126
126
|
- spec/unibits_spec.rb
|