unibits 2.1.0 → 2.1.1
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +4 -24
- data/lib/unibits.rb +17 -2
- data/lib/unibits/version.rb +1 -1
- data/spec/unibits_spec.rb +20 -15
- data/unibits.gemspec +2 -2
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 62770bc741ec5a7693759d4e888d46cf7b3860f2
|
4
|
+
data.tar.gz: 171c479f1f4ebdcf11bc97c4e4e0b94bac713abb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 793cdfeca34183b1b1f6cc219ddd822b9faac5542e92f1ec1065c3fd6ff07c38762becc5312d9bd29fbe3f976bd6eabb3e4c5e228bcfbe8e6340e90b5407234e
|
7
|
+
data.tar.gz: 9691e9072ff3084706e5dfe1ed51b1e6c206d2b11a9eaf710a37ee7b5f1e09b15a6eb3d85e3cb4517a64ef9e5b6aa0bb6bfadee72285487a4a1d0f3f66fa32bd
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -4,8 +4,8 @@ Ruby library and CLI command that visualizes various Unicode and ASCII/single by
|
|
4
4
|
|
5
5
|
- Makes analyzing encodings easier
|
6
6
|
- Helps you with debugging strings
|
7
|
-
- Supports **UTF-8**, **UTF-16LE**/**UTF-16BE**, **UTF-32LE**/**UTF-32BE**, **ISO-8859-X**, **Windows-125X**, **IBMX**, **CP85X**, **macX**, **TIS-620**/**Windows-874**, **KOI8-R**/**KOI8-U**, arbitrary **BINARY** data, and 7-Bit **ASCII**
|
8
7
|
- Highlights invalid/special/blank bytes/characters/codepoints
|
8
|
+
- Supports *UTF-8*, *UTF-16LE*/*UTF-16BE*, *UTF-32LE*/*UTF-32BE*, *ISO-8859-X*, *Windows-125X*, *IBMX*, *CP85X*, *macX*, *TIS-620*/*Windows-874*, *KOI8-R*/*KOI8-U*, 7-Bit *ASCII*, and arbitrary *BINARY* data
|
9
9
|
|
10
10
|
## Color Coding
|
11
11
|
|
@@ -53,7 +53,7 @@ unibits "🌫 Idiosyncrätic ℜսᖯʏ"
|
|
53
53
|
- *wide-ambiguous*: Treat characters of ambiguous width as 2 spaces instead of 1 ([more info](https://github.com/janlelis/unicode-display_width))
|
54
54
|
- *width (w)*: Set a custom column width; if not set, *unibits* will retrieve it from the terminal or fall back to 80
|
55
55
|
|
56
|
-
##
|
56
|
+
## Examples of Valid Encodings
|
57
57
|
### UTF-8
|
58
58
|
|
59
59
|
CLI: `$ unibits -e utf-8 -c utf-8 "🌫 Idiosyncrätic ℜսᖯʏ"`
|
@@ -70,22 +70,6 @@ Ruby: `unibits "🌫 Idiosyncrätic ℜսᖯʏ", encoding: 'utf-8', convert:
|
|
70
70
|
|
71
71
|

|
72
72
|
|
73
|
-
### UTF-16BE
|
74
|
-
|
75
|
-
CLI: `$ unibits -e utf-8 -c utf-16be "🌫 Idiosyncrätic ℜսᖯʏ"`
|
76
|
-
|
77
|
-
Ruby: `unibits "🌫 Idiosyncrätic ℜսᖯʏ", encoding: 'utf-8', convert: 'utf-16be'`
|
78
|
-
|
79
|
-

|
80
|
-
|
81
|
-
### UTF-32LE
|
82
|
-
|
83
|
-
CLI: `$ unibits -e utf-8 -c utf-32le "🌫 Idiosyncrätic ℜսᖯʏ"`
|
84
|
-
|
85
|
-
Ruby: `unibits "🌫 Idiosyncrätic ℜսᖯʏ", encoding: 'utf-8', convert: 'utf-32le'`
|
86
|
-
|
87
|
-

|
88
|
-
|
89
73
|
### UTF-32BE
|
90
74
|
|
91
75
|
CLI: `$ unibits -e utf-8 -c utf-32be "🌫 Idiosyncrätic ℜսᖯʏ"`
|
@@ -106,11 +90,11 @@ Ruby: `unibits "🌫 Idiosyncrätic ℜսᖯʏ", encoding: 'binary'`
|
|
106
90
|
|
107
91
|
CLI: `$ unibits -e utf-8 -c ascii "ascii"`
|
108
92
|
|
109
|
-
Ruby: `unibits "
|
93
|
+
Ruby: `unibits "ascii", encoding: 'utf-8', convert: 'ascii'`
|
110
94
|
|
111
95
|

|
112
96
|
|
113
|
-
## Invalid Encodings
|
97
|
+
## Examples of Invalid Encodings
|
114
98
|
### UTF-8
|
115
99
|
|
116
100
|
Example in Ruby: `unibits "unexpected \x80 | not enough \xF0\x9F\x8C | overlong \xE0\x81\x81 | surrogate \xED\xA0\x80 | too large \xF5\x8F\xBF\xBF"`
|
@@ -123,10 +107,6 @@ Example in Ruby: `unibits "🌫 Idiosyncrätic ℜսᖯʏ", encoding: 'ascii'
|
|
123
107
|
|
124
108
|

|
125
109
|
|
126
|
-
### BINARY
|
127
|
-
|
128
|
-
Not possible to produce invalid binary strings
|
129
|
-
|
130
110
|
## Notes
|
131
111
|
|
132
112
|
Also see
|
data/lib/unibits.rb
CHANGED
@@ -79,6 +79,7 @@ module Unibits
|
|
79
79
|
puts
|
80
80
|
string.each_char{ |char|
|
81
81
|
char_info = Characteristics.create_for_type(char, type)
|
82
|
+
double_check_utf32_validness!(char, char_info)
|
82
83
|
current_color = determine_char_color(char_info)
|
83
84
|
|
84
85
|
current_encoding_error = nil if char_info.valid?
|
@@ -184,10 +185,13 @@ module Unibits
|
|
184
185
|
codepoint = "invalid"
|
185
186
|
end
|
186
187
|
when 'UTF-32LE', 'UTF-32BE'
|
187
|
-
if char.bytesize !=
|
188
|
+
if char.bytesize % 4 != 0
|
188
189
|
codepoint = "incompl."
|
190
|
+
elsif char.b.unpack("C*")[encoding_name == 'UTF-32LE' ? 2 : 1] > 16 ||
|
191
|
+
char.b.unpack("C*")[encoding_name == 'UTF-32LE' ? 3 : 0] > 0
|
192
|
+
codepoint = "toolarge"
|
189
193
|
else
|
190
|
-
codepoint = "
|
194
|
+
codepoint = "sur.gate"
|
191
195
|
end
|
192
196
|
end
|
193
197
|
end
|
@@ -305,4 +309,15 @@ module Unibits
|
|
305
309
|
res << Paint[ bin_byte_2, current_color, :underline ] unless !bin_byte_2 || bin_byte_2.empty?
|
306
310
|
res
|
307
311
|
end
|
312
|
+
|
313
|
+
def self.double_check_utf32_validness!(char, char_info)
|
314
|
+
return if RUBY_VERSION > "2.4.0" || char_info.encoding.name[0, 6] != "UTF-32" || !char_info.valid?
|
315
|
+
byte_values = char.b.unpack("C*")
|
316
|
+
le = char_info.encoding.name == 'UTF-32LE'
|
317
|
+
if byte_values[le ? 2 : 1] > 16 ||
|
318
|
+
byte_values[le ? 3 : 0] > 0 ||
|
319
|
+
byte_values[le ? 1 : 2] >= 216 && byte_values[le ? 1 : 2] <= 223
|
320
|
+
char_info.instance_variable_set(:@is_valid, false)
|
321
|
+
end
|
322
|
+
end
|
308
323
|
end
|
data/lib/unibits/version.rb
CHANGED
data/spec/unibits_spec.rb
CHANGED
@@ -259,21 +259,26 @@ describe Unibits do
|
|
259
259
|
result.must_match "�"
|
260
260
|
end
|
261
261
|
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
262
|
+
it "- too large codepoint (1/2)" do
|
263
|
+
string = "\x00\x00\x11\x00".force_encoding("UTF-32LE")
|
264
|
+
result = Paint.unpaint(Unibits.visualize(string))
|
265
|
+
result.must_match "�"
|
266
|
+
result.must_match "toolarge"
|
267
|
+
end
|
268
|
+
|
269
|
+
it "- too large codepoint (2/2)" do
|
270
|
+
string = "\x00\x00\x00\x01".force_encoding("UTF-32LE")
|
271
|
+
result = Paint.unpaint(Unibits.visualize(string))
|
272
|
+
result.must_match "�"
|
273
|
+
result.must_match "toolarge"
|
274
|
+
end
|
275
|
+
|
276
|
+
it "- has surrogate" do
|
277
|
+
string = "\x00\xD8\x00\x00".force_encoding("UTF-32LE")
|
278
|
+
result = Paint.unpaint(Unibits.visualize(string))
|
279
|
+
result.must_match "�"
|
280
|
+
result.must_match "sur.gate"
|
281
|
+
end
|
277
282
|
end
|
278
283
|
|
279
284
|
describe "invalid ASCII encodings" do
|
data/unibits.gemspec
CHANGED
@@ -5,8 +5,8 @@ require File.dirname(__FILE__) + "/lib/unibits/version"
|
|
5
5
|
Gem::Specification.new do |gem|
|
6
6
|
gem.name = "unibits"
|
7
7
|
gem.version = Unibits::VERSION
|
8
|
-
gem.summary = "Visualizes
|
9
|
-
gem.description = "Visualizes
|
8
|
+
gem.summary = "Visualizes encodings."
|
9
|
+
gem.description = "Visualizes encodings in the terminal. Supports UTF-8, UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE, US-ASCII, ASCII-8BIT, and most of Rubies single-byte encodings. Comes as CLI command and as Ruby Kernel method."
|
10
10
|
gem.authors = ["Jan Lelis"]
|
11
11
|
gem.email = ["mail@janlelis.de"]
|
12
12
|
gem.homepage = "https://github.com/janlelis/unibits"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unibits
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.0
|
4
|
+
version: 2.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: paint
|
@@ -72,9 +72,9 @@ dependencies:
|
|
72
72
|
- - "~>"
|
73
73
|
- !ruby/object:Gem::Version
|
74
74
|
version: '2.0'
|
75
|
-
description: Visualizes
|
76
|
-
UTF-
|
77
|
-
as Ruby Kernel method.
|
75
|
+
description: Visualizes encodings in the terminal. Supports UTF-8, UTF-16LE, UTF-16BE,
|
76
|
+
UTF-32LE, UTF-32BE, US-ASCII, ASCII-8BIT, and most of Rubies single-byte encodings.
|
77
|
+
Comes as CLI command and as Ruby Kernel method.
|
78
78
|
email:
|
79
79
|
- mail@janlelis.de
|
80
80
|
executables:
|
@@ -121,6 +121,6 @@ rubyforge_project:
|
|
121
121
|
rubygems_version: 2.6.8
|
122
122
|
signing_key:
|
123
123
|
specification_version: 4
|
124
|
-
summary: Visualizes
|
124
|
+
summary: Visualizes encodings.
|
125
125
|
test_files:
|
126
126
|
- spec/unibits_spec.rb
|