unibits 2.4.0 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -1
- data/CHANGELOG.md +10 -0
- data/README.md +6 -4
- data/bin/unibits +3 -2
- data/lib/unibits.rb +19 -4
- data/lib/unibits/version.rb +1 -1
- data/spec/unibits_spec.rb +7 -0
- data/unibits.gemspec +2 -2
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 89ef5ce08435a61d17d624f01b9355753446667c
|
4
|
+
data.tar.gz: 2c758d25836c61613a0ab1c60f8196b93421c979
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fc2087917695cd6855477ddaa7fa973da3c188a814564e13d7331ee4c3744aa7338efef1397b3ed6fe3a9b502522dbcace5b7d7eb20f409ecac74a56ec74d200
|
7
|
+
data.tar.gz: 5428461b0a77d91f746bfc565072f9a0baf126b0d7ce40d88113b54e56de4678a209bf01a03ff2fc9c84e8f38c34ec31685b62e8b757b0698934d33a64544753
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
+
### 2.5.0
|
4
|
+
|
5
|
+
* Double check UTF-32 only on Ruby versions which contain the bug
|
6
|
+
* Highlight unassigned codepoints which are ignorable
|
7
|
+
* Bump symbolify dependency
|
8
|
+
* Add special characters (U+FFF9 - U+FFFC)
|
9
|
+
* Non-control separators return ⏎
|
10
|
+
* Bump characteristics dependency
|
11
|
+
* Allow GB1988 encoding (7-bit, ASCII-like)
|
12
|
+
|
3
13
|
### 2.4.0
|
4
14
|
|
5
15
|
* Extract symbolification logic into extra [symbolify](https://github.com/janlelis/symbolify) gem (includes fixes and non-character detection)
|
data/README.md
CHANGED
@@ -5,19 +5,20 @@ Ruby library and CLI command that visualizes various Unicode and ASCII/single by
|
|
5
5
|
- Makes analyzing encodings easier
|
6
6
|
- Helps you with debugging strings
|
7
7
|
- Highlights invalid/special/blank bytes/characters/codepoints
|
8
|
-
- Supports *UTF-8*, *UTF-16LE*/*UTF-16BE*, *UTF-32LE*/*UTF-32BE*, *ISO-8859-X*, *Windows-125X*, *IBMX*, *CP85X*, *macX*, *TIS-620*/*Windows-874*, *KOI8-R*/*KOI8-U*, 7-Bit *ASCII*, and arbitrary *BINARY* data
|
8
|
+
- Supports *UTF-8*, *UTF-16LE*/*UTF-16BE*, *UTF-32LE*/*UTF-32BE*, *ISO-8859-X*, *Windows-125X*, *IBMX*, *CP85X*, *macX*, *TIS-620*/*Windows-874*, *KOI8-R*/*KOI8-U*, 7-Bit *ASCII*/*GB1988*, and arbitrary *BINARY* data
|
9
9
|
|
10
10
|
## Color Coding
|
11
11
|
|
12
12
|
Each byte of the given string is highlighted using the following mechanism (characters -> codepoints):
|
13
13
|
|
14
14
|
- Red for invalid bytes
|
15
|
-
- Orange for unassigned bytes/characters
|
16
|
-
- Blue for control characters
|
17
15
|
- Light blue for blanks
|
16
|
+
- Blue for control characters
|
18
17
|
- Pink for non-control formatting characters
|
19
18
|
- Green for marks (Unicode only)
|
20
|
-
-
|
19
|
+
- Orange for unassigned codepoints
|
20
|
+
- Lighter orange for unassigned codepoints which are also ignorable
|
21
|
+
- Random color for all other codepoints
|
21
22
|
|
22
23
|
## Setup
|
23
24
|
|
@@ -113,6 +114,7 @@ Example in Ruby: `unibits "🌫 Idiosyncrätic ℜսᖯʏ", encoding: 'ascii'
|
|
113
114
|
Also see
|
114
115
|
|
115
116
|
- [Ruby's Encoding class](https://ruby-doc.org/core/Encoding.html)
|
117
|
+
- [Symbolify gem](https://github.com/janlelis/symbolify)
|
116
118
|
- [Characteristics gem](https://github.com/janlelis/characteristics)
|
117
119
|
- [UTF-8 (Wikipedia)](https://en.wikipedia.org/wiki/UTF-8#Description)
|
118
120
|
- [UTF-16 (Wikipedia)](https://en.wikipedia.org/wiki/UTF-16#Description)
|
data/bin/unibits
CHANGED
@@ -53,11 +53,12 @@ if argv[:help]
|
|
53
53
|
#{Paint["COLOR CODING", :underline]}
|
54
54
|
|
55
55
|
#{Paint["invalid", Unibits::COLORS[:invalid]]}
|
56
|
-
#{Paint["unassigned", Unibits::COLORS[:unassigned]]}
|
57
|
-
#{Paint["control", Unibits::COLORS[:control]]}
|
58
56
|
#{Paint["blank", Unibits::COLORS[:blank]]}
|
57
|
+
#{Paint["control", Unibits::COLORS[:control]]}
|
59
58
|
#{Paint["format", Unibits::COLORS[:format]]}
|
60
59
|
#{Paint["mark", Unibits::COLORS[:mark]]}
|
60
|
+
#{Paint["unassigned", Unibits::COLORS[:unassigned]]}
|
61
|
+
#{Paint["unassigned and ignorable", Unibits::COLORS[:ignorable]]}
|
61
62
|
|
62
63
|
#{Paint["STATS", :underline]}
|
63
64
|
|
data/lib/unibits.rb
CHANGED
@@ -22,16 +22,18 @@ module Unibits
|
|
22
22
|
/^TIS-620$/,
|
23
23
|
/^Windows-874$/,
|
24
24
|
/^KOI8/,
|
25
|
+
/^GB1988$/,
|
25
26
|
)
|
26
27
|
).sort.freeze
|
27
28
|
|
28
29
|
COLORS = {
|
29
30
|
invalid: "#FF0000",
|
30
|
-
unassigned: "#FF5500",
|
31
31
|
control: "#0000FF",
|
32
32
|
blank: "#33AADD",
|
33
33
|
format: "#FF00FF",
|
34
34
|
mark: "#228822",
|
35
|
+
unassigned: "#FF5500",
|
36
|
+
ignorable: "#FFAA00",
|
35
37
|
}
|
36
38
|
|
37
39
|
DEFAULT_TERMINAL_WIDTH = 80
|
@@ -81,7 +83,17 @@ module Unibits
|
|
81
83
|
puts
|
82
84
|
string.each_char{ |char|
|
83
85
|
char_info = Characteristics.create_for_type(char, type)
|
84
|
-
|
86
|
+
|
87
|
+
if RUBY_VERSION >= "2.4.1" ||
|
88
|
+
RUBY_VERSION < "2.4.0" && RUBY_VERSION >= "2.3.4" ||
|
89
|
+
RUBY_VERSION < "2.3.0" && RUBY_VERSION >= "2.2.7" ||
|
90
|
+
char_info.encoding.name[0, 6] != "UTF-32" ||
|
91
|
+
!char_info.valid?
|
92
|
+
# bug is fixed or not relevant
|
93
|
+
else
|
94
|
+
double_check_utf32_validness!(char, char_info)
|
95
|
+
end
|
96
|
+
|
85
97
|
current_color = determine_char_color(char_info)
|
86
98
|
|
87
99
|
current_encoding_error = nil if char_info.valid?
|
@@ -239,7 +251,11 @@ module Unibits
|
|
239
251
|
if !char_info.valid?
|
240
252
|
COLORS[:invalid]
|
241
253
|
elsif !char_info.assigned?
|
242
|
-
|
254
|
+
if char_info.unicode? && char_info.ignorable?
|
255
|
+
COLORS[:ignorable]
|
256
|
+
else
|
257
|
+
COLORS[:unassigned]
|
258
|
+
end
|
243
259
|
elsif char_info.blank?
|
244
260
|
COLORS[:blank]
|
245
261
|
elsif char_info.control?
|
@@ -315,7 +331,6 @@ module Unibits
|
|
315
331
|
end
|
316
332
|
|
317
333
|
def self.double_check_utf32_validness!(char, char_info)
|
318
|
-
return if RUBY_VERSION > "2.4.0" || char_info.encoding.name[0, 6] != "UTF-32" || !char_info.valid?
|
319
334
|
byte_values = char.b.unpack("C*")
|
320
335
|
le = char_info.encoding.name == 'UTF-32LE'
|
321
336
|
if byte_values[le ? 2 : 1] > 16 ||
|
data/lib/unibits/version.rb
CHANGED
data/spec/unibits_spec.rb
CHANGED
@@ -67,6 +67,13 @@ describe Unibits do
|
|
67
67
|
result.must_match "01000011"
|
68
68
|
end
|
69
69
|
|
70
|
+
it "works with GB1988" do
|
71
|
+
result = Paint.unpaint(Unibits.visualize("ASCII string".force_encoding('GB1988')))
|
72
|
+
result.must_match "C"
|
73
|
+
result.must_match "43"
|
74
|
+
result.must_match "01000011"
|
75
|
+
end
|
76
|
+
|
70
77
|
it "works with 'ISO-8859-X' encodings" do
|
71
78
|
string = "\xBC Idiosyncr\xE4tic\n\x91".force_encoding("ISO-8859-1")
|
72
79
|
result = Paint.unpaint(Unibits.visualize(string))
|
data/unibits.gemspec
CHANGED
@@ -19,8 +19,8 @@ Gem::Specification.new do |gem|
|
|
19
19
|
|
20
20
|
gem.add_dependency 'paint', '>= 0.9', '< 3.0'
|
21
21
|
gem.add_dependency 'unicode-display_width', '~> 1.1'
|
22
|
-
gem.add_dependency 'symbolify', '~> 1.
|
23
|
-
gem.add_dependency 'characteristics', '>= 0.
|
22
|
+
gem.add_dependency 'symbolify', '~> 1.2'
|
23
|
+
gem.add_dependency 'characteristics', '>= 0.7'
|
24
24
|
gem.add_dependency 'rationalist', '~> 2.0'
|
25
25
|
|
26
26
|
gem.required_ruby_version = "~> 2.0"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unibits
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: paint
|
@@ -50,28 +50,28 @@ dependencies:
|
|
50
50
|
requirements:
|
51
51
|
- - "~>"
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: '1.
|
53
|
+
version: '1.2'
|
54
54
|
type: :runtime
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
57
|
requirements:
|
58
58
|
- - "~>"
|
59
59
|
- !ruby/object:Gem::Version
|
60
|
-
version: '1.
|
60
|
+
version: '1.2'
|
61
61
|
- !ruby/object:Gem::Dependency
|
62
62
|
name: characteristics
|
63
63
|
requirement: !ruby/object:Gem::Requirement
|
64
64
|
requirements:
|
65
65
|
- - ">="
|
66
66
|
- !ruby/object:Gem::Version
|
67
|
-
version: 0.
|
67
|
+
version: '0.7'
|
68
68
|
type: :runtime
|
69
69
|
prerelease: false
|
70
70
|
version_requirements: !ruby/object:Gem::Requirement
|
71
71
|
requirements:
|
72
72
|
- - ">="
|
73
73
|
- !ruby/object:Gem::Version
|
74
|
-
version: 0.
|
74
|
+
version: '0.7'
|
75
75
|
- !ruby/object:Gem::Dependency
|
76
76
|
name: rationalist
|
77
77
|
requirement: !ruby/object:Gem::Requirement
|