unibits 2.4.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -1
- data/CHANGELOG.md +10 -0
- data/README.md +6 -4
- data/bin/unibits +3 -2
- data/lib/unibits.rb +19 -4
- data/lib/unibits/version.rb +1 -1
- data/spec/unibits_spec.rb +7 -0
- data/unibits.gemspec +2 -2
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 89ef5ce08435a61d17d624f01b9355753446667c
|
4
|
+
data.tar.gz: 2c758d25836c61613a0ab1c60f8196b93421c979
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fc2087917695cd6855477ddaa7fa973da3c188a814564e13d7331ee4c3744aa7338efef1397b3ed6fe3a9b502522dbcace5b7d7eb20f409ecac74a56ec74d200
|
7
|
+
data.tar.gz: 5428461b0a77d91f746bfc565072f9a0baf126b0d7ce40d88113b54e56de4678a209bf01a03ff2fc9c84e8f38c34ec31685b62e8b757b0698934d33a64544753
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
+
### 2.5.0
|
4
|
+
|
5
|
+
* Double check UTF-32 only on Ruby versions which contain the bug
|
6
|
+
* Highlight unassigned codepoints which are ignorable
|
7
|
+
* Bump symbolify dependency
|
8
|
+
* Add special characters (U+FFF9 - U+FFFC)
|
9
|
+
* Non-control separators return ⏎
|
10
|
+
* Bump characteristics dependency
|
11
|
+
* Allow GB1988 encoding (7bit ascii-like)
|
12
|
+
|
3
13
|
### 2.4.0
|
4
14
|
|
5
15
|
* Extract symbolification logic into extra [symbolify](https://github.com/janlelis/symbolify) gem (includes fixes and non-character detection)
|
data/README.md
CHANGED
@@ -5,19 +5,20 @@ Ruby library and CLI command that visualizes various Unicode and ASCII/single by
|
|
5
5
|
- Makes analyzing encodings easier
|
6
6
|
- Helps you with debugging strings
|
7
7
|
- Highlights invalid/special/blank bytes/characters/codepoints
|
8
|
-
- Supports *UTF-8*, *UTF-16LE*/*UTF-16BE*, *UTF-32LE*/*UTF-32BE*, *ISO-8859-X*, *Windows-125X*, *IBMX*, *CP85X*, *macX*, *TIS-620*/*Windows-874*, *KOI8-R*/*KOI8-U*, 7-Bit *ASCII*, and arbitrary *BINARY* data
|
8
|
+
- Supports *UTF-8*, *UTF-16LE*/*UTF-16BE*, *UTF-32LE*/*UTF-32BE*, *ISO-8859-X*, *Windows-125X*, *IBMX*, *CP85X*, *macX*, *TIS-620*/*Windows-874*, *KOI8-R*/*KOI8-U*, 7-Bit *ASCII*/*GB1988*, and arbitrary *BINARY* data
|
9
9
|
|
10
10
|
## Color Coding
|
11
11
|
|
12
12
|
Each byte of the given string is highlighted using the following mechanism (characters -> codepoints):
|
13
13
|
|
14
14
|
- Red for invalid bytes
|
15
|
-
- Orange for unassigned bytes/characters
|
16
|
-
- Blue for control characters
|
17
15
|
- Light blue for blanks
|
16
|
+
- Blue for control characters
|
18
17
|
- Non-control formatting characters in pink
|
19
18
|
- Green for marks (Unicode only)
|
20
|
-
-
|
19
|
+
- Orange for unassigned codepoints
|
20
|
+
- Lighter orange for unassigned codepoints which are also ignorable
|
21
|
+
- Random color for all other codepoints
|
21
22
|
|
22
23
|
## Setup
|
23
24
|
|
@@ -113,6 +114,7 @@ Example in Ruby: `unibits "🌫 Idiosyncrätic ℜսᖯʏ", encoding: 'ascii'
|
|
113
114
|
Also see
|
114
115
|
|
115
116
|
- [Ruby's Encoding class](https://ruby-doc.org/core/Encoding.html)
|
117
|
+
- [Symbolify gem](https://github.com/janlelis/symbolify)
|
116
118
|
- [Characteristics gem](https://github.com/janlelis/characteristics)
|
117
119
|
- [UTF-8 (Wikipedia)](https://en.wikipedia.org/wiki/UTF-8#Description)
|
118
120
|
- [UTF-16 (Wikipedia)](https://en.wikipedia.org/wiki/UTF-16#Description)
|
data/bin/unibits
CHANGED
@@ -53,11 +53,12 @@ if argv[:help]
|
|
53
53
|
#{Paint["COLOR CODING", :underline]}
|
54
54
|
|
55
55
|
#{Paint["invalid", Unibits::COLORS[:invalid]]}
|
56
|
-
#{Paint["unassigned", Unibits::COLORS[:unassigned]]}
|
57
|
-
#{Paint["control", Unibits::COLORS[:control]]}
|
58
56
|
#{Paint["blank", Unibits::COLORS[:blank]]}
|
57
|
+
#{Paint["control", Unibits::COLORS[:control]]}
|
59
58
|
#{Paint["format", Unibits::COLORS[:format]]}
|
60
59
|
#{Paint["mark", Unibits::COLORS[:mark]]}
|
60
|
+
#{Paint["unassigned", Unibits::COLORS[:unassigned]]}
|
61
|
+
#{Paint["unassigned and ignorable", Unibits::COLORS[:ignorable]]}
|
61
62
|
|
62
63
|
#{Paint["STATS", :underline]}
|
63
64
|
|
data/lib/unibits.rb
CHANGED
@@ -22,16 +22,18 @@ module Unibits
|
|
22
22
|
/^TIS-620$/,
|
23
23
|
/^Windows-874$/,
|
24
24
|
/^KOI8/,
|
25
|
+
/^GB1988$/,
|
25
26
|
)
|
26
27
|
).sort.freeze
|
27
28
|
|
28
29
|
COLORS = {
|
29
30
|
invalid: "#FF0000",
|
30
|
-
unassigned: "#FF5500",
|
31
31
|
control: "#0000FF",
|
32
32
|
blank: "#33AADD",
|
33
33
|
format: "#FF00FF",
|
34
34
|
mark: "#228822",
|
35
|
+
unassigned: "#FF5500",
|
36
|
+
ignorable: "#FFAA00",
|
35
37
|
}
|
36
38
|
|
37
39
|
DEFAULT_TERMINAL_WIDTH = 80
|
@@ -81,7 +83,17 @@ module Unibits
|
|
81
83
|
puts
|
82
84
|
string.each_char{ |char|
|
83
85
|
char_info = Characteristics.create_for_type(char, type)
|
84
|
-
|
86
|
+
|
87
|
+
if RUBY_VERSION >= "2.4.1" ||
|
88
|
+
RUBY_VERSION < "2.4.0" && RUBY_VERSION >= "2.3.4" ||
|
89
|
+
RUBY_VERSION < "2.3.0" && RUBY_VERSION >= "2.2.7" ||
|
90
|
+
char_info.encoding.name[0, 6] != "UTF-32" ||
|
91
|
+
!char_info.valid?
|
92
|
+
# bug is fixed or not relevant
|
93
|
+
else
|
94
|
+
double_check_utf32_validness!(char, char_info)
|
95
|
+
end
|
96
|
+
|
85
97
|
current_color = determine_char_color(char_info)
|
86
98
|
|
87
99
|
current_encoding_error = nil if char_info.valid?
|
@@ -239,7 +251,11 @@ module Unibits
|
|
239
251
|
if !char_info.valid?
|
240
252
|
COLORS[:invalid]
|
241
253
|
elsif !char_info.assigned?
|
242
|
-
|
254
|
+
if char_info.unicode? && char_info.ignorable?
|
255
|
+
COLORS[:ignorable]
|
256
|
+
else
|
257
|
+
COLORS[:unassigned]
|
258
|
+
end
|
243
259
|
elsif char_info.blank?
|
244
260
|
COLORS[:blank]
|
245
261
|
elsif char_info.control?
|
@@ -315,7 +331,6 @@ module Unibits
|
|
315
331
|
end
|
316
332
|
|
317
333
|
def self.double_check_utf32_validness!(char, char_info)
|
318
|
-
return if RUBY_VERSION > "2.4.0" || char_info.encoding.name[0, 6] != "UTF-32" || !char_info.valid?
|
319
334
|
byte_values = char.b.unpack("C*")
|
320
335
|
le = char_info.encoding.name == 'UTF-32LE'
|
321
336
|
if byte_values[le ? 2 : 1] > 16 ||
|
data/lib/unibits/version.rb
CHANGED
data/spec/unibits_spec.rb
CHANGED
@@ -67,6 +67,13 @@ describe Unibits do
|
|
67
67
|
result.must_match "01000011"
|
68
68
|
end
|
69
69
|
|
70
|
+
it "works with GB1988" do
|
71
|
+
result = Paint.unpaint(Unibits.visualize("ASCII string".force_encoding('GB1988')))
|
72
|
+
result.must_match "C"
|
73
|
+
result.must_match "43"
|
74
|
+
result.must_match "01000011"
|
75
|
+
end
|
76
|
+
|
70
77
|
it "works with 'ISO-8859-X' encodings" do
|
71
78
|
string = "\xBC Idiosyncr\xE4tic\n\x91".force_encoding("ISO-8859-1")
|
72
79
|
result = Paint.unpaint(Unibits.visualize(string))
|
data/unibits.gemspec
CHANGED
@@ -19,8 +19,8 @@ Gem::Specification.new do |gem|
|
|
19
19
|
|
20
20
|
gem.add_dependency 'paint', '>= 0.9', '< 3.0'
|
21
21
|
gem.add_dependency 'unicode-display_width', '~> 1.1'
|
22
|
-
gem.add_dependency 'symbolify', '~> 1.
|
23
|
-
gem.add_dependency 'characteristics', '>= 0.
|
22
|
+
gem.add_dependency 'symbolify', '~> 1.2'
|
23
|
+
gem.add_dependency 'characteristics', '>= 0.7'
|
24
24
|
gem.add_dependency 'rationalist', '~> 2.0'
|
25
25
|
|
26
26
|
gem.required_ruby_version = "~> 2.0"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unibits
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: paint
|
@@ -50,28 +50,28 @@ dependencies:
|
|
50
50
|
requirements:
|
51
51
|
- - "~>"
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: '1.
|
53
|
+
version: '1.2'
|
54
54
|
type: :runtime
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
57
|
requirements:
|
58
58
|
- - "~>"
|
59
59
|
- !ruby/object:Gem::Version
|
60
|
-
version: '1.
|
60
|
+
version: '1.2'
|
61
61
|
- !ruby/object:Gem::Dependency
|
62
62
|
name: characteristics
|
63
63
|
requirement: !ruby/object:Gem::Requirement
|
64
64
|
requirements:
|
65
65
|
- - ">="
|
66
66
|
- !ruby/object:Gem::Version
|
67
|
-
version: 0.
|
67
|
+
version: '0.7'
|
68
68
|
type: :runtime
|
69
69
|
prerelease: false
|
70
70
|
version_requirements: !ruby/object:Gem::Requirement
|
71
71
|
requirements:
|
72
72
|
- - ">="
|
73
73
|
- !ruby/object:Gem::Version
|
74
|
-
version: 0.
|
74
|
+
version: '0.7'
|
75
75
|
- !ruby/object:Gem::Dependency
|
76
76
|
name: rationalist
|
77
77
|
requirement: !ruby/object:Gem::Requirement
|