unibits 2.4.0 → 2.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e9d89e598970e15c646b6d3dce0969afe24204fe
4
- data.tar.gz: '0473794df2d445d3d0bc5d6f5c697d0c148de119'
3
+ metadata.gz: 89ef5ce08435a61d17d624f01b9355753446667c
4
+ data.tar.gz: 2c758d25836c61613a0ab1c60f8196b93421c979
5
5
  SHA512:
6
- metadata.gz: f9bd47214da917798bb5e40ad7bdaf80d93a0fd2b07d43f825b8819fb81eca54935f4eff5ae3b508a8213895366a2898b049b558e368c6e69b275f7a3a86bb8f
7
- data.tar.gz: 71c2583c9935c7ceead69c8d7c694af2f621bc08f952c896d0e24baf8688d747724315e1fe178e98c1e0e2138d830306e3623804ad9dd37b81c100ffa7394e19
6
+ metadata.gz: fc2087917695cd6855477ddaa7fa973da3c188a814564e13d7331ee4c3744aa7338efef1397b3ed6fe3a9b502522dbcace5b7d7eb20f409ecac74a56ec74d200
7
+ data.tar.gz: 5428461b0a77d91f746bfc565072f9a0baf126b0d7ce40d88113b54e56de4678a209bf01a03ff2fc9c84e8f38c34ec31685b62e8b757b0698934d33a64544753
@@ -5,8 +5,10 @@ rvm:
5
5
  - ruby-head
6
6
  - 2.4.1
7
7
  - 2.4.0
8
+ - 2.3.4
8
9
  - 2.3.3
9
- - 2.2
10
+ - 2.2.7
11
+ - 2.2.6
10
12
  - 2.1
11
13
  - 2.0
12
14
  - jruby-head
@@ -1,5 +1,15 @@
1
1
  ## CHANGELOG
2
2
 
3
+ ### 2.5.0
4
+
5
+ * Double check UTF-32 only on Ruby versions which contain the bug
6
+ * Highlight unassigned codepoints which are ignorable
7
+ * Bump symbolify dependency
8
+ * Add special characters (U+FFF9 - U+FFFC)
9
+ * Non-control separators return ⏎
10
+ * Bump characteristics dependency
11
+ * Allow GB1988 encoding (7bit ascii-like)
12
+
3
13
  ### 2.4.0
4
14
 
5
15
  * Extract symbolification logic into extra [symbolify](https://github.com/janlelis/symbolify) gem (includes fixes and non-character detection)
data/README.md CHANGED
@@ -5,19 +5,20 @@ Ruby library and CLI command that visualizes various Unicode and ASCII/single by
5
5
  - Makes analyzing encodings easier
6
6
  - Helps you with debugging strings
7
7
  - Highlights invalid/special/blank bytes/characters/codepoints
8
- - Supports *UTF-8*, *UTF-16LE*/*UTF-16BE*, *UTF-32LE*/*UTF-32BE*, *ISO-8859-X*, *Windows-125X*, *IBMX*, *CP85X*, *macX*, *TIS-620*/*Windows-874*, *KOI8-R*/*KOI8-U*, 7-Bit *ASCII*, and arbitrary *BINARY* data
8
+ - Supports *UTF-8*, *UTF-16LE*/*UTF-16BE*, *UTF-32LE*/*UTF-32BE*, *ISO-8859-X*, *Windows-125X*, *IBMX*, *CP85X*, *macX*, *TIS-620*/*Windows-874*, *KOI8-R*/*KOI8-U*, 7-Bit *ASCII*/*GB1988*, and arbitrary *BINARY* data
9
9
 
10
10
  ## Color Coding
11
11
 
12
12
  Each byte of the given string is highlighted using the following mechanism (characters -> codepoints):
13
13
 
14
14
  - Red for invalid bytes
15
- - Orange for unassigned bytes/characters
16
- - Blue for control characters
17
15
  - Light blue for blanks
16
+ - Blue for control characters
18
17
  - Non-control formatting characters in pink
19
18
  - Green for marks (Unicode only)
20
- - Random color for all other characters
19
+ - Orange for unassigned codepoints
20
+ - Lighter orange for unassigned codepoints which are also ignorable
21
+ - Random color for all other codepoints
21
22
 
22
23
  ## Setup
23
24
 
@@ -113,6 +114,7 @@ Example in Ruby: `unibits "🌫 Idiosyncrätic ℜսᖯʏ", encoding: 'ascii'
113
114
  Also see
114
115
 
115
116
  - [Ruby's Encoding class](https://ruby-doc.org/core/Encoding.html)
117
+ - [Symbolify gem](https://github.com/janlelis/symbolify)
116
118
  - [Characteristics gem](https://github.com/janlelis/characteristics)
117
119
  - [UTF-8 (Wikipedia)](https://en.wikipedia.org/wiki/UTF-8#Description)
118
120
  - [UTF-16 (Wikipedia)](https://en.wikipedia.org/wiki/UTF-16#Description)
@@ -53,11 +53,12 @@ if argv[:help]
53
53
  #{Paint["COLOR CODING", :underline]}
54
54
 
55
55
  #{Paint["invalid", Unibits::COLORS[:invalid]]}
56
- #{Paint["unassigned", Unibits::COLORS[:unassigned]]}
57
- #{Paint["control", Unibits::COLORS[:control]]}
58
56
  #{Paint["blank", Unibits::COLORS[:blank]]}
57
+ #{Paint["control", Unibits::COLORS[:control]]}
59
58
  #{Paint["format", Unibits::COLORS[:format]]}
60
59
  #{Paint["mark", Unibits::COLORS[:mark]]}
60
+ #{Paint["unassigned", Unibits::COLORS[:unassigned]]}
61
+ #{Paint["unassigned and ignorable", Unibits::COLORS[:ignorable]]}
61
62
 
62
63
  #{Paint["STATS", :underline]}
63
64
 
@@ -22,16 +22,18 @@ module Unibits
22
22
  /^TIS-620$/,
23
23
  /^Windows-874$/,
24
24
  /^KOI8/,
25
+ /^GB1988$/,
25
26
  )
26
27
  ).sort.freeze
27
28
 
28
29
  COLORS = {
29
30
  invalid: "#FF0000",
30
- unassigned: "#FF5500",
31
31
  control: "#0000FF",
32
32
  blank: "#33AADD",
33
33
  format: "#FF00FF",
34
34
  mark: "#228822",
35
+ unassigned: "#FF5500",
36
+ ignorable: "#FFAA00",
35
37
  }
36
38
 
37
39
  DEFAULT_TERMINAL_WIDTH = 80
@@ -81,7 +83,17 @@ module Unibits
81
83
  puts
82
84
  string.each_char{ |char|
83
85
  char_info = Characteristics.create_for_type(char, type)
84
- double_check_utf32_validness!(char, char_info)
86
+
87
+ if RUBY_VERSION >= "2.4.1" ||
88
+ RUBY_VERSION < "2.4.0" && RUBY_VERSION >= "2.3.4" ||
89
+ RUBY_VERSION < "2.3.0" && RUBY_VERSION >= "2.2.7" ||
90
+ char_info.encoding.name[0, 6] != "UTF-32" ||
91
+ !char_info.valid?
92
+ # bug is fixed or not relevant
93
+ else
94
+ double_check_utf32_validness!(char, char_info)
95
+ end
96
+
85
97
  current_color = determine_char_color(char_info)
86
98
 
87
99
  current_encoding_error = nil if char_info.valid?
@@ -239,7 +251,11 @@ module Unibits
239
251
  if !char_info.valid?
240
252
  COLORS[:invalid]
241
253
  elsif !char_info.assigned?
242
- COLORS[:unassigned]
254
+ if char_info.unicode? && char_info.ignorable?
255
+ COLORS[:ignorable]
256
+ else
257
+ COLORS[:unassigned]
258
+ end
243
259
  elsif char_info.blank?
244
260
  COLORS[:blank]
245
261
  elsif char_info.control?
@@ -315,7 +331,6 @@ module Unibits
315
331
  end
316
332
 
317
333
  def self.double_check_utf32_validness!(char, char_info)
318
- return if RUBY_VERSION > "2.4.0" || char_info.encoding.name[0, 6] != "UTF-32" || !char_info.valid?
319
334
  byte_values = char.b.unpack("C*")
320
335
  le = char_info.encoding.name == 'UTF-32LE'
321
336
  if byte_values[le ? 2 : 1] > 16 ||
@@ -1,4 +1,4 @@
1
1
  module Unibits
2
- VERSION = "2.4.0".freeze
2
+ VERSION = "2.5.0".freeze
3
3
  UNICODE_VERSION = "9.0.0".freeze
4
4
  end
@@ -67,6 +67,13 @@ describe Unibits do
67
67
  result.must_match "01000011"
68
68
  end
69
69
 
70
+ it "works with GB1988" do
71
+ result = Paint.unpaint(Unibits.visualize("ASCII string".force_encoding('GB1988')))
72
+ result.must_match "C"
73
+ result.must_match "43"
74
+ result.must_match "01000011"
75
+ end
76
+
70
77
  it "works with 'ISO-8859-X' encodings" do
71
78
  string = "\xBC Idiosyncr\xE4tic\n\x91".force_encoding("ISO-8859-1")
72
79
  result = Paint.unpaint(Unibits.visualize(string))
@@ -19,8 +19,8 @@ Gem::Specification.new do |gem|
19
19
 
20
20
  gem.add_dependency 'paint', '>= 0.9', '< 3.0'
21
21
  gem.add_dependency 'unicode-display_width', '~> 1.1'
22
- gem.add_dependency 'symbolify', '~> 1.0'
23
- gem.add_dependency 'characteristics', '>= 0.5.2'
22
+ gem.add_dependency 'symbolify', '~> 1.2'
23
+ gem.add_dependency 'characteristics', '>= 0.7'
24
24
  gem.add_dependency 'rationalist', '~> 2.0'
25
25
 
26
26
  gem.required_ruby_version = "~> 2.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unibits
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.0
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Lelis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-25 00:00:00.000000000 Z
11
+ date: 2017-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: paint
@@ -50,28 +50,28 @@ dependencies:
50
50
  requirements:
51
51
  - - "~>"
52
52
  - !ruby/object:Gem::Version
53
- version: '1.0'
53
+ version: '1.2'
54
54
  type: :runtime
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
57
57
  requirements:
58
58
  - - "~>"
59
59
  - !ruby/object:Gem::Version
60
- version: '1.0'
60
+ version: '1.2'
61
61
  - !ruby/object:Gem::Dependency
62
62
  name: characteristics
63
63
  requirement: !ruby/object:Gem::Requirement
64
64
  requirements:
65
65
  - - ">="
66
66
  - !ruby/object:Gem::Version
67
- version: 0.5.2
67
+ version: '0.7'
68
68
  type: :runtime
69
69
  prerelease: false
70
70
  version_requirements: !ruby/object:Gem::Requirement
71
71
  requirements:
72
72
  - - ">="
73
73
  - !ruby/object:Gem::Version
74
- version: 0.5.2
74
+ version: '0.7'
75
75
  - !ruby/object:Gem::Dependency
76
76
  name: rationalist
77
77
  requirement: !ruby/object:Gem::Requirement