unisec 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'unisec/cli/surrogates'
3
+ require 'unisec/cli/confusables'
4
4
  require 'unisec/cli/hexdump'
5
5
  require 'unisec/cli/properties'
6
- require 'unisec/cli/confusables'
6
+ require 'unisec/cli/rugrep'
7
+ require 'unisec/cli/size'
8
+ require 'unisec/cli/surrogates'
9
+ require 'unisec/cli/versions'
7
10
 
8
11
  module Unisec
9
12
  # Module used to create the CLI for the executable
@@ -14,14 +17,17 @@ module Unisec
14
17
 
15
18
  # Mapping between the (sub-)commands as seen by the user
16
19
  # on the command-line interface and the CLI modules in the lib
17
- register 'surrogates to', Surrogates::To
18
- register 'surrogates from', Surrogates::From
19
- register 'hexdump', Hexdump
20
- register 'properties list', Properties::List
21
- register 'properties codepoints', Properties::Codepoints
22
- register 'properties char', Properties::Char
23
20
  register 'confusables list', Confusables::List
24
21
  register 'confusables randomize', Confusables::Randomize
22
+ register 'grep', Grep
23
+ register 'hexdump', Hexdump
24
+ register 'properties char', Properties::Char
25
+ register 'properties codepoints', Properties::Codepoints
26
+ register 'properties list', Properties::List
27
+ register 'size', Size
28
+ register 'surrogates from', Surrogates::From
29
+ register 'surrogates to', Surrogates::To
30
+ register 'versions', Versions
25
31
  end
26
32
  end
27
33
  end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+
6
+ module Unisec
7
+ module CLI
8
+ module Commands
9
+ # CLI command `unisec grep` for the class {Unisec::Rugrep} from the lib.
10
+ #
11
+ # Example:
12
+ #
13
+ # ```plaintext
14
+ # $ unisec grep 'FRENCH \w+'
15
+ # U+20A3 ₣ FRENCH FRANC SIGN
16
+ # U+1F35F 🍟 FRENCH FRIES
17
+ # ```
18
+ class Grep < Dry::CLI::Command
19
+ desc 'Search for Unicode code point names by regular expression'
20
+
21
+ argument :regexp, required: true,
22
+ desc: 'regular expression'
23
+
24
+ # Search Unicode code point names matching a regular expression and display them.
25
+ # @param regexp [Regexp] Regular expression without delimiters or modifiers.
26
+ # Supports everything Ruby Regexp supports
27
+ def call(regexp: nil, **)
28
+ puts Unisec::Rugrep.regrep_display(regexp)
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+
6
+ module Unisec
7
+ module CLI
8
+ module Commands
9
+ # CLI command `unisec size` for the class {Unisec::Size} from the lib.
10
+ #
11
+ # Example:
12
+ #
13
+ # ```plaintext
14
+ # $ unisec size 🧑🏼‍🔬
15
+ # Code point(s): 4
16
+ # Grapheme(s): 1
17
+ # UTF-8 byte(s): 15
18
+ # UTF-16 byte(s): 14
19
+ # UTF-32 byte(s): 16
20
+ # UTF-8 unit(s): 15
21
+ # UTF-16 unit(s): 7
22
+ # UTF-32 unit(s): 4
23
+ # ```
24
+ class Size < Dry::CLI::Command
25
+ desc 'All kinf of size information about a Unicode string'
26
+
27
+ argument :input, required: true,
28
+ desc: 'String input'
29
+
30
+ # All kinds of size information about a Unicode string.
31
+ # @param input [String] Input string we want to know the size of
32
+ def call(input: nil, **)
33
+ puts Unisec::Size.new(input).display
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+
6
+ module Unisec
7
+ module CLI
8
+ module Commands
9
+ # CLI command `unisec versions` for the class {Unisec::Versions} from the lib.
10
+ #
11
+ # Example:
12
+ #
13
+ # ```plaintext
14
+ # $ unisec versions
15
+ # Unicode:
16
+ # Unicode (Ruby) 15.0.0
17
+ # Unicode (twitter_cldr gem) 14.0.0
18
+ # Unicode (unicode-confusable gem) 15.0.0
19
+ # ICU (twitter_cldr gem) 70.1
20
+ # CLDR (twitter_cldr gem) 40
21
+ # Unicode emoji (Ruby) 15.0
22
+ #
23
+ # Gems:
24
+ # unisec 0.0.1
25
+ # twitter_cldr gem 6.11.5
26
+ # unicode-confusable gem 1.9.0
27
+ # ```
28
+ class Versions < Dry::CLI::Command
29
+ desc 'Version of anything related to Unicode as used in unisec'
30
+
31
+ # Version of anything related to Unicode as used in unisec.
32
+ def call(**)
33
+ puts Unisec::Versions.display
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -50,7 +50,7 @@ module Unisec
50
50
  def self.codepoints_display(prop)
51
51
  codepoints = Properties.codepoints(prop)
52
52
  codepoints.each do |cp|
53
- puts "#{Properties.char2codepoint(cp[:char]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
53
+ puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
54
54
  end
55
55
  nil
56
56
  end
@@ -158,7 +158,7 @@ module Unisec
158
158
  # @example
159
159
  # Unisec::Properties.char2codepoint('💎') # => "U+1F48E"
160
160
  def self.char2codepoint(chr)
161
- "U+#{format('%.4x', chr.codepoints.first).upcase}"
161
+ Properties.deccp2stdhexcp(chr.codepoints.first)
162
162
  end
163
163
 
164
164
  # Display the code points in Unicode format for the given characters (code points as string)
@@ -174,5 +174,14 @@ module Unisec
174
174
  end
175
175
  out.join(' ')
176
176
  end
177
+
178
+ # Convert from decimal code point to standardized format hexadecimal code point
179
+ # @param int_cp [Integer] Code point in decimal format
180
+ # @return [String] code point in Unicode format
181
+ # @example
182
+ # Unisec::Properties.deccp2stdhexcp(128640) # => "U+1F680"
183
+ def self.deccp2stdhexcp(int_cp)
184
+ "U+#{format('%.4x', int_cp).upcase}"
185
+ end
177
186
  end
178
187
  end
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'twitter_cldr'
4
+ require 'paint'
5
+
6
+ module Unisec
7
+ # Ruby grep : Ruby regular expression search for Unicode code point names
8
+ class Rugrep
9
+ # UCD Derived names file location
10
+ # @see https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedName.txt
11
+ UCD_DERIVEDNAME = File.join(__dir__, '../../data/DerivedName.txt')
12
+
13
+ # Search code points by (Ruby) regexp
14
+ # @param regexp [Regexp] Regular expression without delimiters or modifiers.
15
+ # Supports everything Ruby Regexp supports
16
+ # @return [Array<Hash>] Array of code points (`{char: String, codepoint: Integer, name: String}`)
17
+ # @example
18
+ # Unisec::Rugrep.regrep('snowman|snowflake')
19
+ # # =>
20
+ # # [{:char=>"☃", :codepoint=>9731, :name=>"SNOWMAN"},
21
+ # # {:char=>"⛄", :codepoint=>9924, :name=>"SNOWMAN WITHOUT SNOW"},
22
+ # # {:char=>"⛇", :codepoint=>9927, :name=>"BLACK SNOWMAN"},
23
+ # # {:char=>"❄", :codepoint=>10052, :name=>"SNOWFLAKE"},
24
+ # # {:char=>"❅", :codepoint=>10053, :name=>"TIGHT TRIFOLIATE SNOWFLAKE"},
25
+ # # {:char=>"❆", :codepoint=>10054, :name=>"HEAVY CHEVRON SNOWFLAKE"}]
26
+ # Unisec::Rugrep.regrep('greek small letter \w+')
27
+ # # =>
28
+ # # [{:char=>"ͱ", :codepoint=>881, :name=>"GREEK SMALL LETTER HETA"},
29
+ # # {:char=>"ͳ", :codepoint=>883, :name=>"GREEK SMALL LETTER ARCHAIC SAMPI"},
30
+ # # {:char=>"ͷ", :codepoint=>887, :name=>"GREEK SMALL LETTER PAMPHYLIAN DIGAMMA"},
31
+ # # …]
32
+ def self.regrep(regexp)
33
+ out = []
34
+ file = File.new(UCD_DERIVEDNAME)
35
+ file.each_line(chomp: true) do |line|
36
+ # Skip if the line is empty or a comment
37
+ next if line.empty? || line[0] == '#'
38
+
39
+ # parse the line to extract code point as integer and the name
40
+ cp_int, name = line.split(';')
41
+ cp_int = cp_int.chomp.to_i(16)
42
+ name.lstrip!
43
+ next unless /#{regexp}/i.match?(name) # compiling regexp once is surprisingly not faster
44
+
45
+ out << {
46
+ char: TwitterCldr::Utils::CodePoints.to_string([cp_int]),
47
+ codepoint: cp_int,
48
+ name: name
49
+ }
50
+ end
51
+ out
52
+ end
53
+
54
+ # Display a CLI-friendly output listing all code points corresponding to a regular expression.
55
+ # @example
56
+ # Unisec::Rugrep.regrep_display('snowman|snowflake')
57
+ # # =>
58
+ # # U+2603 ☃ SNOWMAN
59
+ # # U+26C4 ⛄ SNOWMAN WITHOUT SNOW
60
+ # # U+26C7 ⛇ BLACK SNOWMAN
61
+ # # U+2744 ❄ SNOWFLAKE
62
+ # # U+2745 ❅ TIGHT TRIFOLIATE SNOWFLAKE
63
+ # # U+2746 ❆ HEAVY CHEVRON SNOWFLAKE
64
+ def self.regrep_display(regexp)
65
+ codepoints = regrep(regexp)
66
+ codepoints.each do |cp|
67
+ puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
68
+ end
69
+ nil
70
+ end
71
+
72
+ # Returns the version of Unicode used in UCD local file (data/DerivedName.txt)
73
+ # @return [String] Unicode version
74
+ # @example
75
+ # Unisec::Rugrep.ucd_derivedname_version # => "15.1.0"
76
+ def self.ucd_derivedname_version
77
+ first_line = File.open(UCD_DERIVEDNAME, &:readline)
78
+ first_line.match(/-(\d+\.\d+\.\d+)\.txt/).captures.first
79
+ end
80
+
81
+ # Search code points by (Ruby) regexp
82
+ # @param regexp [Regexp] Regular expression without delimiters or modifiers
83
+ # @return [Array<Hash>] Array of code points (`{char: String, codepoint: Integer, name: String}`)
84
+ # @example
85
+ # Unisec::Rugrep.regrep_slow('snowman|snowflake')
86
+ # # =>
87
+ # # [{:char=>"☃", :codepoint=>9731, :name=>"SNOWMAN"},
88
+ # # {:char=>"⛄", :codepoint=>9924, :name=>"SNOWMAN WITHOUT SNOW"},
89
+ # # {:char=>"⛇", :codepoint=>9927, :name=>"BLACK SNOWMAN"},
90
+ # # {:char=>"❄", :codepoint=>10052, :name=>"SNOWFLAKE"},
91
+ # # {:char=>"❅", :codepoint=>10053, :name=>"TIGHT TRIFOLIATE SNOWFLAKE"},
92
+ # # {:char=>"❆", :codepoint=>10054, :name=>"HEAVY CHEVRON SNOWFLAKE"}]
93
+ # @note ⚠ This command is very time consuming (~ 1min) and unoptimized (execute one regexp per code point…)
94
+ def self.regrep_slow(regexp)
95
+ out = []
96
+ TwitterCldr::Shared::CodePoint.each do |cp|
97
+ next unless /#{regexp}/oi.match?(cp.name) # compiling regexp once is surprisingly not faster
98
+
99
+ out << {
100
+ char: TwitterCldr::Utils::CodePoints.to_string([cp.code_point]),
101
+ codepoint: cp.code_point,
102
+ name: cp.name
103
+ }
104
+ end
105
+ out
106
+ end
107
+
108
+ # Display a CLI-friendly output listing all code points corresponding to a regular expression.
109
+ # @example
110
+ # Unisec::Rugrep.regrep_display_slow('snowman|snowflake')
111
+ # # =>
112
+ # # U+2603 ☃ SNOWMAN
113
+ # # U+26C4 ⛄ SNOWMAN WITHOUT SNOW
114
+ # # U+26C7 ⛇ BLACK SNOWMAN
115
+ # # U+2744 ❄ SNOWFLAKE
116
+ # # U+2745 ❅ TIGHT TRIFOLIATE SNOWFLAKE
117
+ # # U+2746 ❆ HEAVY CHEVRON SNOWFLAKE
118
+ def self.regrep_display_slow(regexp)
119
+ codepoints = regrep_slow(regexp)
120
+ codepoints.each do |cp|
121
+ puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
122
+ end
123
+ nil
124
+ end
125
+ end
126
+ end
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'paint'
4
+
5
+ module Unisec
6
+ # All kinds of size information about a Unicode string
7
+ class Size
8
+ # Number of code points
9
+ # @return [Integer] number of code points
10
+ # @example
11
+ # us = Unisec::Size.new('👩‍❤️‍👩')
12
+ # us.code_points_size # => 6
13
+ attr_reader :code_points_size
14
+
15
+ # Number of graphemes
16
+ # @return [Integer] number of graphemes
17
+ # @example
18
+ # us = Unisec::Size.new('👩‍❤️‍👩')
19
+ # us.grapheme_size # => 1
20
+ attr_reader :grapheme_size
21
+
22
+ # UTF-8 size in bytes
23
+ # @return [Integer] UTF-8 size in bytes
24
+ # @example
25
+ # us = Unisec::Size.new('👩‍❤️‍👩')
26
+ # us.utf8_bytesize # => 20
27
+ attr_reader :utf8_bytesize
28
+
29
+ # UTF-16 size in bytes
30
+ # @return [Integer] UTF-16 size in bytes
31
+ # @example
32
+ # us = Unisec::Size.new('👩‍❤️‍👩')
33
+ # us.utf16_bytesize # => 16
34
+ attr_reader :utf16_bytesize
35
+
36
+ # UTF-32 size in bytes
37
+ # @return [Integer] UTF-32 size in bytes
38
+ # @example
39
+ # us = Unisec::Size.new('👩‍❤️‍👩')
40
+ # us.utf32_bytesize # => 24
41
+ attr_reader :utf32_bytesize
42
+
43
+ # Number of UTF-8 units
44
+ # @return [Integer] number of UTF-8 units
45
+ # @example
46
+ # us = Unisec::Size.new('👩‍❤️‍👩')
47
+ # us.utf8_unitsize # => 20
48
+ attr_reader :utf8_unitsize
49
+
50
+ # Number of UTF-16 units
51
+ # @return [Integer] number of UTF-16 units
52
+ # @example
53
+ # us = Unisec::Size.new('👩‍❤️‍👩')
54
+ # us.utf16_unitsize # => 8
55
+ attr_reader :utf16_unitsize
56
+
57
+ # Number of UTF-32 units
58
+ # @return [Integer] number of UTF-32 units
59
+ # @example
60
+ # us = Unisec::Size.new('👩‍❤️‍👩')
61
+ # us.utf32_unitsize # => 6
62
+ attr_reader :utf32_unitsize
63
+
64
+ def initialize(str)
65
+ @code_points_size = Size.code_points_size(str)
66
+ @grapheme_size = Size.grapheme_size(str)
67
+ @utf8_bytesize = Size.utf8_bytesize(str)
68
+ @utf16_bytesize = Size.utf16_bytesize(str)
69
+ @utf32_bytesize = Size.utf32_bytesize(str)
70
+ @utf8_unitsize = Size.utf8_unitsize(str)
71
+ @utf16_unitsize = Size.utf16_unitsize(str)
72
+ @utf32_unitsize = Size.utf32_unitsize(str)
73
+ end
74
+
75
+ # Number of code points
76
+ # @param str [String] Input string we want to know the size of
77
+ # @return [Integer] number of code points
78
+ # @example
79
+ # Unisec::Size.code_points_size('👩‍❤️‍👩') # => 6
80
+ def self.code_points_size(str)
81
+ str.size
82
+ end
83
+
84
+ # Number of graphemes
85
+ # @param str [String] Input string we want to know the size of
86
+ # @return [Integer] number of graphemes
87
+ # @example
88
+ # Unisec::Size.grapheme_size('👩‍❤️‍👩') # => 1
89
+ def self.grapheme_size(str)
90
+ str.grapheme_clusters.size
91
+ end
92
+
93
+ # UTF-8 size in bytes
94
+ # @param str [String] Input string we want to know the size of
95
+ # @return [Integer] UTF-8 size in bytes
96
+ # @example
97
+ # Unisec::Size.utf8_bytesize('👩‍❤️‍👩') # => 20
98
+ def self.utf8_bytesize(str)
99
+ str.bytesize
100
+ end
101
+
102
+ # UTF-16 size in bytes
103
+ # @param str [String] Input string we want to know the size of
104
+ # @return [Integer] UTF-16 size in bytes
105
+ # @example
106
+ # Unisec::Size.utf16_bytesize('👩‍❤️‍👩') # => 16
107
+ def self.utf16_bytesize(str)
108
+ str.encode('UTF-16BE').bytesize
109
+ end
110
+
111
+ # UTF-32 size in bytes
112
+ # @param str [String] Input string we want to know the size of
113
+ # @return [Integer] UTF-32 size in bytes
114
+ # @example
115
+ # Unisec::Size.utf32_bytesize('👩‍❤️‍👩') # => 24
116
+ def self.utf32_bytesize(str)
117
+ str.encode('UTF-32BE').bytesize
118
+ end
119
+
120
+ # Number of UTF-8 units
121
+ # @param str [String] Input string we want to know the size of
122
+ # @return [Integer] number of UTF-8 units
123
+ # @example
124
+ # Unisec::Size.utf8_unitsize('👩‍❤️‍👩') # => 20
125
+ def self.utf8_unitsize(str)
126
+ utf8_bytesize(str)
127
+ end
128
+
129
+ # Number of UTF-16 units
130
+ # @param str [String] Input string we want to know the size of
131
+ # @return [Integer] number of UTF-16 units
132
+ # @example
133
+ # Unisec::Size.utf16_unitsize('👩‍❤️‍👩') # => 8
134
+ def self.utf16_unitsize(str)
135
+ utf16_bytesize(str) / 2
136
+ end
137
+
138
+ # Number of UTF-32 units
139
+ # @param str [String] Input string we want to know the size of
140
+ # @return [Integer] number of UTF-32 units
141
+ # @example
142
+ # Unisec::Size.utf32_unitsize('👩‍❤️‍👩') # => 6
143
+ def self.utf32_unitsize(str)
144
+ utf32_bytesize(str) / 4
145
+ end
146
+
147
+ # Display a CLI-friendly output summarizing the size information about a Unicode string.
148
+ # @example
149
+ # Unisec::Size.new('👩‍❤️‍👨').display
150
+ # # =>
151
+ # # Code point(s): 6
152
+ # # Grapheme(s): 1
153
+ # # UTF-8 byte(s): 20
154
+ # # UTF-16 byte(s): 16
155
+ # # UTF-32 byte(s): 24
156
+ # # UTF-8 unit(s): 20
157
+ # # UTF-16 unit(s): 8
158
+ # # UTF-32 unit(s): 6
159
+ def display
160
+ display = ->(key, value) { puts Paint[key, :red, :bold].ljust(27) + " #{value}" }
161
+ display.call('Code point(s):', @code_points_size)
162
+ display.call('Grapheme(s):', @grapheme_size)
163
+ display.call('UTF-8 byte(s):', @utf8_bytesize)
164
+ display.call('UTF-16 byte(s):', @utf16_bytesize)
165
+ display.call('UTF-32 byte(s):', @utf32_bytesize)
166
+ display.call('UTF-8 unit(s):', @utf8_unitsize)
167
+ display.call('UTF-16 unit(s):', @utf16_unitsize)
168
+ display.call('UTF-32 unit(s):', @utf32_unitsize)
169
+ end
170
+ end
171
+ end
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Unisec
4
4
  # Version of unisec library and app
5
- VERSION = '0.0.1'
5
+ VERSION = '0.0.3'
6
6
  end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'twitter_cldr'
4
+ require 'unicode/confusable'
5
+ require 'paint'
6
+
7
+ module Unisec
8
+ # Version information related to Unicode used in Unisec
9
+ class Versions
10
+ # Version and label of anything related to Unicode used in Unisec
11
+ # @return [Hash] versions of each component
12
+ # @example
13
+ # Unisec::Versions.versions
14
+ # # =>
15
+ # # {:unisec=>{:version=>"0.0.1", :label=>"unisec"},
16
+ # # … }
17
+ def self.versions # rubocop:disable Metrics/MethodLength
18
+ {
19
+ unisec: {
20
+ version: Unisec::VERSION,
21
+ label: 'unisec'
22
+ },
23
+ ruby_unicode: {
24
+ version: RbConfig::CONFIG['UNICODE_VERSION'],
25
+ label: 'Unicode (Ruby)'
26
+ },
27
+ ruby_unicode_emoji: {
28
+ version: RbConfig::CONFIG['UNICODE_EMOJI_VERSION'],
29
+ label: 'Unicode emoji (Ruby)'
30
+ },
31
+ twittercldr_cldr: {
32
+ version: TwitterCldr::Versions::CLDR_VERSION,
33
+ label: 'CLDR (twitter_cldr gem)'
34
+ },
35
+ twittercldr_icu: {
36
+ version: TwitterCldr::Versions::ICU_VERSION,
37
+ label: 'ICU (twitter_cldr gem)'
38
+ },
39
+ twittercldr_unicode: {
40
+ version: TwitterCldr::Versions::UNICODE_VERSION,
41
+ label: 'Unicode (twitter_cldr gem)'
42
+ },
43
+ twittercldr: {
44
+ version: TwitterCldr::VERSION,
45
+ label: 'twitter_cldr gem'
46
+ },
47
+ unicodeconfusable: {
48
+ version: Unicode::Confusable::VERSION,
49
+ label: 'unicode-confusable gem'
50
+ },
51
+ unicodeconfusable_unicode: {
52
+ version: Unicode::Confusable::UNICODE_VERSION,
53
+ label: 'Unicode (unicode-confusable gem)'
54
+ },
55
+ ucd_derivedname: {
56
+ version: Unisec::Rugrep.ucd_derivedname_version,
57
+ label: 'UCD (data/DerivedName.txt)'
58
+ }
59
+ }
60
+ end
61
+
62
+ # Display a CLI-friendly output of the version of anything related to Unicode used in unisec
63
+ # @example
64
+ # Unisec::Versions.display
65
+ # # =>
66
+ # # Unicode:
67
+ # # Unicode (Ruby) 15.0.0
68
+ # # …
69
+ # #
70
+ # # Gems:
71
+ # # unisec 0.0.1
72
+ # # …
73
+ def self.display # rubocop:disable Metrics/AbcSize
74
+ data = versions
75
+ display = ->(node) { puts Paint[data[node][:label], :red, :bold].ljust(44) + " #{data[node][:version]}" }
76
+ puts Paint['Unicode:', :underline]
77
+ display.call(:ruby_unicode)
78
+ display.call(:twittercldr_unicode)
79
+ display.call(:unicodeconfusable_unicode)
80
+ display.call(:twittercldr_icu)
81
+ display.call(:twittercldr_cldr)
82
+ display.call(:ruby_unicode_emoji)
83
+ display.call(:ucd_derivedname)
84
+ puts Paint["\nGems:", :underline]
85
+ display.call(:unisec)
86
+ display.call(:twittercldr)
87
+ display.call(:unicodeconfusable)
88
+ end
89
+ end
90
+ end
data/lib/unisec.rb CHANGED
@@ -2,7 +2,10 @@
2
2
 
3
3
  require 'unisec/version'
4
4
 
5
- require 'unisec/surrogates'
5
+ require 'unisec/confusables'
6
6
  require 'unisec/hexdump'
7
7
  require 'unisec/properties'
8
- require 'unisec/confusables'
8
+ require 'unisec/rugrep'
9
+ require 'unisec/size'
10
+ require 'unisec/surrogates'
11
+ require 'unisec/versions'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unisec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexandre ZANNI
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-07-21 00:00:00.000000000 Z
11
+ date: 2023-10-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ctf-party
@@ -86,7 +86,9 @@ dependencies:
86
86
  - - "~>"
87
87
  - !ruby/object:Gem::Version
88
88
  version: '1.9'
89
- description: Toolkit for security research manipulating Unicode
89
+ description: 'Toolkit for security research manipulating Unicode: confusables, homoglyphs,
90
+ hexdump, code point, UTF-8, UTF-16, UTF-32, properties, regexp search, size, grapheme,
91
+ surrogates, version, ICU, CLDR, UCD'
90
92
  email: alexandre.zanni@europe.com
91
93
  executables:
92
94
  - unisec
@@ -95,18 +97,25 @@ extra_rdoc_files: []
95
97
  files:
96
98
  - LICENSE
97
99
  - bin/unisec
100
+ - data/DerivedName.txt
98
101
  - lib/unisec.rb
99
102
  - lib/unisec/cli/cli.rb
100
103
  - lib/unisec/cli/confusables.rb
101
104
  - lib/unisec/cli/hexdump.rb
102
105
  - lib/unisec/cli/properties.rb
106
+ - lib/unisec/cli/rugrep.rb
107
+ - lib/unisec/cli/size.rb
103
108
  - lib/unisec/cli/surrogates.rb
109
+ - lib/unisec/cli/versions.rb
104
110
  - lib/unisec/confusables.rb
105
111
  - lib/unisec/hexdump.rb
106
112
  - lib/unisec/properties.rb
113
+ - lib/unisec/rugrep.rb
114
+ - lib/unisec/size.rb
107
115
  - lib/unisec/surrogates.rb
108
116
  - lib/unisec/utils.rb
109
117
  - lib/unisec/version.rb
118
+ - lib/unisec/versions.rb
110
119
  homepage: https://github.com/Acceis/unisec
111
120
  licenses:
112
121
  - MIT
@@ -136,7 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
136
145
  - !ruby/object:Gem::Version
137
146
  version: '0'
138
147
  requirements: []
139
- rubygems_version: 3.4.1
148
+ rubygems_version: 3.4.10
140
149
  signing_key:
141
150
  specification_version: 4
142
151
  summary: Unicode Security Toolkit