unisec 0.0.8 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fe68d59956b20311ad5d9f5f64c10a6b0648d1c7c146b9fbacf3f25348385207
4
- data.tar.gz: 8f62db4e8a2327e0ac36e1be53fb5c168fb856fd81e9cafec0af1297a451bed8
3
+ metadata.gz: 6cfd875b3a3e0ca562f75674360c22dafa1993a3c5a71cb1b946b777e3afd021
4
+ data.tar.gz: e4773c61cdd9cc7122cc95465b6c2e86ef04e3737ed91e924342d65bef51dd3e
5
5
  SHA512:
6
- metadata.gz: 7eb59fcce432494896adc586f168835578da1ab54f6f64080d4ecc86d91bebd39d569f38e72a6c9c79ea23e303a34315c8aebaf9e2fa2b340d25f234731e82ab
7
- data.tar.gz: 33711c517a93ea3e28b25cde223d94f1cc2cf2edc0db39a3af41d5c6268d5bac51bef4d534306a24df21ea283c325fe052e0723780dcdedd294930e6f1d8eeee
6
+ metadata.gz: 212a2fe8ca988f8a7d839fb3396cb9895cfc4c51cbf07d75a20d8b4c6c202f2190e595c6c50a8c772b4125d2a4a5d418f0069ed3d696dd7b9845075aa8f8d451
7
+ data.tar.gz: 90a4ae3132b55e1b71f0abbbfbb83c58fee2ce1f2bb7d00b4cd62c10b833931a2406a152d0aa3bd9a2d86ddebe3885af3f18991cf77b4aef9b464b3593e1c207
data/lib/unisec/bidi.rb CHANGED
@@ -18,10 +18,10 @@ module Unisec
18
18
  # @param input [String] the target string
19
19
  # @param opts [Hash] optional parameters, see {Spoof.bidi_affix}
20
20
  # @return [String] the target string
21
- def set_target_display(input, **)
21
+ def set_target_display(input, **opts)
22
22
  @target_display = input
23
- @spoof_string = reverse(**)
24
- @spoof_payload = bidi_affix(**)
23
+ @spoof_string = reverse(**opts)
24
+ @spoof_payload = bidi_affix(**opts)
25
25
  @target_display
26
26
  end
27
27
 
data/lib/unisec/blocks.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'paint'
4
+ require 'twitter_cldr'
4
5
  require 'unisec/utils'
5
6
 
6
7
  module Unisec
@@ -205,5 +206,38 @@ module Unisec
205
206
  end
206
207
  nil
207
208
  end
209
+
210
+ # Returns the name of the Unicode block containing the given character.
211
+ # @param char [String] Single character (only one code unit, so be careful with
212
+ # emojis, composed or joint characters using several units, only the first
213
+ # code unit will be kept).
214
+ # @return [String] Block name or empty string if not found.
215
+ # @example
216
+ # Unisec::Blocks.reverse('…') # => "General Punctuation"
217
+ # Unisec::Blocks.reverse('A') # => "Basic Latin"
218
+ # Unisec::Blocks.reverse('💩') # => "Miscellaneous Symbols and Pictographs"
219
+ # Unisec::Blocks.reverse('🇫🇷') # => "Enclosed Alphanumeric Supplement" (only first unit is kept)
220
+ def self.reverse(char)
221
+ cp_num = TwitterCldr::Utils::CodePoints.from_string(char)
222
+ cp = TwitterCldr::Shared::CodePoint.get(cp_num.first)
223
+ props = cp.properties
224
+ props.block.join
225
+ rescue NoMethodError # in case of invalid character where CodePoint.get() => nil
226
+ ''
227
+ end
228
+
229
+ # Display a CLI-friendly output showing the block name for a given character.
230
+ # @param char [String] Single character (only one code unit, so be careful with
231
+ # emojis, composed or joint characters using several units, only the first
232
+ # code unit will be kept).
233
+ def self.reverse_display(char)
234
+ blk_name = reverse(char)
235
+ if blk_name.empty?
236
+ puts "no block found for #{char.inspect}"
237
+ else
238
+ puts blk_name
239
+ end
240
+ nil
241
+ end
208
242
  end
209
243
  end
@@ -60,6 +60,34 @@ module Unisec
60
60
  end
61
61
  end
62
62
 
63
+ # Command `unisec blocks reverse`
64
+ #
65
+ # Example:
66
+ #
67
+ # ```plaintext
68
+ # $ unisec blocks reverse '…'
69
+ # General Punctuation
70
+ # $ unisec blocks reverse 'A'
71
+ # Basic Latin
72
+ # $ unisec blocks reverse '💩'
73
+ # Miscellaneous Symbols and Pictographs
74
+ # $ unisec blocks reverse '🇫🇷'
75
+ # Enclosed Alphanumeric Supplement
76
+ # ```
77
+ class Reverse < Dry::CLI::Command
78
+ desc 'Search in which Unicode block a given character is'
79
+
80
+ argument :char, required: true,
81
+ desc: 'Single character (only one code unit, so be careful with emojis, composed or ' \
82
+ 'joint characters using several units, only the first code unit will be kept)'
83
+
84
+ # Display the Unicode block name for a given character
85
+ # @param char [String] Single character (only one code unit, so be careful with emojis, composed or joint characters using several units, only the first code unit will be kept).
86
+ def call(char: nil, **)
87
+ Unisec::Blocks.reverse_display(char)
88
+ end
89
+ end
90
+
63
91
  # Command `unisec blocks invalid`
64
92
  #
65
93
  # Example:
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'dry/cli/completion/command'
3
4
  require 'unisec/cli/bidi'
4
5
  require 'unisec/cli/blocks'
5
6
  require 'unisec/cli/confusables'
@@ -24,17 +25,23 @@ module Unisec
24
25
  register 'bidi spoof', Bidi::Spoof
25
26
  register 'blocks invalid', Blocks::Invalid
26
27
  register 'blocks list', Blocks::List
28
+ register 'blocks reverse', Blocks::Reverse
27
29
  register 'blocks search', Blocks::Search
30
+ register 'completion', Dry::CLI::Completion::Command[self]
28
31
  register 'confusables list', Confusables::List
29
32
  register 'confusables randomize', Confusables::Randomize
30
33
  register 'dump codepoints integer', Dump::Codepoints::Integer
31
34
  register 'dump codepoints standard', Dump::Codepoints::Standard
32
35
  register 'dump dec', Dump::Dec
33
36
  register 'dump hex', Dump::Hex
37
+ register 'dump rev', Dump::Reverse
34
38
  register 'grep', Grep
35
39
  register 'normalize all', Normalize::All
36
40
  register 'normalize replace', Normalize::Replace
41
+ register 'normalize reverse', Normalize::Reverse
42
+ register 'planes block', Planes::Block
37
43
  register 'planes list', Planes::List
44
+ register 'planes reverse', Planes::Reverse
38
45
  register 'planes search', Planes::Search
39
46
  register 'properties char', Properties::Char
40
47
  register 'properties codepoints', Properties::Codepoints
@@ -127,6 +127,41 @@ module Unisec
127
127
  end
128
128
  end
129
129
  end
130
+
131
+ # CLI command `unisec dump rev` for the method {Unisec::Hexdump.reverse} from the lib.
132
+ #
133
+ # Example:
134
+ #
135
+ # ```plaintext
136
+ # $ unisec dump rev 0a0d --enc=utf16be
137
+ # ਍ (U+0A0D) - 0a0d
138
+ #
139
+ # $ unisec dump rev 808080 --enc=utf8 --exact=false
140
+ # 񀀀 (U+40000) - f1 80 80 80
141
+ # 򀀀 (U+80000) - f2 80 80 80
142
+ # 󀀀 (U+C0000) - f3 80 80 80
143
+ # 􀀀 (U+100000) - f4 80 80 80
144
+ # ```
145
+ class Reverse < Dry::CLI::Command
146
+ desc 'Reverse search in hexadecimal dump'
147
+
148
+ argument :hexbytes, required: true,
149
+ desc: 'Byte(s) in hexadecimal to search for. Read from STDIN if equal to -.'
150
+
151
+ option :enc, default: 'utf8', values: %w[utf8 utf16be utf16le utf32be utf32le],
152
+ desc: 'The target encoding in which to search.'
153
+
154
+ option :exact, default: 'true', values: %w[true false],
155
+ desc: 'true (default) = exact search, false = "sub-string" search / the value is included ' \
156
+ 'in the encoded value'
157
+
158
+ # Search X byte(s) hexadecimal value in Y encoding, basically which characters will give this resulting encoded value
159
+ # @param hexbytes [String] The target encoding in which to search.
160
+ def call(hexbytes: nil, **options)
161
+ hexbytes = $stdin.read.chomp if hexbytes == '-'
162
+ puts Unisec::Hexdump.display_reverse(hexbytes, options[:enc], exact: options[:exact].to_bool)
163
+ end
164
+ end
130
165
  end
131
166
  end
132
167
  end
@@ -81,6 +81,37 @@ module Unisec
81
81
  puts Unisec::Normalization.new(input).display_replace
82
82
  end
83
83
  end
84
+
85
+ # Command `unisec normalize reverse '<'`
86
+ #
87
+ # Example:
88
+ #
89
+ # ```plaintext
90
+ # $ unisec normalize reverse '"' --forms 'nfkc,nfkd'
91
+ # Original:
92
+ # " (U+0022)
93
+ # NFKC
94
+ # ＂ (U+FF02)
95
+ # NFKD
96
+ # ＂ (U+FF02)
97
+ # ```
98
+ class Reverse < Dry::CLI::Command
99
+ desc 'List reverse normalization candidates (what characters will transform into target after normalization)'
100
+
101
+ argument :target, required: true,
102
+ desc: 'Normalization target. Read from STDIN if equal to -.'
103
+
104
+ option :forms, default: %i[nfc nfd nfkc nfkd],
105
+ desc: 'Output only in the specified normalization form(s). ' \
106
+ 'Separate by comma if multiple values.'
107
+
108
+ # Reverse normalize
109
+ # @param target [String] Normalization target
110
+ def call(target: nil, **options)
111
+ target = $stdin.read.chomp if target == '-'
112
+ puts Unisec::Normalization.display_reverse_normalize(target, forms: options[:forms])
113
+ end
114
+ end
84
115
  end
85
116
  end
86
117
  end
@@ -93,6 +93,58 @@ module Unisec
93
93
  with_count: options[:with_count].to_bool)
94
94
  end
95
95
  end
96
+
97
+ # Command `unisec planes reverse`
98
+ #
99
+ # Example:
100
+ #
101
+ # ```plaintext
102
+ # $ unisec planes reverse '…'
103
+ # Basic Multilingual Plane
104
+ # $ unisec planes reverse '🨂'
105
+ # Supplementary Multilingual Plane
106
+ # $ unisec planes reverse '𠀀'
107
+ # Supplementary Ideographic Plane
108
+ # $ unisec planes reverse '🇫🇷'
109
+ # Supplementary Multilingual Plane
110
+ # ```
111
+ class Reverse < Dry::CLI::Command
112
+ desc 'Search in which Unicode plane a given character is'
113
+
114
+ argument :char, required: true,
115
+ desc: 'Single character (only one code unit, so be careful with emojis, composed or joint ' \
116
+ 'characters using several units), only the first code unit will be kept).'
117
+
118
+ # Display the Unicode plane name for a given character
119
+ # @param char [String] Single character (only one code unit, so be careful with emojis,
120
+ # composed or joint characters using several units, only the first code unit will be kept).
121
+ def call(char: nil, **)
122
+ Unisec::Planes.reverse_display(char)
123
+ end
124
+ end
125
+
126
+ # Command `unisec planes block`
127
+ #
128
+ # Example:
129
+ #
130
+ # ```plaintext
131
+ # $ unisec planes block 'Basic Latin'
132
+ # Basic Multilingual Plane
133
+ # $ unisec planes block 'Miscellaneous Symbols and Pictographs'
134
+ # Supplementary Multilingual Plane
135
+ # ```
136
+ class Block < Dry::CLI::Command
137
+ desc 'Search in which Unicode plane a block is'
138
+
139
+ argument :block_arg, required: true,
140
+ desc: 'Block name (case insensitive)'
141
+
142
+ # Display the Unicode plane name for a given block
143
+ # @param block_arg [String] Block name (case insensitive).
144
+ def call(block_arg: nil, **)
145
+ Unisec::Planes.block_display(block_arg)
146
+ end
147
+ end
96
148
  end
97
149
  end
98
150
  end
@@ -85,6 +85,33 @@ module Unisec
85
85
  str.encode('UTF-32LE').to_hex.scan(/.{8}/).join(' ')
86
86
  end
87
87
 
88
+ # Search X byte(s) hexadecimal value in Y encoding, basically which characters will give this resulting encoded value
89
+ # @param hexbytes [String] Byte(s) in hexadecimal to search for
90
+ # @param enc [String] The target encoding in which to search. It uses Unisec CLI argument values (utf8 utf16be utf16le utf32be utf32le).
91
+ # @param exact [TrueClass|FalseClass] true (default) = exact search, false = "sub-string" search / the value is included in the encoded value
92
+ # @return [Array<String>] all matching source characters
93
+ # @example
94
+ # Unisec::Hexdump.reverse('61', 'utf8') # => ["a"]
95
+ # Unisec::Hexdump.reverse('a6', 'utf8', exact: true) # => []
96
+ # Unisec::Hexdump.reverse('a6', 'utf8', exact: false) # => ["¦", "æ", "Ħ", "Ŧ", "Ʀ", "Ǧ", … ]
97
+ # Unisec::Hexdump.reverse('0d0a', 'utf16be', exact: true) # => ["\u0D0A"] (ഊ)
98
+ def self.reverse(hexbytes, enc, exact: true)
99
+ chars = []
100
+ (0x000000..0x10FFFF).each do |i|
101
+ char = i.chr(Unisec::Utils::Arguments.argenc2enc(enc, target: 'class'))
102
+ encoded_value = Unisec::Hexdump.send(enc, char).delete(' ')
103
+ if exact && encoded_value == hexbytes # exact match
104
+ chars << char
105
+ break
106
+ elsif !exact && encoded_value.include?(hexbytes) # includes value
107
+ chars << char
108
+ end
109
+ rescue RangeError # skip invalid code points for selected encoding
110
+ next
111
+ end
112
+ chars
113
+ end
114
+
88
115
  # Display a CLI-friendly output summarizing the hexdump in all Unicode encodings
89
116
  # @return [String] CLI-ready output
90
117
  # @example
@@ -101,5 +128,29 @@ module Unisec
101
128
  "UTF-32BE: #{@utf32be}\n" \
102
129
  "UTF-32LE: #{@utf32le}"
103
130
  end
131
+
132
+ # Display a CLI-friendly output summarizing the reverse hexdump search results
133
+ # @param hexbytes [String] see {Unisec::Hexdump.reverse}
134
+ # @param enc [String] see {Unisec::Hexdump.reverse}
135
+ # @param exact [TrueClass|FalseClass] see {Unisec::Hexdump.reverse}
136
+ # @return [String] CLI-ready output
137
+ # @example
138
+ # puts Unisec::Hexdump.display_reverse('0d0a', 'utf16be', exact: true)
139
+ # # ഊ (U+0D0A) - 0d0a
140
+ # puts Unisec::Hexdump.display_reverse('808080', 'utf8', exact: false)
141
+ # # 񀀀 (U+40000) - f1 80 80 80
142
+ # # 򀀀 (U+80000) - f2 80 80 80
143
+ # # 󀀀 (U+C0000) - f3 80 80 80
144
+ # # 􀀀 (U+100000) - f4 80 80 80
145
+ def self.display_reverse(hexbytes, enc, exact: true)
146
+ res = Unisec::Hexdump.reverse(hexbytes, enc, exact: exact)
147
+ out = ''
148
+ res.each do |char|
149
+ cp = Utils::String.char2codepoint(char)
150
+ hxd = Unisec::Hexdump.send(enc, char)
151
+ out += "#{char.encode('UTF-8')} (#{cp}) - #{hxd}\n"
152
+ end
153
+ out
154
+ end
104
155
  end
105
156
  end
@@ -95,6 +95,35 @@ module Unisec
95
95
  Normalization.replace_bypass(@original)
96
96
  end
97
97
 
98
+ # Find the list of symbols that will transform into a given symbol after normalization
99
+ # @param target [String]
100
+ # @param forms [String|Symbol|Array<Symbol>]
101
+ # @return [Hash] (results won't include input)
102
+ # @example
103
+ # Unisec::Normalization.reverse_normalize('<') # => {nfc: [], nfd: [], nfkc: ["﹤", "＜"], nfkd: ["﹤", "＜"]}
104
+ # Unisec::Normalization.reverse_normalize('.', forms: [:nfkc, :nfkd]) # => {nfkc: ["․", "﹒", "．"], nfkd: ["․", "﹒", "．"]}
105
+ # Unisec::Normalization.reverse_normalize('ffi', forms: :nfkc) # => {nfkc: ["ﬃ"]}
106
+ # Unisec::Normalization.reverse_normalize('≯', forms: 'nfd') # => {nfd: ["≯"]}
107
+ # Unisec::Normalization.reverse_normalize('ô', forms: 'nfc,nfd') # => {nfc: [], nfd: []}
108
+ def self.reverse_normalize(target, forms: %i[nfc nfd nfkc nfkd])
109
+ forms = Utils::Arguments.to_array_of_sym(forms)
110
+ result = {}
111
+ forms.each do |form|
112
+ result[form] = []
113
+ end
114
+
115
+ (0x000000..0x10FFFF).each do |codepoint|
116
+ char = codepoint.chr(Encoding::UTF_8)
117
+ forms.each do |form|
118
+ result[form] << char if (char.unicode_normalize(form) == target) && (char != target)
119
+ end
120
+ rescue RangeError # skip UTF-16 surrogates and potential other invalid code points
121
+ next
122
+ end
123
+
124
+ result
125
+ end
126
+
98
127
  # Display a CLI-friendly output summarizing all normalization forms
99
128
  # @return [String] CLI-ready output
100
129
  # @example
@@ -124,6 +153,18 @@ module Unisec
124
153
 
125
154
  # Display a CLI-friendly output of the XSS payload to bypass HTML escape and
126
155
  # what it does once normalized in NFKC & NFKD.
156
+ # @return [String] CLI-ready output
157
+ # @example
158
+ # $ puts Unisec::Normalization.new('<script>').display_replace
159
+ # # =>
160
+ # # Original: <script>
161
+ # # U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
162
+ # # Bypass payload: ＜script＞
163
+ # # U+FF1C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+FF1E
164
+ # # NFKC: <script>
165
+ # # U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
166
+ # # NFKD: <script>
167
+ # # U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
127
168
  def display_replace
128
169
  colorize = lambda { |form_title, form_attr|
129
170
  "#{Paint[form_title.to_s, :underline,
@@ -135,5 +176,36 @@ module Unisec
135
176
  colorize.call('NFKC', Normalization.nfkc(payload)) +
136
177
  colorize.call('NFKD', Normalization.nfkd(payload))
137
178
  end
179
+
180
+ # Display a CLI-friendly output of the reverse normalization results
181
+ # @param target [String] see {Unisec::Normalization.reverse_normalize}
182
+ # @param forms [String|Symbol|Array<Symbol>] see {Unisec::Normalization.reverse_normalize}
183
+ # @return [String] CLI-ready output
184
+ # @example
185
+ # puts Unisec::Normalization.display_reverse_normalize('<')
186
+ # # =>
187
+ # # Original:
188
+ # # < (U+003C)
189
+ # # NFKC
190
+ # # ﹤ (U+FE64)
191
+ # # ＜ (U+FF1C)
192
+ # # NFKD
193
+ # # ﹤ (U+FE64)
194
+ # # ＜ (U+FF1C)
195
+ def self.display_reverse_normalize(target, forms: %i[nfc nfd nfkc nfkd]) # rubocop:disable Metrics/AbcSize
196
+ colorize_form = ->(form_title) { Paint[form_title, :underline, :bold] }
197
+ colorize_char = ->(char) { " #{char} (#{Paint[Unisec::Utils::String.chars2codepoints(char), :red]})\n" }
198
+ out = "#{colorize_form.call('Original')}:\n#{colorize_char.call(target)}"
199
+ res = Unisec::Normalization.reverse_normalize(target, forms: forms) # => {nfc: [], nfd: [], nfkc: ["﹤", "<"], nfkd: ["﹤", "<"]}
200
+ res.each_key do |k|
201
+ next if res[k].empty?
202
+
203
+ out += "#{colorize_form.call(k.to_s.upcase)}\n"
204
+ res[k].each do |v|
205
+ out += colorize_char.call(v)
206
+ end
207
+ end
208
+ out
209
+ end
138
210
  end
139
211
  end
data/lib/unisec/planes.rb CHANGED
@@ -220,5 +220,71 @@ module Unisec
220
220
  end
221
221
  nil
222
222
  end
223
+
224
+ # Returns the name of the Unicode plane containing the given character.
225
+ # @param char [String] Single character (only one code unit, so be careful with
226
+ # emojis, composed or joint characters using several units, only the first
227
+ # code unit will be kept).
228
+ # @return [String] Plane name or empty string if not found.
229
+ # @example
230
+ # Unisec::Planes.reverse('…') # => "Basic Multilingual Plane"
231
+ # Unisec::Planes.reverse('🨂') # => "Supplementary Multilingual Plane"
232
+ # Unisec::Planes.reverse('𠀀') # => "Supplementary Ideographic Plane"
233
+ # Unisec::Planes.reverse('🇫🇷') # => "Supplementary Multilingual Plane" (first unit kept)
234
+ def self.reverse(char)
235
+ return '' unless char.is_a?(String)
236
+
237
+ cp = Utils::String.convert_to_integer(char[0])
238
+ PLANES.each do |plane|
239
+ return plane[:name] if plane[:range].include?(cp)
240
+ end
241
+ '' # not found
242
+ end
243
+
244
+ # Display a CLI-friendly output showing the plane name for a given character.
245
+ # @param char [String] Single character (only one code unit, so be careful with
246
+ # emojis, composed or joint characters using several units, only the first
247
+ # code unit will be kept).
248
+ def self.reverse_display(char)
249
+ plane_name = reverse(char)
250
+ if plane_name.empty?
251
+ puts "no plane found for #{char.inspect}"
252
+ else
253
+ puts plane_name
254
+ end
255
+ nil
256
+ end
257
+
258
+ # Returns the name of the Unicode plane containing the given block.
259
+ # @param block_arg [String] Block name (case insensitive).
260
+ # @return [String] Plane name or empty string if not found.
261
+ # @example
262
+ # Unisec::Planes.block('Basic Latin') # => "Basic Multilingual Plane"
263
+ # Unisec::Planes.block('Miscellaneous Symbols and Pictographs') # => "Supplementary Multilingual Plane"
264
+ def self.block(block_arg) # rubocop:disable Metrics/CyclomaticComplexity
265
+ # support only search by block name
266
+ return '' if block_arg.is_a?(Integer)
267
+ return '' if block_arg.is_a?(String) && (block_arg.size == 1 || block_arg.start_with?('U+'))
268
+
269
+ blk = Blocks.block(block_arg, with_count: false)
270
+ return '' unless blk # block name not found
271
+
272
+ PLANES.each do |plane|
273
+ return plane[:name] if plane[:range].cover?(blk[:range])
274
+ end
275
+ '' # not found
276
+ end
277
+
278
+ # Display a CLI-friendly output showing the plane name for a given block.
279
+ # @param block_arg [String] Block name (case insensitive).
280
+ def self.block_display(block_arg)
281
+ plane_name = block(block_arg)
282
+ if plane_name.empty?
283
+ puts "no plane found for block #{block_arg.inspect}"
284
+ else
285
+ puts plane_name
286
+ end
287
+ nil
288
+ end
223
289
  end
224
290
  end
@@ -75,9 +75,10 @@ module Unisec
75
75
  end
76
76
  {
77
77
  age: props.age.join,
78
+ plane: Unisec::Planes.reverse(chr),
78
79
  block: props.block.join,
79
80
  category: categories[1],
80
- subcategory: categories[0],
81
+ subcategory: "#{categories[0]} (#{cp.category})",
81
82
  codepoint: Utils::String.char2codepoint(chr),
82
83
  name: cp.name,
83
84
  script: props.script.join,
@@ -121,6 +122,7 @@ module Unisec
121
122
  display.call('Name:', data[:name])
122
123
  display.call('Code Point:', data[:codepoint] + " (#{Utils::String.convert(chr, :integer)})")
123
124
  puts
125
+ display.call('Plane', data[:plane])
124
126
  display.call('Block:', data[:block])
125
127
  display.call('Category:', data[:category])
126
128
  display.call('Sub-Category:', data[:subcategory])
data/lib/unisec/utils.rb CHANGED
@@ -207,7 +207,7 @@ module Unisec
207
207
  # @example
208
208
  # Unisec::Utils::String.chars2intcodepoints('I 💕 Ruby 💎') # => "73 32 128149 32 82 117 98 121 32 128142"
209
209
  def self.chars2intcodepoints(chrs)
210
- chrs.codepoints.map(&:to_s).join(' ')
210
+ chrs.codepoints.join(' ')
211
211
  end
212
212
 
213
213
  # Convert a string of hex encoded Unicode code points range to actual
@@ -236,12 +236,54 @@ module Unisec
236
236
  module Range
237
237
  # Convert a (integer) range to a range of Unicode code points
238
238
  # @param range [::Range]
239
- # @return [String]
239
+ # @return [::String]
240
240
  # @example
241
241
  # Unisec::Utils::Range.range2codepoint_range(1048576..1114111) # => "U+100000 - U+10FFFF"
242
242
  def self.range2codepoint_range(range)
243
243
  "#{Integer.deccp2stdhexcp(range.begin)} - #{Integer.deccp2stdhexcp(range.end)}"
244
244
  end
245
245
  end
246
+
247
+ module Arguments
248
+ # Converts an argument that is a string, a comma-separated string of arguments, or a symbol into an array of symbols.
249
+ # Useful for methods that are expected to work on arrays of symbols but can receive various input formats (e.g. from CLI).
250
+ # @param input [::String|Symbol] (anything else will be returned untransformed)
251
+ # @return [Array<Symbol>] (or anything else if input type is not respected)
252
+ # @example
253
+ # Unisec::Utils::Arguments.to_array_of_sym("arg") # => [:arg]
254
+ # Unisec::Utils::Arguments.to_array_of_sym("a,b,c") # => [:a, :b, :c]
255
+ # Unisec::Utils::Arguments.to_array_of_sym(:snake) # => [:snake]
256
+ # Unisec::Utils::Arguments.to_array_of_sym([:a, :b, :c]) # => [:a, :b, :c]
257
+ def self.to_array_of_sym(input)
258
+ case input
259
+ when ::String # a,b,c => [:a, :b, :c]
260
+ input.split(',').map(&:to_sym)
261
+ when ::Symbol # :a => [:a]
262
+ [input]
263
+ else
264
+ input
265
+ end
266
+ end
267
+
268
+ # Converts encoding name from CLI to encoding name in standard format or Ruby Class
269
+ # @param argenc [::String] Encoding name as used as argument in Unisec CLI (authorized values are: utf8 utf16be utf16le utf32be utf32le).
270
+ # @param target [::String] 'standard' for standard encoding name, 'class' for Ruby class naming
271
+ # @return [::String|Class]
272
+ # @example
273
+ # Unisec::Utils::Arguments.argenc2enc('utf8', target: 'standard') # => "UTF-8"
274
+ # Unisec::Utils::Arguments.argenc2enc('utf16be', target: 'class') # => #<Encoding:UTF-16BE (autoload)>
275
+ def self.argenc2enc(argenc, target: 'standard')
276
+ argument_encodings = %w[utf8 utf16be utf16le utf32be utf32le]
277
+ raise ArgumentError unless argument_encodings.include?(argenc)
278
+
279
+ if target == 'standard'
280
+ argenc.upcase.insert(3, '-')
281
+ elsif target == 'class'
282
+ Encoding.const_get(argenc.upcase.insert(3, '_')) # const_get safe thanks to input whitelist
283
+ else
284
+ raise ArgumentError
285
+ end
286
+ end
287
+ end
246
288
  end
247
289
  end
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Unisec
4
4
  # Version of unisec library and app
5
- VERSION = '0.0.8'
5
+ VERSION = '0.0.10'
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unisec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexandre ZANNI
@@ -29,14 +29,28 @@ dependencies:
29
29
  requirements:
30
30
  - - "~>"
31
31
  - !ruby/object:Gem::Version
32
- version: '1.0'
32
+ version: '1.4'
33
33
  type: :runtime
34
34
  prerelease: false
35
35
  version_requirements: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
- version: '1.0'
39
+ version: '1.4'
40
+ - !ruby/object:Gem::Dependency
41
+ name: dry-cli-completion
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: 2.0.0
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: 2.0.0
40
54
  - !ruby/object:Gem::Dependency
41
55
  name: paint
42
56
  requirement: !ruby/object:Gem::Requirement
@@ -71,14 +85,14 @@ dependencies:
71
85
  requirements:
72
86
  - - "~>"
73
87
  - !ruby/object:Gem::Version
74
- version: '1.12'
88
+ version: '1.13'
75
89
  type: :runtime
76
90
  prerelease: false
77
91
  version_requirements: !ruby/object:Gem::Requirement
78
92
  requirements:
79
93
  - - "~>"
80
94
  - !ruby/object:Gem::Version
81
- version: '1.12'
95
+ version: '1.13'
82
96
  description: 'Toolkit for security research manipulating Unicode: confusables, homoglyphs,
83
97
  hexdump, code point, UTF-8, UTF-16, UTF-32, properties, regexp search, size, grapheme,
84
98
  surrogates, version, ICU, CLDR, UCD, BiDi, normalization'
@@ -137,7 +151,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
137
151
  requirements:
138
152
  - - ">="
139
153
  - !ruby/object:Gem::Version
140
- version: 3.2.0
154
+ version: 3.3.0
141
155
  - - "<"
142
156
  - !ruby/object:Gem::Version
143
157
  version: '5.0'
@@ -147,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
147
161
  - !ruby/object:Gem::Version
148
162
  version: '0'
149
163
  requirements: []
150
- rubygems_version: 4.0.3
164
+ rubygems_version: 4.0.10
151
165
  specification_version: 4
152
166
  summary: Unicode Security Toolkit
153
167
  test_files: []