RubyGems - unisec - Versions diffs - 0.0.8 → 0.0.9 - Mend

unisec 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

checksums.yaml +4 -4
data/lib/unisec/bidi.rb +3 -3
data/lib/unisec/blocks.rb +34 -0
data/lib/unisec/cli/blocks.rb +28 -0
data/lib/unisec/cli/cli.rb +7 -0
data/lib/unisec/cli/dump.rb +35 -0
data/lib/unisec/cli/normalization.rb +31 -0
data/lib/unisec/cli/planes.rb +52 -0
data/lib/unisec/hexdump.rb +51 -0
data/lib/unisec/normalization.rb +72 -0
data/lib/unisec/planes.rb +66 -0
data/lib/unisec/properties.rb +3 -1
data/lib/unisec/utils.rb +44 -2
data/lib/unisec/version.rb +1 -1
metadata +21 -7

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: fe68d59956b20311ad5d9f5f64c10a6b0648d1c7c146b9fbacf3f25348385207
-  data.tar.gz: 8f62db4e8a2327e0ac36e1be53fb5c168fb856fd81e9cafec0af1297a451bed8
+  metadata.gz: e1c859ae327cc9381cc578456525a9fc0d6e68299f10bce6cd4f6439431a7fc0
+  data.tar.gz: 8c091df7ffc3e8f720ca9e5cee3d022e4cba4876530727150cc8277d61509f7c
 SHA512:
-  metadata.gz: 7eb59fcce432494896adc586f168835578da1ab54f6f64080d4ecc86d91bebd39d569f38e72a6c9c79ea23e303a34315c8aebaf9e2fa2b340d25f234731e82ab
-  data.tar.gz: 33711c517a93ea3e28b25cde223d94f1cc2cf2edc0db39a3af41d5c6268d5bac51bef4d534306a24df21ea283c325fe052e0723780dcdedd294930e6f1d8eeee
+  metadata.gz: 7981fd667521cbccf1c3fdfda8610722fdf9892392568be8bacdd36719109982e07d906c9c4b5c3aff4c90d10252b93460698a3f404348d5dcbd8783124e77cb
+  data.tar.gz: 3b32516d01be17f5d462acade421755c5420f1f2f7d596f972c87d17425a64e06cee0fc7963d916113ea970f6a8882b47aa4e84113d31c992f8cc115c2ea5f59

data/lib/unisec/bidi.rb CHANGED Viewed

@@ -18,10 +18,10 @@ module Unisec
       # @param input [String] the target string
       # @param opts [Hash] optional parameters, see {Spoof.bidi_affix}
       # @return [String] the target string
-      def set_target_display(input, **)
+      def set_target_display(input, **opts)
         @target_display = input
-        @spoof_string = reverse(**)
-        @spoof_payload = bidi_affix(**)
+        @spoof_string = reverse(**opts)
+        @spoof_payload = bidi_affix(**opts)
         @target_display
       end

data/lib/unisec/blocks.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 require 'paint'
+require 'twitter_cldr'
 require 'unisec/utils'
 module Unisec
@@ -205,5 +206,38 @@ module Unisec
       end
       nil
     end
+    # Returns the name of the Unicode block containing the given character.
+    # @param char [String] Single character (only the first code point is kept, so be careful with
+    #   emojis and composed or joined characters that span several code points: everything
+    #   after the first code point is ignored).
+    # @return [String] Block name or empty string if not found.
+    # @example
+    #   Unisec::Blocks.reverse('…') # => "General Punctuation"
+    #   Unisec::Blocks.reverse('A') # => "Basic Latin"
+    #   Unisec::Blocks.reverse('💩') # => "Miscellaneous Symbols and Pictographs"
+    #   Unisec::Blocks.reverse('🇫🇷') # => "Enclosed Alphanumeric Supplement" (only first unit is kept)
+    def self.reverse(char)
+      cp_num = TwitterCldr::Utils::CodePoints.from_string(char)
+      cp = TwitterCldr::Shared::CodePoint.get(cp_num.first)
+      props = cp.properties
+      props.block.join
+    rescue NoMethodError # in case of invalid character where CodePoint.get() => nil
+      ''
+    end
+    # Display a CLI-friendly output showing the block name for a given character.
+    # @param char [String] Single character (only the first code point is kept, so be careful with
+    #   emojis and composed or joined characters that span several code points: everything
+    #   after the first code point is ignored).
+    def self.reverse_display(char)
+      blk_name = reverse(char)
+      if blk_name.empty?
+        puts "no block found for #{char.inspect}"
+      else
+        puts blk_name
+      end
+      nil
+    end
   end
 end

data/lib/unisec/cli/blocks.rb CHANGED Viewed

@@ -60,6 +60,34 @@ module Unisec
           end
         end
+        # Command `unisec blocks reverse`
+        #
+        # Example:
+        #
+        # ```plaintext
+        # $ unisec blocks reverse '…'
+        # General Punctuation
+        # $ unisec blocks reverse 'A'
+        # Basic Latin
+        # $ unisec blocks reverse '💩'
+        # Miscellaneous Symbols and Pictographs
+        # $ unisec blocks reverse '🇫🇷'
+        # Enclosed Alphanumeric Supplement
+        # ```
+        class Reverse < Dry::CLI::Command
+          desc 'Search in which Unicode block a given character is'
+          argument :char, required: true,
+                          desc: 'Single character (only one code unit, so be careful with emojis, composed or ' \
+                                'joint characters using several units, only the first code unit will be kept)'
+          # Display the Unicode block name for a given character
+          # @param char [String] Single character (only one code unit, so be careful with emojis, composed or joint characters using several units, only the first code unit will be kept).
+          def call(char: nil, **)
+            Unisec::Blocks.reverse_display(char)
+          end
+        end
         # Command `unisec blocks invalid`
         #
         # Example:

data/lib/unisec/cli/cli.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'dry/cli/completion/command'
 require 'unisec/cli/bidi'
 require 'unisec/cli/blocks'
 require 'unisec/cli/confusables'
@@ -24,17 +25,23 @@ module Unisec
       register 'bidi spoof', Bidi::Spoof
       register 'blocks invalid', Blocks::Invalid
       register 'blocks list', Blocks::List
+      register 'blocks reverse', Blocks::Reverse
       register 'blocks search', Blocks::Search
+      register 'completion', Dry::CLI::Completion::Command[self]
       register 'confusables list', Confusables::List
       register 'confusables randomize', Confusables::Randomize
       register 'dump codepoints integer', Dump::Codepoints::Integer
       register 'dump codepoints standard', Dump::Codepoints::Standard
       register 'dump dec', Dump::Dec
       register 'dump hex', Dump::Hex
+      register 'dump rev', Dump::Reverse
       register 'grep', Grep
       register 'normalize all', Normalize::All
       register 'normalize replace', Normalize::Replace
+      register 'normalize reverse', Normalize::Reverse
+      register 'planes block', Planes::Block
       register 'planes list', Planes::List
+      register 'planes reverse', Planes::Reverse
       register 'planes search', Planes::Search
       register 'properties char', Properties::Char
       register 'properties codepoints', Properties::Codepoints

data/lib/unisec/cli/dump.rb CHANGED Viewed

@@ -127,6 +127,41 @@ module Unisec
             end
           end
         end
+        # CLI command `unisec dump rev` for the method {Unisec::Hexdump.reverse} from the lib.
+        #
+        # Example:
+        #
+        # ```plaintext
+        # $ unisec dump rev 0a0d --enc=utf16be
+        # ਍ (U+0A0D) - 0a0d
+        #
+        # $ unisec dump rev 808080 --enc=utf8 --exact=false
+        # 񀀀 (U+40000) - f1 80 80 80
+        # 򀀀 (U+80000) - f2 80 80 80
+        # 󀀀 (U+C0000) - f3 80 80 80
+        # 􀀀 (U+100000) - f4 80 80 80
+        # ```
+        class Reverse < Dry::CLI::Command
+          desc 'Reverse search in hexadecimal dump'
+          argument :hexbytes, required: true,
+                              desc: 'Byte(s) in hexadecimal to search for. Read from STDIN if equal to -.'
+          option :enc, default: 'utf8', values: %w[utf8 utf16be utf16le utf32be utf32le],
+                       desc: 'The target encoding in which to search.'
+          option :exact, default: 'true', values: %w[true false],
+                         desc: 'true (default) = exact search, false = "sub-string" search / the value is included ' \
+                               'in the encoded value'
+          # Search X byte(s) hexadecimal value in Y encoding, basically which characters will give this resulting encoded value
+          # @param hexbytes [String] Byte(s) in hexadecimal to search for. Read from STDIN if equal to -.
+          def call(hexbytes: nil, **options)
+            hexbytes = $stdin.read.chomp if hexbytes == '-'
+            puts Unisec::Hexdump.display_reverse(hexbytes, options[:enc], exact: options[:exact].to_bool)
+          end
+        end
       end
     end
   end

data/lib/unisec/cli/normalization.rb CHANGED Viewed

@@ -81,6 +81,37 @@ module Unisec
             puts Unisec::Normalization.new(input).display_replace
           end
         end
+        # Command `unisec normalize reverse '<'`
+        #
+        # Example:
+        #
+        # ```plaintext
+        # $ unisec normalize reverse '"' --forms 'nfkc,nfkd'
+        # Original:
+        #   " (U+0022)
+        # NFKC
+        #   ＂ (U+FF02)
+        # NFKD
+        #   ＂ (U+FF02)
+        # ```
+        class Reverse < Dry::CLI::Command
+          desc 'List reverse normalization candidates (what characters will transform into target after normalization)'
+          argument :target, required: true,
+                            desc: 'Normalization target. Read from STDIN if equal to -.'
+          option :forms, default: %i[nfc nfd nfkc nfkd],
+                         desc: 'Output only in the specified normalization form(s). ' \
+                               'Separate by comma if multiple values.'
+          # Reverse normalize
+          # @param target [String] Normalization target
+          def call(target: nil, **options)
+            target = $stdin.read.chomp if target == '-'
+            puts Unisec::Normalization.display_reverse_normalize(target, forms: options[:forms])
+          end
+        end
       end
     end
   end

data/lib/unisec/cli/planes.rb CHANGED Viewed

@@ -93,6 +93,58 @@ module Unisec
                                                     with_count: options[:with_count].to_bool)
           end
         end
+        # Command `unisec planes reverse`
+        #
+        # Example:
+        #
+        # ```plaintext
+        # $ unisec planes reverse '…'
+        # Basic Multilingual Plane
+        # $ unisec planes reverse '🨂'
+        # Supplementary Multilingual Plane
+        # $ unisec planes reverse '𠀀'
+        # Supplementary Ideographic Plane
+        # $ unisec planes reverse '🇫🇷'
+        # Supplementary Multilingual Plane
+        # ```
+        class Reverse < Dry::CLI::Command
+          desc 'Search in which Unicode plane a given character is'
+          argument :char, required: true,
+                          desc: 'Single character (only one code unit, so be careful with emojis, composed or joint ' \
+                                'characters using several units), only the first code unit will be kept).'
+          # Display the Unicode plane name for a given character
+          # @param char [String] Single character (only one code unit, so be careful with emojis,
+          #   composed or joint characters using several units, only the first code unit will be kept).
+          def call(char: nil, **)
+            Unisec::Planes.reverse_display(char)
+          end
+        end
+        # Command `unisec planes block`
+        #
+        # Example:
+        #
+        # ```plaintext
+        # $ unisec planes block 'Basic Latin'
+        # Basic Multilingual Plane
+        # $ unisec planes block 'Miscellaneous Symbols and Pictographs'
+        # Supplementary Multilingual Plane
+        # ```
+        class Block < Dry::CLI::Command
+          desc 'Search in which Unicode plane a block is'
+          argument :block_arg, required: true,
+                               desc: 'Block name (case insensitive)'
+          # Display the Unicode plane name for a given block
+          # @param block_arg [String] Block name (case insensitive).
+          def call(block_arg: nil, **)
+            Unisec::Planes.block_display(block_arg)
+          end
+        end
       end
     end
   end

data/lib/unisec/hexdump.rb CHANGED Viewed

@@ -85,6 +85,33 @@ module Unisec
       str.encode('UTF-32LE').to_hex.scan(/.{8}/).join(' ')
     end
+    # Find which characters, once encoded in the given encoding, produce (or contain) the given hexadecimal byte(s)
+    # @param hexbytes [String] Byte(s) in hexadecimal to search for
+    # @param enc [String] The target encoding in which to search. It uses Unisec CLI argument values (utf8 utf16be utf16le utf32be utf32le).
+    # @param exact [TrueClass|FalseClass] true (default) = exact search, false = "sub-string" search / the value is included in the encoded value
+    # @return [Array<String>] all matching source characters
+    # @example
+    #   Unisec::Hexdump.reverse('61', 'utf8') # => ["a"]
+    #   Unisec::Hexdump.reverse('a6', 'utf8', exact: true) # => []
+    #   Unisec::Hexdump.reverse('a6', 'utf8', exact: false) # => ["¦",  "æ",  "Ħ",  "Ŧ",  "Ʀ", "Ǧ", … ]
+    #   Unisec::Hexdump.reverse('0d0a', 'utf16be', exact: true) # => ["\u0D0A"] (ഊ)
+    def self.reverse(hexbytes, enc, exact: true)
+      chars = []
+      (0x000000..0x10FFFF).each do |i|
+        char = i.chr(Unisec::Utils::Arguments.argenc2enc(enc, target: 'class'))
+        encoded_value = Unisec::Hexdump.send(enc, char).delete(' ')
+        if exact && encoded_value == hexbytes # exact match
+          chars << char
+          break
+        elsif !exact && encoded_value.include?(hexbytes) # includes value
+          chars << char
+        end
+      rescue RangeError # skip invalid code points for selected encoding
+        next
+      end
+      chars
+    end
     # Display a CLI-friendly output summarizing the hexdump in all Unicode encodings
     # @return [String] CLI-ready output
     # @example
@@ -101,5 +128,29 @@ module Unisec
         "UTF-32BE: #{@utf32be}\n" \
         "UTF-32LE: #{@utf32le}"
     end
+    # Display a CLI-friendly output summarizing the reverse hexdump search results
+    # @param hexbytes [String] see {Unisec::Hexdump.reverse}
+    # @param enc [String] see {Unisec::Hexdump.reverse}
+    # @param exact [TrueClass|FalseClass] see {Unisec::Hexdump.reverse}
+    # @return [String] CLI-ready output
+    # @example
+    #   puts Unisec::Hexdump.display_reverse('0d0a', 'utf16be', exact: true)
+    #   # ഊ (U+0D0A) - 0d0a
+    #   puts Unisec::Hexdump.display_reverse('808080', 'utf8', exact: false)
+    #   # 񀀀 (U+40000) - f1 80 80 80
+    #   # 򀀀 (U+80000) - f2 80 80 80
+    #   # 󀀀 (U+C0000) - f3 80 80 80
+    #   # 􀀀 (U+100000) - f4 80 80 80
+    def self.display_reverse(hexbytes, enc, exact: true)
+      res = Unisec::Hexdump.reverse(hexbytes, enc, exact: exact)
+      out = ''
+      res.each do |char|
+        cp = Utils::String.char2codepoint(char)
+        hxd = Unisec::Hexdump.send(enc, char)
+        out += "#{char.encode('UTF-8')} (#{cp}) - #{hxd}\n"
+      end
+      out
+    end
   end
 end

data/lib/unisec/normalization.rb CHANGED Viewed

@@ -95,6 +95,35 @@ module Unisec
       Normalization.replace_bypass(@original)
     end
+    # Find the list of symbols that will transform into a given symbol after normalization
+    # @param target [String]
+    # @param forms [String|Symbol|Array<Symbol>]
+    # @return [Hash] (results won't include input)
+    # @example
+    #   Unisec::Normalization.reverse_normalize('<') # => {nfc: [], nfd: [], nfkc: ["﹤", "＜"], nfkd: ["﹤", "＜"]}
+    #   Unisec::Normalization.reverse_normalize('.', forms: [:nfkc, :nfkd]) # => {nfkc: ["․", "﹒", "．"], nfkd: ["․", "﹒", "．"]}
+    #   Unisec::Normalization.reverse_normalize('ffi', forms: :nfkc) # => {nfkc: ["ﬃ"]}
+    #   Unisec::Normalization.reverse_normalize('≯', forms: 'nfd') # => {nfd: ["≯"]}
+    #   Unisec::Normalization.reverse_normalize('ô', forms: 'nfc,nfd') # => {nfc: [], nfd: []}
+    def self.reverse_normalize(target, forms: %i[nfc nfd nfkc nfkd])
+      forms = Utils::Arguments.to_array_of_sym(forms)
+      result = {}
+      forms.each do |form|
+        result[form] = []
+      end
+      (0x000000..0x10FFFF).each do |codepoint|
+        char = codepoint.chr(Encoding::UTF_8)
+        forms.each do |form|
+          result[form] << char if (char.unicode_normalize(form) == target) && (char != target)
+        end
+      rescue RangeError # skip UTF-16 surrogates and potential other invalid code points
+        next
+      end
+      result
+    end
     # Display a CLI-friendly output summarizing all normalization forms
     # @return [String] CLI-ready output
     # @example
@@ -124,6 +153,18 @@ module Unisec
     # Display a CLI-friendly output of the XSS payload to bypass HTML escape and
     # what it does once normalized in NFKC & NFKD.
+    # @return [String] CLI-ready output
+    # @example
+    #   $ puts Unisec::Normalization.new('<script>').display_replace
+    #   # =>
+    #   # Original: <script>
+    #   #   U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
+    #   # Bypass payload: ＜script＞
+    #   #   U+FF1C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+FF1E
+    #   # NFKC: <script>
+    #   #   U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
+    #   # NFKD: <script>
+    #   #   U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
     def display_replace
       colorize = lambda { |form_title, form_attr|
         "#{Paint[form_title.to_s, :underline,
@@ -135,5 +176,36 @@ module Unisec
         colorize.call('NFKC', Normalization.nfkc(payload)) +
         colorize.call('NFKD', Normalization.nfkd(payload))
     end
+    # Display a CLI-friendly output of the reverse normalization results
+    # @param target [String] see {Unisec::Normalization.reverse_normalize}
+    # @param forms [String|Symbol|Array<Symbol>] see {Unisec::Normalization.reverse_normalize}
+    # @return [String] CLI-ready output
+    # @example
+    #   puts Unisec::Normalization.display_reverse_normalize('<')
+    #   # =>
+    #   # Original:
+    #   #   < (U+003C)
+    #   # NFKC
+    #   #   ﹤ (U+FE64)
+    #   #   ＜ (U+FF1C)
+    #   # NFKD
+    #   #   ﹤ (U+FE64)
+    #   #   ＜ (U+FF1C)
+    def self.display_reverse_normalize(target, forms: %i[nfc nfd nfkc nfkd]) # rubocop:disable Metrics/AbcSize
+      colorize_form = ->(form_title) { Paint[form_title, :underline, :bold] }
+      colorize_char = ->(char) { "  #{char} (#{Paint[Unisec::Utils::String.chars2codepoints(char), :red]})\n" }
+      out = "#{colorize_form.call('Original')}:\n#{colorize_char.call(target)}"
+      res = Unisec::Normalization.reverse_normalize(target, forms: forms) # => {nfc: [], nfd: [], nfkc: ["﹤", "＜"], nfkd: ["﹤", "＜"]}
+      res.each_key do |k|
+        next if res[k].empty?
+        out += "#{colorize_form.call(k.to_s.upcase)}\n"
+        res[k].each do |v|
+          out += colorize_char.call(v)
+        end
+      end
+      out
+    end
   end
 end

data/lib/unisec/planes.rb CHANGED Viewed

@@ -220,5 +220,71 @@ module Unisec
       end
       nil
     end
+    # Returns the name of the Unicode plane containing the given character.
+    # @param char [String] Single character (only the first code point is kept, so be careful with
+    #   emojis and composed or joined characters that span several code points: everything
+    #   after the first code point is ignored).
+    # @return [String] Plane name or empty string if not found.
+    # @example
+    #   Unisec::Planes.reverse('…') # => "Basic Multilingual Plane"
+    #   Unisec::Planes.reverse('🨂') # => "Supplementary Multilingual Plane"
+    #   Unisec::Planes.reverse('𠀀') # => "Supplementary Ideographic Plane"
+    #   Unisec::Planes.reverse('🇫🇷') # => "Supplementary Multilingual Plane" (first unit kept)
+    def self.reverse(char)
+      return '' unless char.is_a?(String)
+      cp = Utils::String.convert_to_integer(char[0])
+      PLANES.each do |plane|
+        return plane[:name] if plane[:range].include?(cp)
+      end
+      '' # not found
+    end
+    # Display a CLI-friendly output showing the plane name for a given character.
+    # @param char [String] Single character (only the first code point is kept, so be careful with
+    #   emojis and composed or joined characters that span several code points: everything
+    #   after the first code point is ignored).
+    def self.reverse_display(char)
+      plane_name = reverse(char)
+      if plane_name.empty?
+        puts "no plane found for #{char.inspect}"
+      else
+        puts plane_name
+      end
+      nil
+    end
+    # Returns the name of the Unicode plane containing the given block.
+    # @param block_arg [String] Block name (case insensitive).
+    # @return [String] Plane name or empty string if not found.
+    # @example
+    #   Unisec::Planes.block('Basic Latin') # => "Basic Multilingual Plane"
+    #   Unisec::Planes.block('Miscellaneous Symbols and Pictographs') # => "Supplementary Multilingual Plane"
+    def self.block(block_arg) # rubocop:disable Metrics/CyclomaticComplexity
+      # support only search by block name
+      return '' if block_arg.is_a?(Integer)
+      return '' if block_arg.is_a?(String) && (block_arg.size == 1 || block_arg.start_with?('U+'))
+      blk = Blocks.block(block_arg, with_count: false)
+      return '' unless blk # block name not found
+      PLANES.each do |plane|
+        return plane[:name] if plane[:range].cover?(blk[:range])
+      end
+      '' # not found
+    end
+    # Display a CLI-friendly output showing the plane name for a given block.
+    # @param block_arg [String] Block name (case insensitive).
+    def self.block_display(block_arg)
+      plane_name = block(block_arg)
+      if plane_name.empty?
+        puts "no plane found for block #{block_arg.inspect}"
+      else
+        puts plane_name
+      end
+      nil
+    end
   end
 end

data/lib/unisec/properties.rb CHANGED Viewed

@@ -75,9 +75,10 @@ module Unisec
       end
       {
         age: props.age.join,
+        plane: Unisec::Planes.reverse(chr),
         block: props.block.join,
         category: categories[1],
-        subcategory: categories[0],
+        subcategory: "#{categories[0]} (#{cp.category})",
         codepoint: Utils::String.char2codepoint(chr),
         name: cp.name,
         script: props.script.join,
@@ -121,6 +122,7 @@ module Unisec
       display.call('Name:', data[:name])
       display.call('Code Point:', data[:codepoint] + " (#{Utils::String.convert(chr, :integer)})")
       puts
+      display.call('Plane', data[:plane])
       display.call('Block:', data[:block])
       display.call('Category:', data[:category])
       display.call('Sub-Category:', data[:subcategory])

data/lib/unisec/utils.rb CHANGED Viewed

@@ -207,7 +207,7 @@ module Unisec
       # @example
       #   Unisec::Utils::String.chars2intcodepoints('I 💕 Ruby 💎') # => "73 32 128149 32 82 117 98 121 32 128142"
       def self.chars2intcodepoints(chrs)
-        chrs.codepoints.map(&:to_s).join(' ')
+        chrs.codepoints.join(' ')
       end
       # Convert a string of hex encoded Unicode code points range to actual
@@ -236,12 +236,54 @@ module Unisec
     module Range
       # Convert a (integer) range to a range of Unicode code points
       # @param range [::Range]
-      # @return [String]
+      # @return [::String]
       # @example
       #   Unisec::Utils::Range.range2codepoint_range(1048576..1114111) # => "U+100000 - U+10FFFF"
       def self.range2codepoint_range(range)
         "#{Integer.deccp2stdhexcp(range.begin)} - #{Integer.deccp2stdhexcp(range.end)}"
       end
     end
+    module Arguments
+      # Converts an argument that is a string, a string of comma-separated arguments, or a symbol into an array of symbols.
+      # Useful for methods that are expected to work on an array of symbols but can receive inputs in various formats (e.g. from the CLI).
+      # @param input [::String|Symbol] (anything else will be returned untransformed)
+      # @return [Array<Symbol>] (or anything else if input type is not respected)
+      # @example
+      #   Unisec::Utils::Arguments.to_array_of_sym("arg") # => [:arg]
+      #   Unisec::Utils::Arguments.to_array_of_sym("a,b,c") # => [:a, :b, :c]
+      #   Unisec::Utils::Arguments.to_array_of_sym(:snake) # => [:snake]
+      #   Unisec::Utils::Arguments.to_array_of_sym([:a, :b, :c]) # => [:a, :b, :c]
+      def self.to_array_of_sym(input)
+        case input
+        when ::String # a,b,c => [:a, :b, :c]
+          input.split(',').map(&:to_sym)
+        when ::Symbol # :a => [:a]
+          [input]
+        else
+          input
+        end
+      end
+      # Converts encoding name from CLI to encoding name in standard format or Ruby Class
+      # @param argenc [::String] Encoding name as used as argument in Unisec CLI (authorized values are: utf8 utf16be utf16le utf32be utf32le).
+      # @param target [::String] 'standard' for the standard encoding name, 'class' for the matching Ruby Encoding constant
+      # @return [::String|Encoding]
+      # @example
+      #   Unisec::Utils::Arguments.argenc2enc('utf8', target: 'standard') # => "UTF-8"
+      #   Unisec::Utils::Arguments.argenc2enc('utf16be', target: 'class') # => #<Encoding:UTF-16BE (autoload)>
+      def self.argenc2enc(argenc, target: 'standard')
+        argument_encodings = %w[utf8 utf16be utf16le utf32be utf32le]
+        raise ArgumentError unless argument_encodings.include?(argenc)
+        if target == 'standard'
+          argenc.upcase.insert(3, '-')
+        elsif target == 'class'
+          Encoding.const_get(argenc.upcase.insert(3, '_')) # const_get safe thanks to input whitelist
+        else
+          raise ArgumentError
+        end
+      end
+    end
   end
 end

data/lib/unisec/version.rb CHANGED Viewed

@@ -2,5 +2,5 @@
 module Unisec
   # Version of unisec library and app
-  VERSION = '0.0.8'
+  VERSION = '0.0.9'
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: unisec
 version: !ruby/object:Gem::Version
-  version: 0.0.8
+  version: 0.0.9
 platform: ruby
 authors:
 - Alexandre ZANNI
@@ -29,14 +29,28 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.0'
+        version: '1.4'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.0'
+        version: '1.4'
+- !ruby/object:Gem::Dependency
+  name: dry-cli-completion
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 2.0.0
 - !ruby/object:Gem::Dependency
   name: paint
   requirement: !ruby/object:Gem::Requirement
@@ -71,14 +85,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.12'
+        version: '1.13'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.12'
+        version: '1.13'
 description: 'Toolkit for security research manipulating Unicode: confusables, homoglyphs,
   hexdump, code point, UTF-8, UTF-16, UTF-32, properties, regexp search, size, grapheme,
   surrogates, version, ICU, CLDR, UCD, BiDi, normalization'
@@ -137,7 +151,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 3.2.0
+      version: 3.3.0
   - - "<"
     - !ruby/object:Gem::Version
       version: '5.0'
@@ -147,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 4.0.3
+rubygems_version: 4.0.10
 specification_version: 4
 summary: Unicode Security Toolkit
 test_files: []