RubyGems - unisec - Versions diffs - 0.0.7 → 0.0.9 - Mend

unisec 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

checksums.yaml +4 -4
data/lib/unisec/bidi.rb +3 -3
data/lib/unisec/blocks.rb +35 -1
data/lib/unisec/cli/blocks.rb +28 -0
data/lib/unisec/cli/cli.rb +9 -0
data/lib/unisec/cli/dump.rb +81 -0
data/lib/unisec/cli/normalization.rb +31 -0
data/lib/unisec/cli/planes.rb +52 -0
data/lib/unisec/hexdump.rb +51 -0
data/lib/unisec/normalization.rb +72 -0
data/lib/unisec/planes.rb +66 -0
data/lib/unisec/properties.rb +4 -2
data/lib/unisec/utils.rb +105 -23
data/lib/unisec/version.rb +1 -1
metadata +21 -7

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 9081ac95de968e70cd91438a73b185efee13e33ccf16c40a2791da5963d3d67c
-  data.tar.gz: 99a651d4efc5f6b36ae088ec254e0dc950b5d84db9105dc851f29d51609615f8
+  metadata.gz: e1c859ae327cc9381cc578456525a9fc0d6e68299f10bce6cd4f6439431a7fc0
+  data.tar.gz: 8c091df7ffc3e8f720ca9e5cee3d022e4cba4876530727150cc8277d61509f7c
 SHA512:
-  metadata.gz: ab342720e300cd25e167385f70402e00ef240ed6c422f7aeea666774b4e477f423164fe3de49146bc0f4a3f2565f86effb1465f112085568c65b1625b9d911e5
-  data.tar.gz: bc105f1430c812711727600365db10871e1bd69ad274c7ccc0a2b5e1362676304ce27e5e85c6a988986fe9a82bc6189bf3c14e6ecb577f327dc41bd6923f241d
+  metadata.gz: 7981fd667521cbccf1c3fdfda8610722fdf9892392568be8bacdd36719109982e07d906c9c4b5c3aff4c90d10252b93460698a3f404348d5dcbd8783124e77cb
+  data.tar.gz: 3b32516d01be17f5d462acade421755c5420f1f2f7d596f972c87d17425a64e06cee0fc7963d916113ea970f6a8882b47aa4e84113d31c992f8cc115c2ea5f59

data/lib/unisec/bidi.rb CHANGED Viewed

@@ -18,10 +18,10 @@ module Unisec
       # @param input [String] the target string
       # @param opts [Hash] optional parameters, see {Spoof.bidi_affix}
       # @return [String] the target string
-      def set_target_display(input, **)
+      def set_target_display(input, **opts)
         @target_display = input
-        @spoof_string = reverse(**)
-        @spoof_payload = bidi_affix(**)
+        @spoof_string = reverse(**opts)
+        @spoof_payload = bidi_affix(**opts)
         @target_display
       end

data/lib/unisec/blocks.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 require 'paint'
+require 'twitter_cldr'
 require 'unisec/utils'
 module Unisec
@@ -114,7 +115,7 @@ module Unisec
           if block_arg.size == 1 # is a char (1 code unit, not one grapheme)
             found = true if blk_range.include?(Utils::String.convert_to_integer(block_arg))
           elsif block_arg.start_with?('U+') # string code point
-            found = true if blk_range.include?(Utils::String.stdhexcp2deccp(block_arg))
+            found = true if blk_range.include?(Utils::String.convert(block_arg, :integer))
           elsif blk_name.downcase == block_arg.downcase # block name
             found = true
           end
@@ -205,5 +206,38 @@ module Unisec
       end
       nil
     end
+    # Returns the name of the Unicode block containing the given character.
+    # @param char [String] Single character (only one code unit, so be careful with
+    #   emojis, composed or joint characters using several units, only the first
+    #   code unit will be kept).
+    # @return [String] Block name or empty string if not found.
+    # @example
+    #   Unisec::Blocks.reverse('…') # => "General Punctuation"
+    #   Unisec::Blocks.reverse('A') # => "Basic Latin"
+    #   Unisec::Blocks.reverse('💩') # => "Miscellaneous Symbols and Pictographs"
+    #   Unisec::Blocks.reverse('🇫🇷') # => "Enclosed Alphanumeric Supplement" (only first unit is kept)
+    def self.reverse(char)
+      cp_num = TwitterCldr::Utils::CodePoints.from_string(char)
+      cp = TwitterCldr::Shared::CodePoint.get(cp_num.first)
+      props = cp.properties
+      props.block.join
+    rescue NoMethodError # in case of invalid character where CodePoint.get() => nil
+      ''
+    end
+    # Display a CLI-friendly output showing the block name for a given character.
+    # @param char [String] Single character (only one code unit, so be careful with
+    #   emojis, composed or joint characters using several units, only the first
+    #   code unit will be kept).
+    def self.reverse_display(char)
+      blk_name = reverse(char)
+      if blk_name.empty?
+        puts "no block found for #{char.inspect}"
+      else
+        puts blk_name
+      end
+      nil
+    end
   end
 end

data/lib/unisec/cli/blocks.rb CHANGED Viewed

@@ -60,6 +60,34 @@ module Unisec
           end
         end
+        # Command `unisec blocks reverse`
+        #
+        # Example:
+        #
+        # ```plaintext
+        # $ unisec blocks reverse '…'
+        # General Punctuation
+        # $ unisec blocks reverse 'A'
+        # Basic Latin
+        # $ unisec blocks reverse '💩'
+        # Miscellaneous Symbols and Pictographs
+        # $ unisec blocks reverse '🇫🇷'
+        # Enclosed Alphanumeric Supplement
+        # ```
+        class Reverse < Dry::CLI::Command
+          desc 'Search in which Unicode block a given character is'
+          argument :char, required: true,
+                          desc: 'Single character (only one code unit, so be careful with emojis, composed or ' \
+                                'joint characters using several units, only the first code unit will be kept)'
+          # Display the Unicode block name for a given character
+          # @param char [String] Single character (only one code unit, so be careful with emojis, composed or joint characters using several units, only the first code unit will be kept).
+          def call(char: nil, **)
+            Unisec::Blocks.reverse_display(char)
+          end
+        end
         # Command `unisec blocks invalid`
         #
         # Example:

data/lib/unisec/cli/cli.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
+require 'dry/cli/completion/command'
 require 'unisec/cli/bidi'
 require 'unisec/cli/blocks'
 require 'unisec/cli/confusables'
@@ -24,15 +25,23 @@ module Unisec
       register 'bidi spoof', Bidi::Spoof
       register 'blocks invalid', Blocks::Invalid
       register 'blocks list', Blocks::List
+      register 'blocks reverse', Blocks::Reverse
       register 'blocks search', Blocks::Search
+      register 'completion', Dry::CLI::Completion::Command[self]
       register 'confusables list', Confusables::List
       register 'confusables randomize', Confusables::Randomize
+      register 'dump codepoints integer', Dump::Codepoints::Integer
+      register 'dump codepoints standard', Dump::Codepoints::Standard
       register 'dump dec', Dump::Dec
       register 'dump hex', Dump::Hex
+      register 'dump rev', Dump::Reverse
       register 'grep', Grep
       register 'normalize all', Normalize::All
       register 'normalize replace', Normalize::Replace
+      register 'normalize reverse', Normalize::Reverse
+      register 'planes block', Planes::Block
       register 'planes list', Planes::List
+      register 'planes reverse', Planes::Reverse
       register 'planes search', Planes::Search
       register 'properties char', Properties::Char
       register 'properties codepoints', Properties::Codepoints

data/lib/unisec/cli/dump.rb CHANGED Viewed

@@ -81,6 +81,87 @@ module Unisec
             end
           end
         end
+        module Codepoints
+          # CLI command `unisec dump codepoints standard`.
+          #
+          # Example:
+          #
+          # ```plaintext
+          # $ unisec dump codepoints standard "unicode"
+          # U+0075 U+006E U+0069 U+0063 U+006F U+0064 U+0065
+          # ```
+          class Standard < Dry::CLI::Command
+            desc 'Code point dump (standard format)'
+            argument :input, required: true,
+                             desc: 'String input. Read from STDIN if equal to -.'
+            # Code point dump (standard format).
+            # @param input [String] Input string to encode
+            def call(input: nil)
+              input = $stdin.read.chomp if input == '-'
+              puts Unisec::Utils::String.chars2codepoints(input)
+            end
+          end
+          # CLI command `unisec dump codepoints integer`.
+          #
+          # Example:
+          #
+          # ```plaintext
+          # $ unisec dump codepoints integer 'I 💕 Ruby 💎'
+          # 73 32 128149 32 82 117 98 121 32 128142
+          # ```
+          class Integer < Dry::CLI::Command
+            desc 'Code point dump (integer format)'
+            argument :input, required: true,
+                             desc: 'String input. Read from STDIN if equal to -.'
+            # Code point dump (integer format).
+            # @param input [String] Input string to encode
+            def call(input: nil)
+              input = $stdin.read.chomp if input == '-'
+              puts Unisec::Utils::String.chars2intcodepoints(input)
+            end
+          end
+        end
+        # CLI command `unisec dump rev` for the method {Unisec::Hexdump.reverse} from the lib.
+        #
+        # Example:
+        #
+        # ```plaintext
+        # $ unisec dump rev 0a0d --enc=utf16be
+        # ਍ (U+0A0D) - 0a0d
+        #
+        # $ unisec dump rev 808080 --enc=utf8 --exact=false
+        # 񀀀 (U+40000) - f1 80 80 80
+        # 򀀀 (U+80000) - f2 80 80 80
+        # 󀀀 (U+C0000) - f3 80 80 80
+        # 􀀀 (U+100000) - f4 80 80 80
+        # ```
+        class Reverse < Dry::CLI::Command
+          desc 'Reverse search in hexadecimal dump'
+          argument :hexbytes, required: true,
+                              desc: 'Byte(s) in hexadecimal to search for. Read from STDIN if equal to -.'
+          option :enc, default: 'utf8', values: %w[utf8 utf16be utf16le utf32be utf32le],
+                       desc: 'The target encoding in which to search.'
+          option :exact, default: 'true', values: %w[true false],
+                         desc: 'true (default) = exact search, false = "sub-string" search / the value is included ' \
+                               'in the encoded value'
+          # Search X byte(s) hexadecimal value in Y encoding, basically which characters will give this resulting encoded value
+          # @param hexbytes [String] Byte(s) in hexadecimal to search for. Read from STDIN if equal to -.
+          def call(hexbytes: nil, **options)
+            hexbytes = $stdin.read.chomp if hexbytes == '-'
+            puts Unisec::Hexdump.display_reverse(hexbytes, options[:enc], exact: options[:exact].to_bool)
+          end
+        end
       end
     end
   end

data/lib/unisec/cli/normalization.rb CHANGED Viewed

@@ -81,6 +81,37 @@ module Unisec
             puts Unisec::Normalization.new(input).display_replace
           end
         end
+        # Command `unisec normalize reverse '<'`
+        #
+        # Example:
+        #
+        # ```plaintext
+        # $ unisec normalize reverse '"' --forms 'nfkc,nfkd'
+        # Original:
+        #   " (U+0022)
+        # NFKC
+        #   ＂ (U+FF02)
+        # NFKD
+        #   ＂ (U+FF02)
+        # ```
+        class Reverse < Dry::CLI::Command
+          desc 'List reverse normalization candidates (what characters will transform into target after normalization)'
+          argument :target, required: true,
+                            desc: 'Normalization target. Read from STDIN if equal to -.'
+          option :forms, default: %i[nfc nfd nfkc nfkd],
+                         desc: 'Output only in the specified normalization form(s). ' \
+                               'Separate by comma if multiple values.'
+          # Reverse normalize
+          # @param target [String] Normalization target
+          def call(target: nil, **options)
+            target = $stdin.read.chomp if target == '-'
+            puts Unisec::Normalization.display_reverse_normalize(target, forms: options[:forms])
+          end
+        end
       end
     end
   end

data/lib/unisec/cli/planes.rb CHANGED Viewed

@@ -93,6 +93,58 @@ module Unisec
                                                     with_count: options[:with_count].to_bool)
           end
         end
+        # Command `unisec planes reverse`
+        #
+        # Example:
+        #
+        # ```plaintext
+        # $ unisec planes reverse '…'
+        # Basic Multilingual Plane
+        # $ unisec planes reverse '🨂'
+        # Supplementary Multilingual Plane
+        # $ unisec planes reverse '𠀀'
+        # Supplementary Ideographic Plane
+        # $ unisec planes reverse '🇫🇷'
+        # Supplementary Multilingual Plane
+        # ```
+        class Reverse < Dry::CLI::Command
+          desc 'Search in which Unicode plane a given character is'
+          argument :char, required: true,
+                          desc: 'Single character (only one code unit, so be careful with emojis, composed or joint ' \
+                                'characters using several units), only the first code unit will be kept).'
+          # Display the Unicode plane name for a given character
+          # @param char [String] Single character (only one code unit, so be careful with emojis,
+          #   composed or joint characters using several units, only the first code unit will be kept).
+          def call(char: nil, **)
+            Unisec::Planes.reverse_display(char)
+          end
+        end
+        # Command `unisec planes block`
+        #
+        # Example:
+        #
+        # ```plaintext
+        # $ unisec planes block 'Basic Latin'
+        # Basic Multilingual Plane
+        # $ unisec planes block 'Miscellaneous Symbols and Pictographs'
+        # Supplementary Multilingual Plane
+        # ```
+        class Block < Dry::CLI::Command
+          desc 'Search in which Unicode plane a block is'
+          argument :block_arg, required: true,
+                               desc: 'Block name (case insensitive)'
+          # Display the Unicode plane name for a given block
+          # @param block_arg [String] Block name (case insensitive).
+          def call(block_arg: nil, **)
+            Unisec::Planes.block_display(block_arg)
+          end
+        end
       end
     end
   end

data/lib/unisec/hexdump.rb CHANGED Viewed

@@ -85,6 +85,33 @@ module Unisec
       str.encode('UTF-32LE').to_hex.scan(/.{8}/).join(' ')
     end
+    # Search X byte(s) hexadecimal value in Y encoding, basically which characters will give this resulting encoded value
+    # @param hexbytes [String] Byte(s) in hexadecimal to search for
+    # @param enc [String] The target encoding in which to search. It uses Unisec CLI argument values (utf8 utf16be utf16le utf32be utf32le).
+    # @param exact [TrueClass|FalseClass] true (default) = exact search, false = "sub-string" search / the value is included in the encoded value
+    # @return [Array<String>] all matching source characters
+    # @example
+    #   Unisec::Hexdump.reverse('61', 'utf8') # => ["a"]
+    #   Unisec::Hexdump.reverse('a6', 'utf8', exact: true) # => []
+    #   Unisec::Hexdump.reverse('a6', 'utf8', exact: false) # => ["¦",  "æ",  "Ħ",  "Ŧ",  "Ʀ", "Ǧ", … ]
+    #   Unisec::Hexdump.reverse('0d0a', 'utf16be', exact: true) # => ["\u0D0A"] (ഊ)
+    def self.reverse(hexbytes, enc, exact: true)
+      chars = []
+      (0x000000..0x10FFFF).each do |i|
+        char = i.chr(Unisec::Utils::Arguments.argenc2enc(enc, target: 'class'))
+        encoded_value = Unisec::Hexdump.send(enc, char).delete(' ')
+        if exact && encoded_value == hexbytes # exact match
+          chars << char
+          break
+        elsif !exact && encoded_value.include?(hexbytes) # includes value
+          chars << char
+        end
+      rescue RangeError # skip invalid code points for selected encoding
+        next
+      end
+      chars
+    end
     # Display a CLI-friendly output summarizing the hexdump in all Unicode encodings
     # @return [String] CLI-ready output
     # @example
@@ -101,5 +128,29 @@ module Unisec
         "UTF-32BE: #{@utf32be}\n" \
         "UTF-32LE: #{@utf32le}"
     end
+    # Display a CLI-friendly output summarizing the reverse hexdump search results
+    # @param hexbytes [String] see {Unisec::Hexdump.reverse}
+    # @param enc [String] see {Unisec::Hexdump.reverse}
+    # @param exact [TrueClass|FalseClass] see {Unisec::Hexdump.reverse}
+    # @return [String] CLI-ready output
+    # @example
+    #   puts Unisec::Hexdump.display_reverse('0d0a', 'utf16be', exact: true)
+    #   # ഊ (U+0D0A) - 0d0a
+    #   puts Unisec::Hexdump.display_reverse('808080', 'utf8', exact: false)
+    #   # 񀀀 (U+40000) - f1 80 80 80
+    #   # 򀀀 (U+80000) - f2 80 80 80
+    #   # 󀀀 (U+C0000) - f3 80 80 80
+    #   # 􀀀 (U+100000) - f4 80 80 80
+    def self.display_reverse(hexbytes, enc, exact: true)
+      res = Unisec::Hexdump.reverse(hexbytes, enc, exact: exact)
+      out = ''
+      res.each do |char|
+        cp = Utils::String.char2codepoint(char)
+        hxd = Unisec::Hexdump.send(enc, char)
+        out += "#{char.encode('UTF-8')} (#{cp}) - #{hxd}\n"
+      end
+      out
+    end
   end
 end

data/lib/unisec/normalization.rb CHANGED Viewed

@@ -95,6 +95,35 @@ module Unisec
       Normalization.replace_bypass(@original)
     end
+    # Find the list of symbols that will transform into a given symbol after normalization
+    # @param target [String]
+    # @param forms [String|Symbol|Array<Symbol>]
+    # @return [Hash] (results won't include input)
+    # @example
+    #   Unisec::Normalization.reverse_normalize('<') # => {nfc: [], nfd: [], nfkc: ["﹤", "＜"], nfkd: ["﹤", "＜"]}
+    #   Unisec::Normalization.reverse_normalize('.', forms: [:nfkc, :nfkd]) # => {nfkc: ["․", "﹒", "．"], nfkd: ["․", "﹒", "．"]}
+    #   Unisec::Normalization.reverse_normalize('ffi', forms: :nfkc) # => {nfkc: ["ﬃ"]}
+    #   Unisec::Normalization.reverse_normalize('≯', forms: 'nfd') # => {nfd: ["≯"]}
+    #   Unisec::Normalization.reverse_normalize('ô', forms: 'nfc,nfd') # => {nfc: [], nfd: []}
+    def self.reverse_normalize(target, forms: %i[nfc nfd nfkc nfkd])
+      forms = Utils::Arguments.to_array_of_sym(forms)
+      result = {}
+      forms.each do |form|
+        result[form] = []
+      end
+      (0x000000..0x10FFFF).each do |codepoint|
+        char = codepoint.chr(Encoding::UTF_8)
+        forms.each do |form|
+          result[form] << char if (char.unicode_normalize(form) == target) && (char != target)
+        end
+      rescue RangeError # skip UTF-16 surrogates and potential other invalid code points
+        next
+      end
+      result
+    end
     # Display a CLI-friendly output summarizing all normalization forms
     # @return [String] CLI-ready output
     # @example
@@ -124,6 +153,18 @@ module Unisec
     # Display a CLI-friendly output of the XSS payload to bypass HTML escape and
     # what it does once normalized in NFKC & NFKD.
+    # @return [String] CLI-ready output
+    # @example
+    #   $ puts Unisec::Normalization.new('<script>').display_replace
+    #   # =>
+    #   # Original: <script>
+    #   #   U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
+    #   # Bypass payload: ＜script＞
+    #   #   U+FF1C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+FF1E
+    #   # NFKC: <script>
+    #   #   U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
+    #   # NFKD: <script>
+    #   #   U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
     def display_replace
       colorize = lambda { |form_title, form_attr|
         "#{Paint[form_title.to_s, :underline,
@@ -135,5 +176,36 @@ module Unisec
         colorize.call('NFKC', Normalization.nfkc(payload)) +
         colorize.call('NFKD', Normalization.nfkd(payload))
     end
+    # Display a CLI-friendly output of the reverse normalization results
+    # @param target [String] see {Unisec::Normalization.reverse_normalize}
+    # @param forms [String|Symbol|Array<Symbol>] see {Unisec::Normalization.reverse_normalize}
+    # @return [String] CLI-ready output
+    # @example
+    #   puts Unisec::Normalization.display_reverse_normalize('<')
+    #   # =>
+    #   # Original:
+    #   #   < (U+003C)
+    #   # NFKC
+    #   #   ﹤ (U+FE64)
+    #   #   ＜ (U+FF1C)
+    #   # NFKD
+    #   #   ﹤ (U+FE64)
+    #   #   ＜ (U+FF1C)
+    def self.display_reverse_normalize(target, forms: %i[nfc nfd nfkc nfkd]) # rubocop:disable Metrics/AbcSize
+      colorize_form = ->(form_title) { Paint[form_title, :underline, :bold] }
+      colorize_char = ->(char) { "  #{char} (#{Paint[Unisec::Utils::String.chars2codepoints(char), :red]})\n" }
+      out = "#{colorize_form.call('Original')}:\n#{colorize_char.call(target)}"
+      res = Unisec::Normalization.reverse_normalize(target, forms: forms) # => {nfc: [], nfd: [], nfkc: ["﹤", "＜"], nfkd: ["﹤", "＜"]}
+      res.each_key do |k|
+        next if res[k].empty?
+        out += "#{colorize_form.call(k.to_s.upcase)}\n"
+        res[k].each do |v|
+          out += colorize_char.call(v)
+        end
+      end
+      out
+    end
   end
 end

data/lib/unisec/planes.rb CHANGED Viewed

@@ -220,5 +220,71 @@ module Unisec
       end
       nil
     end
+    # Returns the name of the Unicode plane containing the given character.
+    # @param char [String] Single character (only one code unit, so be careful with
+    #   emojis, composed or joint characters using several units, only the first
+    #   code unit will be kept).
+    # @return [String] Plane name or empty string if not found.
+    # @example
+    #   Unisec::Planes.reverse('…') # => "Basic Multilingual Plane"
+    #   Unisec::Planes.reverse('🨂') # => "Supplementary Multilingual Plane"
+    #   Unisec::Planes.reverse('𠀀') # => "Supplementary Ideographic Plane"
+    #   Unisec::Planes.reverse('🇫🇷') # => "Supplementary Multilingual Plane" (first unit kept)
+    def self.reverse(char)
+      return '' unless char.is_a?(String)
+      cp = Utils::String.convert_to_integer(char[0])
+      PLANES.each do |plane|
+        return plane[:name] if plane[:range].include?(cp)
+      end
+      '' # not found
+    end
+    # Display a CLI-friendly output showing the plane name for a given character.
+    # @param char [String] Single character (only one code unit, so be careful with
+    #   emojis, composed or joint characters using several units, only the first
+    #   code unit will be kept).
+    def self.reverse_display(char)
+      plane_name = reverse(char)
+      if plane_name.empty?
+        puts "no plane found for #{char.inspect}"
+      else
+        puts plane_name
+      end
+      nil
+    end
+    # Returns the name of the Unicode plane containing the given block.
+    # @param block_arg [String] Block name (case insensitive).
+    # @return [String] Plane name or empty string if not found.
+    # @example
+    #   Unisec::Planes.block('Basic Latin') # => "Basic Multilingual Plane"
+    #   Unisec::Planes.block('Miscellaneous Symbols and Pictographs') # => "Supplementary Multilingual Plane"
+    def self.block(block_arg) # rubocop:disable Metrics/CyclomaticComplexity
+      # support only search by block name
+      return '' if block_arg.is_a?(Integer)
+      return '' if block_arg.is_a?(String) && (block_arg.size == 1 || block_arg.start_with?('U+'))
+      blk = Blocks.block(block_arg, with_count: false)
+      return '' unless blk # block name not found
+      PLANES.each do |plane|
+        return plane[:name] if plane[:range].cover?(blk[:range])
+      end
+      '' # not found
+    end
+    # Display a CLI-friendly output showing the plane name for a given block.
+    # @param block_arg [String] Block name (case insensitive).
+    def self.block_display(block_arg)
+      plane_name = block(block_arg)
+      if plane_name.empty?
+        puts "no plane found for block #{block_arg.inspect}"
+      else
+        puts plane_name
+      end
+      nil
+    end
   end
 end

data/lib/unisec/properties.rb CHANGED Viewed

@@ -75,9 +75,10 @@ module Unisec
       end
       {
         age: props.age.join,
+        plane: Unisec::Planes.reverse(chr),
         block: props.block.join,
         category: categories[1],
-        subcategory: categories[0],
+        subcategory: "#{categories[0]} (#{cp.category})",
         codepoint: Utils::String.char2codepoint(chr),
         name: cp.name,
         script: props.script.join,
@@ -119,8 +120,9 @@ module Unisec
       data = Properties.char(chr)
       display = ->(key, value) { puts Paint[key, :red, :bold].ljust(30) + " #{value}" }
       display.call('Name:', data[:name])
-      display.call('Code Point:', data[:codepoint])
+      display.call('Code Point:', data[:codepoint] + " (#{Utils::String.convert(chr, :integer)})")
       puts
+      display.call('Plane', data[:plane])
       display.call('Block:', data[:block])
       display.call('Category:', data[:category])
       display.call('Sub-Category:', data[:subcategory])

data/lib/unisec/utils.rb CHANGED Viewed

@@ -55,27 +55,31 @@ module Unisec
     # About string conversion and manipulation.
     module String
       # Convert a string input into the chosen type.
-      # @param input [String] If the target type is `:integer`, the string must represent a number encoded in
-      #   hexadecimal, decimal, binary. If it's a Unicode string, only the first code point will be taken into account.
-      # @param target_type [Symbol] Convert to the chosen type. Currently only supports `:integer`.
+      # @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
+      #   The input must represent a character encoded in hexadecimal, decimal, binary or standard code point format.
+      #   See {convert_to_integer} and {convert_to_char} for detailed examples.
+      # @param target_type [Symbol] Convert to the chosen type. Currently only supports `:integer` and `:char`.
       # @return [Variable] The type of the output depends on the chosen `target_type`.
       # @example
       #   Unisec::Utils::String.convert('0x1f4a9', :integer) # => 128169
+      #   Unisec::Utils::String.convert('0x1f4a9', :char) # => "💩"
       def self.convert(input, target_type)
         case target_type
         when :integer
           convert_to_integer(input)
+        when :char
+          convert_to_char(input)
         else
           raise TypeError, "Target type \"#{target_type}\" not avaible"
         end
       end
-      # Internal method used for {.convert}.
+      # Internal method used for {convert}.
       #
       # Convert a string input into integer.
-      # @param input [String] The string must represent a number encoded in hexadecimal, decimal, binary. If it's a
-      #   Unicode string, only the first code point will be taken into account. The input type is determined
-      #   automatically based on the prefix.
+      # @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
+      #   The input must represent a character encoded in hexadecimal, decimal, binary, standard code point format.
+      #   The input type is determined automatically based on the prefix.
       # @return [Integer]
       # @example
       #   # Hexadecimal
@@ -86,10 +90,14 @@ module Unisec
       #   Unisec::Utils::String.convert_to_integer('0b11111010010101001') # => 128169
       #   # Unicode string
       #   Unisec::Utils::String.convert_to_integer('💩') # => 128169
+      #   # Standardized format of hexadecimal code point
+      #   Unisec::Utils::String.convert_to_integer('U+1F4A9') # => 128169
       def self.convert_to_integer(input)
         case autodetect(input)
         when :hexadecimal
           input.hex2dec(prefix: '0x').to_i
+        when :stdcp
+          input.hex2dec(prefix: 'U+').to_i
         when :decimal
           input.to_i
         when :binary
@@ -101,11 +109,38 @@ module Unisec
         end
       end
+      # Internal method used for {convert}.
+      #
+      # Convert a string input into a character.
+      # @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
+      #   The input must represent a character encoded in hexadecimal, decimal, binary, standard code point format.
+      #   The input type is determined automatically based on the prefix.
+      # @return [String]
+      # @example
+      #   # Hexadecimal
+      #   Unisec::Utils::String.convert_to_char('0x1f4a9') # => "💩"
+      #   # Decimal
+      #   Unisec::Utils::String.convert_to_char('0d128169') # => "💩"
+      #   # Binary
+      #   Unisec::Utils::String.convert_to_char('0b11111010010101001') # => "💩"
+      #   # Unicode string
+      #   Unisec::Utils::String.convert_to_char('💩') # => "💩"
+      #   # Standardized format of hexadecimal code point
+      #   Unisec::Utils::String.convert_to_char('U+1F4A9') # => "💩"
+      def self.convert_to_char(input)
+        case autodetect(input)
+        when :hexadecimal, :stdcp, :decimal, :binary, :string
+          [convert(input, :integer)].pack('U')
+        else
+          raise TypeError, "Input \"#{input}\" is not of the expected type"
+        end
+      end
       # Internal method used for {.convert}.
       #
       # Autodetect the representation type of the string input.
       # @param str [String] Input.
-      # @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string`.
+      # @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string`, `:stdcp`.
       # @example
       #   # Hexadecimal
       #   Unisec::Utils::String.autodetect('0x1f4a9') # => :hexadecimal
@@ -115,10 +150,14 @@ module Unisec
       #   Unisec::Utils::String.autodetect('0b11111010010101001') # => :binary
       #   # Unicode string
       #   Unisec::Utils::String.autodetect('💩') # => :string
+      #   # Standardized format of hexadecimal code point
+      #   Unisec::Utils::String.autodetect('U+1F4A9') # => :stdcp
       def self.autodetect(str)
         case str
-        when /0x[0-9a-fA-F]/
+        when /0x[0-9a-fA-F]+/
           :hexadecimal
+        when /U\+[0-9A-F]+/
+          :stdcp
         when /0d[0-9]+/
           :decimal
         when /0b[0-1]+/
@@ -141,8 +180,9 @@ module Unisec
       # Display the code point in Unicode format for a given character (code point as string)
       # @param chr [String] Unicode code point (as character / string)
       # @return [String] code point in Unicode format
+      # @todo Replace this method by target type :stdcp in String.convert()
       # @example
-      #   Unisec::Properties.char2codepoint('💎') # => "U+1F48E"
+      #   Unisec::Utils::String.char2codepoint('💎') # => "U+1F48E"
       def self.char2codepoint(chr)
         Integer.deccp2stdhexcp(chr.codepoints.first)
       end
@@ -151,8 +191,8 @@ module Unisec
       # @param chrs [String] Unicode code points (as characters / string)
       # @return [String] code points in Unicode format
       # @example
-      #   Unisec::Properties.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
-      #   Unisec::Properties.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"
+      #   Unisec::Utils::String.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
+      #   Unisec::Utils::String.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"
       def self.chars2codepoints(chrs)
         out = []
         chrs.each_char do |chr|
@@ -161,6 +201,15 @@ module Unisec
         out.join(' ')
       end
+      # Display the code points in integer format for the given characters (code points as string)
+      # @param chrs [String] Unicode code points (as characters / string)
+      # @return [String] code points in integer format
+      # @example
+      #   Unisec::Utils::String.chars2intcodepoints('I 💕 Ruby 💎') # => "73 32 128149 32 82 117 98 121 32 128142"
+      def self.chars2intcodepoints(chrs)
+        chrs.codepoints.join(' ')
+      end
       # Convert a string of hex encoded Unicode code points range to actual
       # integer Ruby range.
       # @param range_str [String] Unicode code points range as in data/Blocks.txt
@@ -170,22 +219,13 @@ module Unisec
       def self.to_range(range_str)
         ::Range.new(*range_str.split('..').map { |x| x.hex2dec.to_i })
       end
-      # Convert from standardized format hexadecimal code point to decimal code point
-      # @param std_hex_cp [String] Code point in standardized hexadecimal format
-      # @return [Integer] Code point in decimal format
-      # @example
-      #   Unisec::Utils::String.stdhexcp2deccp('U+2026') # => 8230
-      def self.stdhexcp2deccp(std_hex_cp)
-        hex = "0x#{std_hex_cp[2..]}" # replace U+ prefix with 0x
-        convert_to_integer(hex)
-      end
     end
     module Integer
       # Convert from decimal code point to standardized format hexadecimal code point
       # @param int_cp [Integer] Code point in decimal format
       # @return [String] code point in Unicode format
+      # @todo Replace this method with Integer.convert()
       # @example
       #   Unisec::Utils::Integer.deccp2stdhexcp(128640) # => "U+1F680"
       def self.deccp2stdhexcp(int_cp)
@@ -196,12 +236,54 @@ module Unisec
     module Range
       # Convert a (integer) range to a range of Unicode code points
       # @param range [::Range]
-      # @return [String]
+      # @return [::String]
       # @example
       #   Unisec::Utils::Range.range2codepoint_range(1048576..1114111) # => "U+100000 - U+10FFFF"
       def self.range2codepoint_range(range)
         "#{Integer.deccp2stdhexcp(range.begin)} - #{Integer.deccp2stdhexcp(range.end)}"
       end
     end
+    module Arguments
+      # Converts an argument that is a string, a string of comma-separated arguments, or a symbol into an array of symbols.
+      # Useful for methods that are expected to work on an array of symbols but can receive various formats of inputs (e.g. from CLI).
+      # @param input [::String|Symbol] (anything else will be returned untransformed)
+      # @return [Array<Symbol>] (or anything else if input type is not respected)
+      # @example
+      #   Unisec::Utils::Arguments.to_array_of_sym("arg") # => [:arg]
+      #   Unisec::Utils::Arguments.to_array_of_sym("a,b,c") # => [:a, :b, :c]
+      #   Unisec::Utils::Arguments.to_array_of_sym(:snake) # => [:snake]
+      #   Unisec::Utils::Arguments.to_array_of_sym([:a, :b, :c]) # => [:a, :b, :c]
+      def self.to_array_of_sym(input)
+        case input
+        when ::String # a,b,c => [:a, :b, :c]
+          input.split(',').map(&:to_sym)
+        when ::Symbol # :a => [:a]
+          [input]
+        else
+          input
+        end
+      end
+      # Converts encoding name from CLI to encoding name in standard format or to the matching Ruby Encoding object
+      # @param argenc [::String] Encoding name as used as argument in Unisec CLI (authorized values are: utf8 utf16be utf16le utf32be utf32le).
+      # @param target [::String] 'standard' for standard encoding name, 'class' for the Ruby Encoding constant
+      # @return [::String|Encoding]
+      # @example
+      #   Unisec::Utils::Arguments.argenc2enc('utf8', target: 'standard') # => "UTF-8"
+      #   Unisec::Utils::Arguments.argenc2enc('utf16be', target: 'class') # => #<Encoding:UTF-16BE (autoload)>
+      def self.argenc2enc(argenc, target: 'standard')
+        argument_encodings = %w[utf8 utf16be utf16le utf32be utf32le]
+        raise ArgumentError unless argument_encodings.include?(argenc)
+        if target == 'standard'
+          argenc.upcase.insert(3, '-')
+        elsif target == 'class'
+          Encoding.const_get(argenc.upcase.insert(3, '_')) # const_get safe thanks to input whitelist
+        else
+          raise ArgumentError
+        end
+      end
+    end
   end
 end

data/lib/unisec/version.rb CHANGED Viewed

@@ -2,5 +2,5 @@
 module Unisec
   # Version of unisec library and app
-  VERSION = '0.0.7'
+  VERSION = '0.0.9'
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: unisec
 version: !ruby/object:Gem::Version
-  version: 0.0.7
+  version: 0.0.9
 platform: ruby
 authors:
 - Alexandre ZANNI
@@ -29,14 +29,28 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.0'
+        version: '1.4'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.0'
+        version: '1.4'
+- !ruby/object:Gem::Dependency
+  name: dry-cli-completion
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 2.0.0
 - !ruby/object:Gem::Dependency
   name: paint
   requirement: !ruby/object:Gem::Requirement
@@ -71,14 +85,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.12'
+        version: '1.13'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.12'
+        version: '1.13'
 description: 'Toolkit for security research manipulating Unicode: confusables, homoglyphs,
   hexdump, code point, UTF-8, UTF-16, UTF-32, properties, regexp search, size, grapheme,
   surrogates, version, ICU, CLDR, UCD, BiDi, normalization'
@@ -137,7 +151,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 3.2.0
+      version: 3.3.0
   - - "<"
     - !ruby/object:Gem::Version
       version: '5.0'
@@ -147,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 4.0.3
+rubygems_version: 4.0.10
 specification_version: 4
 summary: Unicode Security Toolkit
 test_files: []