RubyGems - unisec - Versions diffs - 0.0.6 → 0.0.8 - Mend

unisec 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

data/lib/unisec/utils.rb CHANGED Viewed

@@ -20,33 +20,66 @@ class Integer
   end
 end
+class String
+  # Convert a string to a boolean.
+  #
+  # A trailing line terminator is removed and the comparison is case-insensitive.
+  # @return [TrueClass|FalseClass] `true` for `true`, `yes`, `y`, `1`;
+  #   `false` for `false`, `no`, `n`, `0`
+  # @raise [ArgumentError] if the string is none of the recognized values
+  # @example
+  #   "true".to_bool # => true
+  #   "No\n".to_bool # => false
+  #   "maybe".to_bool # => ArgumentError (invalid value for Boolean: "maybe")
+  def to_bool
+    case to_s.chomp.downcase
+    when 'true', 'yes', 'y', '1'
+      true
+    when 'false', 'no', 'n', '0'
+      false
+    else
+      # Report the receiver itself: no local named `str` exists in this method,
+      # so referencing it raised NameError instead of the intended ArgumentError.
+      raise ArgumentError, "invalid value for Boolean: #{inspect}"
+    end
+  end
+end
+class Range
+  # Is a range included in another range? Are all values of range B included in range A?
+  #
+  # The bounds are compared directly, so both ranges need comparable, non-nil bounds.
+  # @param range [Range] range B, the candidate sub-range
+  # @return [TrueClass|FalseClass] `true` when the receiver (range A) spans all of `range`
+  # @example
+  #   (1..10).include_range?(2..11) # => false
+  #   (1..10).include_range?(2..4) # => true
+  #   (1...10).include_range?(2..10) # => false
+  def include_range?(range)
+    return false unless self.begin <= range.begin
+    # An exclusive receiver (a...b) does not contain b itself, so an inclusive
+    # argument has to stop strictly before the receiver's end.
+    return self.end > range.end if exclude_end? && !range.exclude_end?
+
+    self.end >= range.end
+  end
+end
 module Unisec
   # Generic stuff not Unicode-related that can be re-used.
   module Utils
     # About string conversion and manipulation.
     module String
       # Convert a string input into the chosen type.
-      # @param input [String] If the target type is `:integer`, the string must represent a number encoded in
-      #   hexadecimal, decimal, binary. If it's a Unicode string, only the first code point will be taken into account.
-      # @param target_type [Symbol] Convert to the chosen type. Currently only supports `:integer`.
+      # @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
+      #   The input must represent a character encoded in hexadecimal, decimal, binary or standard code point format.
+      #   See {convert_to_integer} and {convert_to_char} for detailed examples.
+      # @param target_type [Symbol] Convert to the chosen type. Currently only supports `:integer` and `:char`.
       # @return [Variable] The type of the output depends on the chosen `target_type`.
       # @example
       #   Unisec::Utils::String.convert('0x1f4a9', :integer) # => 128169
+      #   Unisec::Utils::String.convert('0x1f4a9', :char) # => "💩"
       def self.convert(input, target_type)
         case target_type
         when :integer
           convert_to_integer(input)
+        when :char
+          convert_to_char(input)
         else
           raise TypeError, "Target type \"#{target_type}\" not avaible"
         end
       end
-      # Internal method used for {.convert}.
+      # Internal method used for {convert}.
       #
       # Convert a string input into integer.
-      # @param input [String] The string must represent a number encoded in hexadecimal, decimal, binary. If it's a
-      #   Unicode string, only the first code point will be taken into account. The input type is determined
-      #   automatically based on the prefix.
+      # @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
+      #   The input must represent a character encoded in hexadecimal, decimal, binary, standard code point format.
+      #   The input type is determined automatically based on the prefix.
       # @return [Integer]
       # @example
       #   # Hexadecimal
@@ -57,10 +90,14 @@ module Unisec
       #   Unisec::Utils::String.convert_to_integer('0b11111010010101001') # => 128169
       #   # Unicode string
       #   Unisec::Utils::String.convert_to_integer('💩') # => 128169
+      #   # Standardized format of hexadecimal code point
+      #   Unisec::Utils::String.convert_to_integer('U+1F4A9') # => 128169
       def self.convert_to_integer(input)
         case autodetect(input)
         when :hexadecimal
           input.hex2dec(prefix: '0x').to_i
+        when :stdcp
+          input.hex2dec(prefix: 'U+').to_i
         when :decimal
           input.to_i
         when :binary
@@ -72,11 +109,38 @@ module Unisec
         end
       end
+      # Internal method used for {convert}.
+      #
+      # Convert a string input into a character.
+      # @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
+      #   The input must represent a character encoded in hexadecimal, decimal, binary, standard code point format.
+      #   The input type is determined automatically based on the prefix.
+      # @return [String]
+      # @example
+      #   # Hexadecimal
+      #   Unisec::Utils::String.convert_to_char('0x1f4a9') # => "💩"
+      #   # Decimal
+      #   Unisec::Utils::String.convert_to_char('0d128169') # => "💩"
+      #   # Binary
+      #   Unisec::Utils::String.convert_to_char('0b11111010010101001') # => "💩"
+      #   # Unicode string
+      #   Unisec::Utils::String.convert_to_char('💩') # => "💩"
+      #   # Standardized format of hexadecimal code point
+      #   Unisec::Utils::String.convert_to_char('U+1F4A9') # => "💩"
+      def self.convert_to_char(input)
+        supported = %i[hexadecimal stdcp decimal binary string]
+        unless supported.include?(autodetect(input))
+          raise TypeError, "Input \"#{input}\" is not of the expected type"
+        end
+
+        # Resolve the code point through the integer conversion, then pack it
+        # back into a single character.
+        [convert(input, :integer)].pack('U')
+      end
       # Internal method used for {.convert}.
       #
       # Autodetect the representation type of the string input.
       # @param str [String] Input.
-      # @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string`.
+      # @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string`, `:stdcp`.
       # @example
       #   # Hexadecimal
       #   Unisec::Utils::String.autodetect('0x1f4a9') # => :hexadecimal
@@ -86,10 +150,14 @@ module Unisec
       #   Unisec::Utils::String.autodetect('0b11111010010101001') # => :binary
       #   # Unicode string
       #   Unisec::Utils::String.autodetect('💩') # => :string
+      #   # Standardized format of hexadecimal code point
+      #   Unisec::Utils::String.autodetect('U+1F4A9') # => :stdcp
       def self.autodetect(str)
         case str
-        when /0x[0-9a-fA-F]/
+        when /0x[0-9a-fA-F]+/
           :hexadecimal
+        when /U\+[0-9A-F]+/
+          :stdcp
         when /0d[0-9]+/
           :decimal
         when /0b[0-1]+/
@@ -108,6 +176,72 @@ module Unisec
       def self.grapheme_reverse(str)
         str.grapheme_clusters.reverse.join
       end
+      # Display the code point in Unicode format for a given character (code point as string)
+      # @param chr [String] Unicode code point (as character / string)
+      # @return [String] code point in Unicode format
+      # @todo Replace this method by target type :stdcp in String.convert()
+      # @example
+      #   Unisec::Utils::String.char2codepoint('💎') # => "U+1F48E"
+      def self.char2codepoint(chr)
+        # Only the leading code point of the input is used.
+        leading_cp = chr.codepoints.first
+        Integer.deccp2stdhexcp(leading_cp)
+      end
+      # Display the code points in Unicode format for the given characters (code points as string)
+      # @param chrs [String] Unicode code points (as characters / string)
+      # @return [String] code points in Unicode format
+      # @example
+      #   Unisec::Utils::String.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
+      #   Unisec::Utils::String.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"
+      def self.chars2codepoints(chrs)
+        # One standard-format code point per character, separated by spaces.
+        chrs.each_char.map { |single| char2codepoint(single) }.join(' ')
+      end
+      # Display the code points in integer format for the given characters (code points as string)
+      # @param chrs [String] Unicode code points (as characters / string)
+      # @return [String] code points in integer format
+      # @example
+      #   Unisec::Utils::String.chars2intcodepoints('I 💕 Ruby 💎') # => "73 32 128149 32 82 117 98 121 32 128142"
+      def self.chars2intcodepoints(chrs)
+        # Array#join stringifies each integer code point while joining.
+        decimal_cps = chrs.each_codepoint.to_a
+        decimal_cps.join(' ')
+      end
+      # Convert a string of hex encoded Unicode code points range to actual
+      # integer Ruby range.
+      # @param range_str [String] Unicode code points range as in data/Blocks.txt
+      # @return [Range]
+      # @example
+      #   Unisec::Utils::String::to_range('0080..00FF') # => 128..255
+      def self.to_range(range_str)
+        # NOTE(review): `hex2dec` is not defined in this file; presumably it is
+        # provided by the ctf-party dependency -- confirm.
+        bounds = range_str.split('..').map do |hex_bound|
+          hex_bound.hex2dec.to_i
+        end
+        ::Range.new(*bounds)
+      end
+    end
+    module Integer
+      # Convert from decimal code point to standardized format hexadecimal code point
+      # @param int_cp [Integer] Code point in decimal format
+      # @return [String] code point in Unicode format
+      # @todo Replace this method by the Integer.convert()
+      # @example
+      #   Unisec::Utils::Integer.deccp2stdhexcp(128640) # => "U+1F680"
+      def self.deccp2stdhexcp(int_cp)
+        # Precision .4 pads the hexadecimal digits with zeros up to 4 digits.
+        hex_digits = format('%.4x', int_cp)
+        "U+#{hex_digits.upcase}"
+      end
+    end
+    module Range
+      # Convert a (integer) range to a range of Unicode code points
+      # @param range [::Range]
+      # @return [String]
+      # @example
+      #   Unisec::Utils::Range.range2codepoint_range(1048576..1114111) # => "U+100000 - U+10FFFF"
+      def self.range2codepoint_range(range)
+        # Format both bounds of the range the same way, lower bound first.
+        first_cp, last_cp = [range.begin, range.end].map { |cp| Integer.deccp2stdhexcp(cp) }
+        "#{first_cp} - #{last_cp}"
+      end
     end
   end
 end

data/lib/unisec/version.rb CHANGED Viewed

@@ -2,5 +2,5 @@
 module Unisec
   # Version of unisec library and app
-  VERSION = '0.0.6'
+  VERSION = '0.0.8'
 end

data/lib/unisec/versions.rb CHANGED Viewed

@@ -55,6 +55,10 @@ module Unisec
         ucd_derivedname: {
           version: Unisec::Rugrep.ucd_derivedname_version,
           label: 'UCD (data/DerivedName.txt)'
+        },
+        ucd_blocks: {
+          version: Unisec::Blocks.ucd_blocks_version,
+          label: 'UCD (data/Blocks.txt)'
         }
       }
     end
@@ -81,6 +85,7 @@ module Unisec
         colorize.call(:twittercldr_cldr) +
         colorize.call(:ruby_unicode_emoji) +
         colorize.call(:ucd_derivedname) +
+        colorize.call(:ucd_blocks) +
         Paint["\nGems:\n", :underline] +
         colorize.call(:unisec) +
         colorize.call(:twittercldr) +

data/lib/unisec.rb CHANGED Viewed

@@ -3,9 +3,12 @@
 require 'unisec/version'
 require 'unisec/bidi'
+require 'unisec/blocks'
 require 'unisec/confusables'
+require 'unisec/decdump'
 require 'unisec/hexdump'
 require 'unisec/normalization'
+require 'unisec/planes'
 require 'unisec/properties'
 require 'unisec/rugrep'
 require 'unisec/size'

metadata CHANGED Viewed

@@ -1,14 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: unisec
 version: !ruby/object:Gem::Version
-  version: 0.0.6
+  version: 0.0.8
 platform: ruby
 authors:
 - Alexandre ZANNI
-autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-05-17 00:00:00.000000000 Z
+date: 1980-01-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ctf-party
@@ -16,14 +15,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.0'
+        version: '5.0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.0'
+        version: '5.0'
 - !ruby/object:Gem::Dependency
   name: dry-cli
   requirement: !ruby/object:Gem::Requirement
@@ -58,34 +57,28 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '6.11'
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 6.11.5
+        version: '6.13'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '6.11'
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 6.11.5
+        version: '6.13'
 - !ruby/object:Gem::Dependency
   name: unicode-confusable
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.9'
+        version: '1.12'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.9'
+        version: '1.12'
 description: 'Toolkit for security research manipulating Unicode: confusables, homoglyphs,
   hexdump, code point, UTF-8, UTF-16, UTF-32, properties, regexp search, size, grapheme,
   surrogates, version, ICU, CLDR, UCD, BiDi, normalization'
@@ -97,22 +90,28 @@ extra_rdoc_files: []
 files:
 - LICENSE
 - bin/unisec
+- data/Blocks.txt
 - data/DerivedName.txt
 - lib/unisec.rb
 - lib/unisec/bidi.rb
+- lib/unisec/blocks.rb
 - lib/unisec/cli/bidi.rb
+- lib/unisec/cli/blocks.rb
 - lib/unisec/cli/cli.rb
 - lib/unisec/cli/confusables.rb
-- lib/unisec/cli/hexdump.rb
+- lib/unisec/cli/dump.rb
 - lib/unisec/cli/normalization.rb
+- lib/unisec/cli/planes.rb
 - lib/unisec/cli/properties.rb
 - lib/unisec/cli/rugrep.rb
 - lib/unisec/cli/size.rb
 - lib/unisec/cli/surrogates.rb
 - lib/unisec/cli/versions.rb
 - lib/unisec/confusables.rb
+- lib/unisec/decdump.rb
 - lib/unisec/hexdump.rb
 - lib/unisec/normalization.rb
+- lib/unisec/planes.rb
 - lib/unisec/properties.rb
 - lib/unisec/rugrep.rb
 - lib/unisec/size.rb
@@ -120,18 +119,17 @@ files:
 - lib/unisec/utils.rb
 - lib/unisec/version.rb
 - lib/unisec/versions.rb
-homepage: https://github.com/Acceis/unisec
+homepage: https://github.com/noraj/unisec
 licenses:
 - MIT
 metadata:
   yard.run: yard
-  bug_tracker_uri: https://github.com/Acceis/unisec/issues
-  changelog_uri: https://github.com/Acceis/unisec/releases
-  documentation_uri: https://acceis.github.io/unisec/
-  homepage_uri: https://github.com/Acceis/unisec
-  source_code_uri: https://github.com/Acceis/unisec/
+  bug_tracker_uri: https://github.com/noraj/unisec/issues
+  changelog_uri: https://github.com/noraj/unisec/releases
+  documentation_uri: https://noraj.github.io/unisec/
+  homepage_uri: https://github.com/noraj/unisec
+  source_code_uri: https://github.com/noraj/unisec/
   rubygems_mfa_required: 'true'
-post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -139,18 +137,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 3.0.0
+      version: 3.2.0
   - - "<"
     - !ruby/object:Gem::Version
-      version: '4.0'
+      version: '5.0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.5.3
-signing_key:
+rubygems_version: 4.0.3
 specification_version: 4
 summary: Unicode Security Toolkit
 test_files: []

data/lib/unisec/cli/hexdump.rb DELETED Viewed

@@ -1,47 +0,0 @@
-# frozen_string_literal: true
-require 'dry/cli'
-require 'unisec'
-module Unisec
-  module CLI
-    module Commands
-      # CLI command `unisec hexdumps` for the class {Unisec::Hexdump} from the lib.
-      #
-      # Example:
-      #
-      # ```plaintext
-      # $ unisec hexdump "ACCEIS"
-      # UTF-8: 41 43 43 45 49 53
-      # UTF-16BE: 0041 0043 0043 0045 0049 0053
-      # UTF-16LE: 4100 4300 4300 4500 4900 5300
-      # UTF-32BE: 00000041 00000043 00000043 00000045 00000049 00000053
-      # UTF-32LE: 41000000 43000000 43000000 45000000 49000000 53000000
-      #
-      # $unisec hexdump "ACCEIS" --enc utf16le
-      # 4100 4300 4300 4500 4900 5300
-      # ```
-      class Hexdump < Dry::CLI::Command
-        desc 'Hexdump in all Unicode encodings'
-        argument :input, required: true,
-                         desc: 'String input. Read from STDIN if equal to -.'
-        option :enc, default: nil, values: %w[utf8 utf16be utf16le utf32be utf32le],
-                     desc: 'Output only in the specified encoding.'
-        # Hexdump of all Unicode encodings.
-        # @param input [String] Input string to encode
-        def call(input: nil, **options)
-          input = $stdin.read.chomp if input == '-'
-          if options[:enc].nil?
-            puts Unisec::Hexdump.new(input).display
-          else
-            # using send() is safe here thanks to the value whitelist
-            puts Unisec::Hexdump.send(options[:enc], input)
-          end
-        end
-      end
-    end
-  end
-end