RubyGems - unisec - Versions diffs - 0.0.6 → 0.0.7 - Mend

unisec 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

data/lib/unisec/decdump.rb ADDED Viewed

@@ -0,0 +1,118 @@
+# frozen_string_literal: true
+require 'ctf_party'
+require 'paint'
+module Unisec
+  # Decimal dump (decdump) of all Unicode encodings.
+  class Decdump
+    # UTF-8 decdump
+    # @return [String] UTF-8 decdump
+    attr_reader :utf8
+    # UTF-16BE decdump
+    # @return [String] UTF-16BE decdump
+    attr_reader :utf16be
+    # UTF-16LE decdump
+    # @return [String] UTF-16LE decdump
+    attr_reader :utf16le
+    # UTF-32BE decdump
+    # @return [String] UTF-32BE decdump
+    attr_reader :utf32be
+    # UTF-32LE decdump
+    # @return [String] UTF-32LE decdump
+    attr_reader :utf32le
+    # Build the decimal dump of a string in every supported Unicode encoding form.
+    # Each attribute is filled by the class method of the same name.
+    # @param str [String] Input string to encode
+    # @example
+    #   ded = Unisec::Decdump.new('I 💕 Ruby 💎')
+    #   ded.utf8 # => "073 032 240 159 146 149 032 082 117 098 121 032 240 159 146 142"
+    #   ded.utf16be # => "|000 073| |000 032| |216 061| |220 149| |000 032| |000 082| |000 117| |000 098| |000 121| |000 032| |216 061| |220 142|"
+    #   ded.utf32be # => "|000 000 000 073| |000 000 000 032| |000 001 244 149| |000 000 000 032| |000 000 000 082| |000 000 000 117| |000 000 000 098| |000 000 000 121| |000 000 000 032| |000 001 244 142|"
+    def initialize(str)
+      @utf8, @utf16be, @utf16le, @utf32be, @utf32le =
+        %i[utf8 utf16be utf16le utf32be utf32le].map { |encoder| Decdump.public_send(encoder, str) }
+    end
+    # Dump a string as UTF-8 in decdump format: one zero-padded, 3-digit decimal
+    # number per code unit (= per byte), separated by spaces.
+    # @param str [String] Input string to encode
+    # @return [String] decdump (UTF-8 encoded)
+    # @example
+    #   Unisec::Decdump.utf8('🐋') # => "240 159 144 139"
+    def self.utf8(str)
+      hex_bytes = str.encode('UTF-8').to_hex.scan(/.{2}/)
+      hex_bytes.map { |byte| byte.hex2dec(padding: 3) }.join(' ')
+    end
+    # Dump a string as UTF-16BE in decdump format: bytes are rendered as
+    # zero-padded, 3-digit decimal numbers and packed by code unit (2 bytes)
+    # between `|` delimiters.
+    # @param str [String] Input string to encode
+    # @return [String] decdump (UTF-16BE encoded)
+    # @example
+    #   Unisec::Decdump.utf16be('🐋') # => "|216 061| |220 011|"
+    def self.utf16be(str)
+      dec_bytes = str.encode('UTF-16BE').to_hex.scan(/.{2}/).map { |byte| byte.hex2dec(padding: 3) }
+      dec_bytes.each_slice(2).map { |unit| "|#{unit.join(' ')}|" }.join(' ')
+    end
+    # Dump a string as UTF-16LE in decdump format: bytes are rendered as
+    # zero-padded, 3-digit decimal numbers and packed by code unit (2 bytes)
+    # between `|` delimiters.
+    # @param str [String] Input string to encode
+    # @return [String] decdump (UTF-16LE encoded)
+    # @example
+    #   Unisec::Decdump.utf16le('🐋') # => "|061 216| |011 220|"
+    def self.utf16le(str)
+      dec_bytes = str.encode('UTF-16LE').to_hex.scan(/.{2}/).map { |byte| byte.hex2dec(padding: 3) }
+      dec_bytes.each_slice(2).map { |unit| "|#{unit.join(' ')}|" }.join(' ')
+    end
+    # Dump a string as UTF-32BE in decdump format: bytes are rendered as
+    # zero-padded, 3-digit decimal numbers and packed by code unit (4 bytes)
+    # between `|` delimiters.
+    # @param str [String] Input string to encode
+    # @return [String] decdump (UTF-32BE encoded)
+    # @example
+    #   Unisec::Decdump.utf32be('🐋') # => "|000 001 244 011|"
+    def self.utf32be(str)
+      dec_bytes = str.encode('UTF-32BE').to_hex.scan(/.{2}/).map { |byte| byte.hex2dec(padding: 3) }
+      dec_bytes.each_slice(4).map { |unit| "|#{unit.join(' ')}|" }.join(' ')
+    end
+    # Dump a string as UTF-32LE in decdump format: bytes are rendered as
+    # zero-padded, 3-digit decimal numbers and packed by code unit (4 bytes)
+    # between `|` delimiters.
+    # @param str [String] Input string to encode
+    # @return [String] decdump (UTF-32LE encoded)
+    # @example
+    #   Unisec::Decdump.utf32le('🐋') # => "|011 244 001 000|"
+    def self.utf32le(str)
+      dec_bytes = str.encode('UTF-32LE').to_hex.scan(/.{2}/).map { |byte| byte.hex2dec(padding: 3) }
+      dec_bytes.each_slice(4).map { |unit| "|#{unit.join(' ')}|" }.join(' ')
+    end
+    # Build a CLI-friendly output summarizing the decdump in all Unicode encodings.
+    # Every `|` code unit delimiter is colorized in red with Paint.
+    # @return [String] CLI-ready output
+    # @example
+    #   # "\u212A" is K (KELVIN SIGN), not the ASCII letter K
+    #   puts Unisec::Decdump.new("\u212A").display # =>
+    #   # UTF-8: 226 132 170
+    #   # UTF-16BE: |033 042|
+    #   # UTF-16LE: |042 033|
+    #   # UTF-32BE: |000 000 033 042|
+    #   # UTF-32LE: |042 033 000 000|
+    def display
+      rows = [
+        "UTF-8: #{@utf8}",
+        "UTF-16BE: #{@utf16be}",
+        "UTF-16LE: #{@utf16le}",
+        "UTF-32BE: #{@utf32be}",
+        "UTF-32LE: #{@utf32le}"
+      ]
+      rows.join("\n").gsub('|', Paint['|', :red])
+    end
+  end
+end

data/lib/unisec/hexdump.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 require 'ctf_party'
 module Unisec
-  # Hexdump of all Unicode encodings.
+  # Hexadecimal dump (hexdump) of all Unicode encodings.
   class Hexdump
     # UTF-8 hexdump
     # @return [String] UTF-8 hexdump

data/lib/unisec/normalization.rb CHANGED Viewed

@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 require 'ctf_party'
+require 'paint'
+require 'unisec/utils'
 module Unisec
   # Normalization Forms
@@ -111,7 +113,7 @@ module Unisec
     def display
       colorize = lambda { |form_title, form_attr|
         "#{Paint[form_title.to_s, :underline,
-                 :bold]}: #{form_attr}\n  #{Paint[Unisec::Properties.chars2codepoints(form_attr), :red]}\n"
+                 :bold]}: #{form_attr}\n  #{Paint[Unisec::Utils::String.chars2codepoints(form_attr), :red]}\n"
       }
       colorize.call('Original', @original) +
         colorize.call('NFC', @nfc) +
@@ -125,7 +127,7 @@ module Unisec
     def display_replace
       colorize = lambda { |form_title, form_attr|
         "#{Paint[form_title.to_s, :underline,
-                 :bold]}: #{form_attr}\n  #{Paint[Unisec::Properties.chars2codepoints(form_attr), :red]}\n"
+                 :bold]}: #{form_attr}\n  #{Paint[Unisec::Utils::String.chars2codepoints(form_attr), :red]}\n"
       }
       payload = replace_bypass
       colorize.call('Original', @original) +

data/lib/unisec/planes.rb ADDED Viewed

@@ -0,0 +1,224 @@
+# frozen_string_literal: true
+require 'paint'
+require 'unisec/utils'
+module Unisec
+  # Operations about Unicode planes
+  class Planes # rubocop:disable Metrics/ClassLength
+    # Data about the 17 Unicode planes: code point range and name of each one.
+    # The position in the array is the plane number ({plane} looks a plane up
+    # with `PLANES[number]`).
+    # The two private use planes are named with a lowercase "supplementary";
+    # as {abbr} keeps uppercase letters only, their abbreviation is "PUA".
+    # Only the array itself is frozen, the plane hashes it holds are not.
+    PLANES = [
+      { range: 0x0..0xffff, name: 'Basic Multilingual Plane' },
+      { range: 0x10000..0x1ffff, name: 'Supplementary Multilingual Plane' },
+      { range: 0x20000..0x2ffff, name: 'Supplementary Ideographic Plane' },
+      { range: 0x30000..0x3ffff, name: 'Tertiary Ideographic Plane' },
+      { range: 0x40000..0x4ffff, name: 'unassigned' },
+      { range: 0x50000..0x5ffff, name: 'unassigned' },
+      { range: 0x60000..0x6ffff, name: 'unassigned' },
+      { range: 0x70000..0x7ffff, name: 'unassigned' },
+      { range: 0x80000..0x8ffff, name: 'unassigned' },
+      { range: 0x90000..0x9ffff, name: 'unassigned' },
+      { range: 0xa0000..0xaffff, name: 'unassigned' },
+      { range: 0xb0000..0xbffff, name: 'unassigned' },
+      { range: 0xc0000..0xcffff, name: 'unassigned' },
+      { range: 0xd0000..0xdffff, name: 'unassigned' },
+      { range: 0xe0000..0xeffff, name: 'Supplementary Special-purpose Plane' },
+      { range: 0xf0000..0xfffff, name: 'supplementary Private Use Area planes' },
+      { range: 0x100000..0x10ffff, name: 'supplementary Private Use Area planes' }
+    ].freeze
+    # List all Unicode planes, each one enriched with the blocks it contains
+    # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (warning: very slow, very unoptimized, see {Unisec::Blocks.list})
+    # @return [Array<Hash>] one Hash per plane holding its `:range`, its `:name`
+    #   and its `:blocks` (as returned by {plane2blocks})
+    # @example
+    #   Unisec::Planes.list # =>
+    #   # [{range: 0..65535,
+    #   #   name: "Basic Multilingual Plane",
+    #   #   blocks:
+    #   #    [{range: 0..127, name: "Basic Latin", range_size: nil, char_count: nil},
+    #   #     {range: 128..255, name: "Latin-1 Supplement", range_size: nil, char_count: nil},
+    #   # […]
+    def self.list(with_count: false)
+      blocks_by_plane = plane2blocks(PLANES, with_count: with_count)
+      PLANES.each_with_index.map do |plane, idx|
+        plane.merge(blocks: blocks_by_plane[idx])
+      end
+    end
+    # List details about target plane including the list of associated blocks.
+    # The returned Hash(es) are copies: the hashes stored in {PLANES} are never modified.
+    # @param plane_arg [String|Integer] name (case-insensitive) or number of the plane.
+    #   The number is used as an index into {PLANES}, so a negative number counts
+    #   from the last plane.
+    # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (see {Unisec::Blocks.list})
+    # @return [Hash|Array<Hash>|nil] nil if no match, Hash of the plane if one match,
+    #   Array of planes' Hash if several matches
+    # @raise [ArgumentError] if plane_arg is neither an Integer nor a String
+    # @example
+    #   Unisec::Planes.plane(3) # =>
+    #   # {range: 196608..262143,
+    #   #  name: "Tertiary Ideographic Plane",
+    #   #  blocks:
+    #   #   [{range: 196608..201551, name: "CJK Unified Ideographs Extension G", range_size: nil, char_count: nil},
+    #   #    {range: 201552..205743, name: "CJK Unified Ideographs Extension H", range_size: nil, char_count: nil},
+    #   #    {range: 205744..210047, name: "CJK Unified Ideographs Extension J", range_size: nil, char_count: nil}]}
+    #   Unisec::Planes.plane('Supplementary Ideographic Plane') # =>
+    #   # {range: 131072..196607,
+    #   #  name: "Supplementary Ideographic Plane",
+    #   #  blocks:
+    #   #   [{range: 131072..173791, name: "CJK Unified Ideographs Extension B", range_size: nil, char_count: nil},
+    #   #    {range: 173824..177983, name: "CJK Unified Ideographs Extension C", range_size: nil, char_count: nil},
+    #   #    {range: 177984..178207, name: "CJK Unified Ideographs Extension D", range_size: nil, char_count: nil},
+    #   #    {range: 178208..183983, name: "CJK Unified Ideographs Extension E", range_size: nil, char_count: nil},
+    #   #    {range: 183984..191471, name: "CJK Unified Ideographs Extension F", range_size: nil, char_count: nil},
+    #   #    {range: 191472..192095, name: "CJK Unified Ideographs Extension I", range_size: nil, char_count: nil},
+    #   #    {range: 194560..195103, name: "CJK Compatibility Ideographs Supplement", range_size: nil, char_count: nil}]}
+    #   Unisec::Planes.plane('unassigned') # =>
+    #   # [{range: 262144..327679, name: "unassigned", blocks: []},
+    #   #  {range: 327680..393215, name: "unassigned", blocks: []},
+    #   #  {range: 393216..458751, name: "unassigned", blocks: []},
+    #   #  {range: 458752..524287, name: "unassigned", blocks: []},
+    #   #  {range: 524288..589823, name: "unassigned", blocks: []},
+    #   #  {range: 589824..655359, name: "unassigned", blocks: []},
+    #   #  {range: 655360..720895, name: "unassigned", blocks: []},
+    #   #  {range: 720896..786431, name: "unassigned", blocks: []},
+    #   #  {range: 786432..851967, name: "unassigned", blocks: []},
+    #   #  {range: 851968..917503, name: "unassigned", blocks: []}]
+    def self.plane(plane_arg, with_count: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
+      case plane_arg
+      when Integer # search by plane number
+        res = PLANES[plane_arg]
+      when String # search by plane name
+        res = PLANES.select { |plane| plane[:name].downcase == plane_arg.downcase }
+        return nil if res.empty?
+        res = res.first if res.size == 1 # Hash if one, Array of Hash if multiples
+      else
+        raise ArgumentError, "plane_arg must be an Integer or a String, got #{plane_arg.class}"
+      end
+      case res
+      when nil
+        nil # handle invalid search term
+      # Enrich plane data with blocks
+      when Hash # When 1 plane
+        # merge builds a new Hash: assigning res[:blocks] would write into the
+        # hash stored in PLANES (only the outer array of the constant is frozen)
+        res.merge(blocks: plane2blocks(res, with_count: with_count))
+      when Array # When multiple planes
+        res.zip(plane2blocks(res, with_count: with_count)).map do |base, extra|
+          base.merge(blocks: extra)
+        end
+      end
+    end
+    # Find the blocks included in a given plane (or in each plane of a list)
+    # @param plane [Hash|Array<Hash>] plane hash or array of plane hash
+    # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (see {Unisec::Blocks.list})
+    # @return [Array<Hash>|Array<Array<Hash>>] the blocks whose range is fully inside
+    #   the plane range; when an Array of planes is given, one such list per plane
+    #   (in the same order)
+    # @raise [ArgumentError] if plane is neither a Hash nor an Array
+    # @example
+    #   Unisec::Planes.plane2blocks({ range: 0x20000..0x2ffff, name: 'Supplementary Ideographic Plane' }) # =>
+    #   # [{range: 131072..173791, name: "CJK Unified Ideographs Extension B", range_size: nil, char_count: nil},
+    #   #  {range: 173824..177983, name: "CJK Unified Ideographs Extension C", range_size: nil, char_count: nil},
+    #   #  {range: 177984..178207, name: "CJK Unified Ideographs Extension D", range_size: nil, char_count: nil},
+    #   #  {range: 178208..183983, name: "CJK Unified Ideographs Extension E", range_size: nil, char_count: nil},
+    #   #  {range: 183984..191471, name: "CJK Unified Ideographs Extension F", range_size: nil, char_count: nil},
+    #   #  {range: 191472..192095, name: "CJK Unified Ideographs Extension I", range_size: nil, char_count: nil},
+    #   #  {range: 194560..195103, name: "CJK Compatibility Ideographs Supplement", range_size: nil, char_count: nil}]
+    def self.plane2blocks(plane, with_count: false)
+      case plane
+      when Hash
+        Unisec::Blocks.list(with_count: with_count).select do |block|
+          plane[:range].include_range?(block[:range])
+        end
+      when Array
+        plane.map { |pl| plane2blocks(pl, with_count: with_count) }
+      else
+        raise ArgumentError
+      end
+    end
+    # Abbreviate a plane name by keeping only its uppercase letters, in order
+    # @param name [String] plane name (as in {PLANES} `:name`)
+    # @return [String] plane abbreviation
+    # @example
+    #   Unisec::Planes.abbr('Basic Multilingual Plane') # => "BMP"
+    #   Unisec::Planes.abbr('supplementary Private Use Area planes') # => "PUA"
+    def self.abbr(name)
+      name.each_char.grep(/\p{Upper}/).join
+    end
+    # Print a CLI-friendly listing of all planes (plane fields in red, block fields in magenta)
+    # @param with_blocks [TrueClass|FalseClass] display the blocks associated with each plane
+    # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (see {Unisec::Blocks.list})
+    # @return [nil]
+    # @example
+    #   Unisec::Planes.list_display(with_blocks: true, with_count: false)
+    #   # Range: U+0000 - U+FFFF      Name: Basic Multilingual Plane
+    #   #   Blocks:
+    #   #     Range: U+0000 - U+007F      Name: Basic Latin
+    #   #     Range: U+0080 - U+00FF      Name: Latin-1 Supplement
+    #   #     Range: U+0100 - U+017F      Name: Latin Extended-A
+    #   # […]
+    def self.list_display(with_blocks: false, with_count: false) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
+      # Only the value part is left-justified, the colorized label is not padded
+      plane_field = ->(label, content, width) { print Paint[label, :red, :bold] + " #{content}".ljust(width) }
+      block_field = ->(label, content, width) { print Paint[label, :magenta, :bold] + " #{content}".ljust(width) }
+      list(with_count: with_count).each do |pla|
+        plane_field.call('Range:', Utils::Range.range2codepoint_range(pla[:range]), 22)
+        plane_field.call('Name:', pla[:name], 50)
+        if with_blocks
+          puts
+          plane_field.call('  Blocks:', "\n", 0)
+          pla[:blocks].each do |blk|
+            block_field.call('    Range:', Utils::Range.range2codepoint_range(blk[:range]), 22)
+            block_field.call('Name:', blk[:name], 50)
+            if with_count
+              block_field.call('Range size:', blk[:range_size], 8)
+              block_field.call('Char count:', blk[:char_count], 0)
+            end
+            puts
+          end
+        end
+        puts
+      end
+      nil
+    end
+    # Display a CLI-friendly output searching for a plane.
+    # Nothing is printed when no plane matches.
+    # @param plane_arg [String|Integer] name or number of the plane
+    # @param with_blocks [TrueClass|FalseClass] display the blocks associated with each plane
+    # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (see {Unisec::Blocks.list})
+    # @return [nil]
+    # @example
+    #   Unisec::Planes.plane_display(3, with_blocks: true)
+    #   # Range: U+30000 - U+3FFFF    Name: Tertiary Ideographic Plane
+    #   #   Blocks:
+    #   #     Range: U+30000 - U+3134F    Name: CJK Unified Ideographs Extension G
+    #   #     Range: U+31350 - U+323AF    Name: CJK Unified Ideographs Extension H
+    #   #     Range: U+323B0 - U+3347F    Name: CJK Unified Ideographs Extension J
+    def self.plane_display(plane_arg, with_blocks: false, with_count: false) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
+      planes = plane(plane_arg, with_count: with_count)
+      # plane returns nil when nothing matches: calling each on it would raise NoMethodError
+      return nil if planes.nil?
+      planes = [planes] if planes.is_a?(Hash) # single match: normalize to a list
+      display = ->(key, value, just) { print Paint[key, :red, :bold] + " #{value}".ljust(just) }
+      display_blk = ->(key, value, just) { print Paint[key, :magenta, :bold] + " #{value}".ljust(just) }
+      planes.each do |pla|
+        display.call('Range:', Utils::Range.range2codepoint_range(pla[:range]), 22)
+        display.call('Name:', pla[:name], 50)
+        if with_blocks
+          puts
+          display.call('  Blocks:', "\n", 0)
+          pla[:blocks].each do |block|
+            display_blk.call('    Range:', Utils::Range.range2codepoint_range(block[:range]), 22)
+            display_blk.call('Name:', block[:name], 50)
+            if with_count
+              display_blk.call('Range size:', block[:range_size], 8)
+              display_blk.call('Char count:', block[:char_count], 0)
+            end
+            puts
+          end
+        end
+        puts
+      end
+      nil
+    end
+  end
+end

data/lib/unisec/properties.rb CHANGED Viewed

@@ -2,6 +2,7 @@
 require 'twitter_cldr'
 require 'paint'
+require 'unisec/utils'
 module Unisec
   # Manipulate Unicode properties
@@ -50,7 +51,7 @@ module Unisec
     def self.codepoints_display(prop)
       codepoints = Properties.codepoints(prop)
       codepoints.each do |cp|
-        puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
+        puts "#{Utils::Integer.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
       end
       nil
     end
@@ -77,7 +78,7 @@ module Unisec
         block: props.block.join,
         category: categories[1],
         subcategory: categories[0],
-        codepoint: Properties.char2codepoint(chr),
+        codepoint: Utils::String.char2codepoint(chr),
         name: cp.name,
         script: props.script.join,
         case: {
@@ -127,22 +128,22 @@ module Unisec
       display.call('Since (age):', "Version #{data[:age]}")
       puts
       x = data.dig(:case, :twitter, :uppercase)
-      display.call('Uppercase:', x + " (#{Properties.char2codepoint(x)})")
+      display.call('Uppercase:', x + " (#{Utils::String.char2codepoint(x)})")
       x = data.dig(:case, :twitter, :lowercase)
-      display.call('Lowercase:', x + " (#{Properties.char2codepoint(x)})")
+      display.call('Lowercase:', x + " (#{Utils::String.char2codepoint(x)})")
       x = data.dig(:case, :twitter, :titlecase)
-      display.call('Titlecase:', x + " (#{Properties.char2codepoint(x)})")
+      display.call('Titlecase:', x + " (#{Utils::String.char2codepoint(x)})")
       x = data.dig(:case, :twitter, :casefold)
-      display.call('Casefold:', x + " (#{Properties.char2codepoint(x)})")
+      display.call('Casefold:', x + " (#{Utils::String.char2codepoint(x)})")
       puts
       x = data.dig(:normalization, :twitter, :nfkd)
-      display.call('Normalization NFKD:', x + " (#{Properties.chars2codepoints(x)})")
+      display.call('Normalization NFKD:', x + " (#{Utils::String.chars2codepoints(x)})")
       x = data.dig(:normalization, :twitter, :nfkc)
-      display.call('Normalization NFKC:', x + " (#{Properties.chars2codepoints(x)})")
+      display.call('Normalization NFKC:', x + " (#{Utils::String.chars2codepoints(x)})")
       x = data.dig(:normalization, :twitter, :nfd)
-      display.call('Normalization NFD:', x + " (#{Properties.chars2codepoints(x)})")
+      display.call('Normalization NFD:', x + " (#{Utils::String.chars2codepoints(x)})")
       x = data.dig(:normalization, :twitter, :nfc)
-      display.call('Normalization NFC:', x + " (#{Properties.chars2codepoints(x)})")
+      display.call('Normalization NFC:', x + " (#{Utils::String.chars2codepoints(x)})")
       if extended
         puts
         data[:other_properties].each do |k, v|
@@ -151,37 +152,5 @@ module Unisec
       end
       nil
     end
-    # Display the code point in Unicode format for a given character (code point as string)
-    # @param chr [String] Unicode code point (as character / string)
-    # @return [String] code point in Unicode format
-    # @example
-    #   Unisec::Properties.char2codepoint('💎') # => "U+1F48E"
-    def self.char2codepoint(chr)
-      Properties.deccp2stdhexcp(chr.codepoints.first)
-    end
-    # Display the code points in Unicode format for the given characters (code points as string)
-    # @param chrs [String] Unicode code points (as characters / string)
-    # @return [String] code points in Unicode format
-    # @example
-    #   Unisec::Properties.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
-    #   Unisec::Properties.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"
-    def self.chars2codepoints(chrs)
-      out = []
-      chrs.each_char do |chr|
-        out << Properties.char2codepoint(chr)
-      end
-      out.join(' ')
-    end
-    # Convert from decimal code point to standardized format hexadecimal code point
-    # @param int_cp [Integer] Code point in decimal format
-    # @return [String] code point in Unicode format
-    # @example
-    #   Unisec::Properties.intcp2stdhexcp(128640) # => "U+1F680"
-    def self.deccp2stdhexcp(int_cp)
-      "U+#{format('%.4x', int_cp).upcase}"
-    end
   end
 end

data/lib/unisec/rugrep.rb CHANGED Viewed

@@ -2,6 +2,7 @@
 require 'twitter_cldr'
 require 'paint'
+require 'unisec/utils'
 module Unisec
   # Ruby grep : Ruby regular expression search for Unicode code point names
@@ -64,7 +65,7 @@ module Unisec
     def self.regrep_display(regexp)
       codepoints = regrep(regexp)
       codepoints.each do |cp|
-        puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
+        puts "#{Utils::Integer.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
       end
       nil
     end
@@ -118,7 +119,7 @@ module Unisec
     def self.regrep_display_slow(regexp)
       codepoints = regrep_slow(regexp)
       codepoints.each do |cp|
-        puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
+        puts "#{Utils::Integer.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
       end
       nil
     end

data/lib/unisec/utils.rb CHANGED Viewed

@@ -20,6 +20,35 @@ class Integer
   end
 end
+class String
+  # Convert a string to a boolean. A trailing line break is ignored and the
+  # comparison is case-insensitive.
+  # @return [TrueClass|FalseClass] true for "true", "yes", "y", "1";
+  #   false for "false", "no", "n", "0"
+  # @raise [ArgumentError] if the string is none of the accepted values
+  # @example
+  #   "true".to_bool # => true
+  #   "No\n".to_bool # => false
+  #   "maybe".to_bool # => ArgumentError (invalid value for Boolean: "maybe")
+  def to_bool
+    case to_s.chomp.downcase
+    when 'true', 'yes', 'y', '1'
+      true
+    when 'false', 'no', 'n', '0'
+      false
+    else
+      # Report the receiver itself: there is no `str` variable in this method,
+      # referencing one raised NameError instead of the intended ArgumentError
+      raise ArgumentError, "invalid value for Boolean: #{inspect}"
+    end
+  end
+end
+class Range
+  # Is a range included in another range? Are all values of range B included in range A?
+  # Bounds are compared with `<=` / `>=`. When the receiver excludes its end,
+  # the argument must either end before it or also exclude the same end.
+  # @param range [Range]
+  # @return [TrueClass|FalseClass]
+  # @example
+  #   (1..10).include_range?(2..11) # => false
+  #   (1..10).include_range?(2..4) # => true
+  #   (1...10).include_range?(1..10) # => false (10 is not part of the receiver)
+  #   (1...10).include_range?(1...10) # => true
+  def include_range?(range)
+    return false unless self.begin <= range.begin
+    return self.end >= range.end unless exclude_end?
+
+    # The receiver's end is excluded: an argument that includes its own end
+    # must stop strictly before it
+    range.exclude_end? ? self.end >= range.end : self.end > range.end
+  end
+end
 module Unisec
   # Generic stuff not Unicode-related that can be re-used.
   module Utils
@@ -108,6 +137,71 @@ module Unisec
       def self.grapheme_reverse(str)
         str.grapheme_clusters.reverse.join
       end
+      # Give the code point, in Unicode format, of a character (code point as string).
+      # Only the first code point of the string is considered.
+      # @param chr [String] Unicode code point (as character / string)
+      # @return [String] code point in Unicode format
+      # @example
+      #   Unisec::Utils::String.char2codepoint('💎') # => "U+1F48E"
+      def self.char2codepoint(chr)
+        first_cp = chr.codepoints.first
+        Integer.deccp2stdhexcp(first_cp)
+      end
+      # Give the code points, in Unicode format, of every character of a string
+      # (code points as string), separated by spaces.
+      # @param chrs [String] Unicode code points (as characters / string)
+      # @return [String] code points in Unicode format
+      # @example
+      #   Unisec::Utils::String.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
+      #   Unisec::Utils::String.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"
+      def self.chars2codepoints(chrs)
+        chrs.each_char.map { |chr| char2codepoint(chr) }.join(' ')
+      end
+      # Turn a string holding a range of hex encoded Unicode code points
+      # (two bounds separated by `..`) into an actual integer Ruby range.
+      # @param range_str [String] Unicode code points range as in data/Blocks.txt
+      # @return [Range]
+      # @example
+      #   Unisec::Utils::String.to_range('0080..00FF') # => 128..255
+      def self.to_range(range_str)
+        bounds = range_str.split('..').map { |hex_bound| hex_bound.hex2dec.to_i }
+        ::Range.new(*bounds)
+      end
+      # Convert from standardized format hexadecimal code point to decimal code point.
+      # The first two characters (the `U+` prefix) are swapped for `0x` and the
+      # result is handed to convert_to_integer (defined outside this hunk).
+      # @param std_hex_cp [String] Code point in standardized hexadecimal format
+      # @return [Integer] Code point in decimal format
+      # @example
+      #   Unisec::Utils::String.stdhexcp2deccp('U+2026') # => 8230
+      def self.stdhexcp2deccp(std_hex_cp)
+        hex_digits = std_hex_cp[2..] # drop the U+ prefix
+        convert_to_integer("0x#{hex_digits}")
+      end
+    end
+    module Integer
+      # Convert from decimal code point to standardized format hexadecimal code point:
+      # `U+` followed by the uppercase hexadecimal value, zero-padded to at least 4 digits
+      # @param int_cp [Integer] Code point in decimal format
+      # @return [String] code point in Unicode format
+      # @example
+      #   Unisec::Utils::Integer.deccp2stdhexcp(128640) # => "U+1F680"
+      def self.deccp2stdhexcp(int_cp)
+        format('U+%.4X', int_cp)
+      end
+    end
+    module Range
+      # Convert an (integer) range to a range of Unicode code points: both
+      # bounds in Unicode format, joined by " - "
+      # @param range [::Range]
+      # @return [String]
+      # @example
+      #   Unisec::Utils::Range.range2codepoint_range(1048576..1114111) # => "U+100000 - U+10FFFF"
+      def self.range2codepoint_range(range)
+        [range.begin, range.end].map { |bound| Integer.deccp2stdhexcp(bound) }.join(' - ')
+      end
     end
   end
 end

data/lib/unisec/version.rb CHANGED Viewed

@@ -2,5 +2,5 @@
 module Unisec
   # Version of unisec library and app
-  VERSION = '0.0.6'
+  VERSION = '0.0.7'
 end

data/lib/unisec/versions.rb CHANGED Viewed

@@ -55,6 +55,10 @@ module Unisec
         ucd_derivedname: {
           version: Unisec::Rugrep.ucd_derivedname_version,
           label: 'UCD (data/DerivedName.txt)'
+        },
+        ucd_blocks: {
+          version: Unisec::Blocks.ucd_blocks_version,
+          label: 'UCD (data/Blocks.txt)'
         }
       }
     end
@@ -81,6 +85,7 @@ module Unisec
         colorize.call(:twittercldr_cldr) +
         colorize.call(:ruby_unicode_emoji) +
         colorize.call(:ucd_derivedname) +
+        colorize.call(:ucd_blocks) +
         Paint["\nGems:\n", :underline] +
         colorize.call(:unisec) +
         colorize.call(:twittercldr) +

data/lib/unisec.rb CHANGED Viewed

@@ -3,9 +3,12 @@
 require 'unisec/version'
 require 'unisec/bidi'
+require 'unisec/blocks'
 require 'unisec/confusables'
+require 'unisec/decdump'
 require 'unisec/hexdump'
 require 'unisec/normalization'
+require 'unisec/planes'
 require 'unisec/properties'
 require 'unisec/rugrep'
 require 'unisec/size'