unisec 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+ require 'unisec/utils'
6
+
7
+ module Unisec
8
+ module CLI
9
+ module Commands
10
+ # CLI sub-commands `unisec planes xxx` for the class {Unisec::Planes} from the lib.
11
+ module Planes
12
+ # Command `unisec planes list`
13
+ #
14
+ # Example:
15
+ #
16
+ # ```plaintext
17
+ # $ unisec planes list
18
+ # Range: U+0000 - U+FFFF Name: Basic Multilingual Plane
19
+ # Range: U+10000 - U+1FFFF Name: Supplementary Multilingual Plane
20
+ # Range: U+20000 - U+2FFFF Name: Supplementary Ideographic Plane
21
+ # Range: U+30000 - U+3FFFF Name: Tertiary Ideographic Plane
22
+ # …
23
+ # $ unisec planes list --with-blocks=true
24
+ # Range: U+0000 - U+FFFF Name: Basic Multilingual Plane
25
+ # Blocks:
26
+ # Range: U+0000 - U+007F Name: Basic Latin
27
+ # Range: U+0080 - U+00FF Name: Latin-1 Supplement
28
+ # Range: U+0100 - U+017F Name: Latin Extended-A
29
+ # Range: U+0180 - U+024F Name: Latin Extended-B
30
+ # ```
31
+ class List < Dry::CLI::Command
32
+ desc 'List all Unicode planes'
33
+
34
+ option :with_blocks, default: 'false', values: %w[true false],
35
+ desc: 'display the blocks associated with each plane?'
36
+ option :with_count, default: 'false', values: %w[true false],
37
+ desc: "calculate block's range size & char count?"
38
+
39
+ # List Unicode planes
40
+ def call(**options)
41
+ Unisec::Planes.list_display(with_blocks: options[:with_blocks].to_bool,
42
+ with_count: options[:with_count].to_bool)
43
+ end
44
+ end
45
+
46
+ # Command `unisec planes search`
47
+ #
48
+ # Example:
49
+ #
50
+ # ```plaintext
51
+ # $ unisec planes search 3
52
+ # Range: U+30000 - U+3FFFF Name: Tertiary Ideographic Plane
53
+ # $ unisec planes search 2 --with-blocks=true
54
+ # Range: U+20000 - U+2FFFF Name: Supplementary Ideographic Plane
55
+ # Blocks:
56
+ # Range: U+20000 - U+2A6DF Name: CJK Unified Ideographs Extension B
57
+ # Range: U+2A700 - U+2B73F Name: CJK Unified Ideographs Extension C
58
+ # Range: U+2B740 - U+2B81F Name: CJK Unified Ideographs Extension D
59
+ # Range: U+2B820 - U+2CEAF Name: CJK Unified Ideographs Extension E
60
+ # Range: U+2CEB0 - U+2EBEF Name: CJK Unified Ideographs Extension F
61
+ # Range: U+2EBF0 - U+2EE5F Name: CJK Unified Ideographs Extension I
62
+ # Range: U+2F800 - U+2FA1F Name: CJK Compatibility Ideographs Supplement
63
+ # $ unisec planes search 'basic multilingual plane'
64
+ # Range: U+0000 - U+FFFF Name: Basic Multilingual Plane
65
+ # $ unisec planes search 'unassigned'
66
+ # Range: U+40000 - U+4FFFF Name: unassigned
67
+ # Range: U+50000 - U+5FFFF Name: unassigned
68
+ # Range: U+60000 - U+6FFFF Name: unassigned
69
+ # Range: U+70000 - U+7FFFF Name: unassigned
70
+ # Range: U+80000 - U+8FFFF Name: unassigned
71
+ # Range: U+90000 - U+9FFFF Name: unassigned
72
+ # Range: U+A0000 - U+AFFFF Name: unassigned
73
+ # Range: U+B0000 - U+BFFFF Name: unassigned
74
+ # Range: U+C0000 - U+CFFFF Name: unassigned
75
+ # Range: U+D0000 - U+DFFFF Name: unassigned
76
+ # ```
77
+ class Search < Dry::CLI::Command
78
+ desc 'Search for a specific plane'
79
+
80
+ argument :plane_arg, required: true,
81
+ desc: 'Name or number of the plane'
82
+
83
+ option :with_blocks, default: 'false', values: %w[true false],
84
+ desc: 'display the blocks associated with each plane?'
85
+ option :with_count, default: 'false', values: %w[true false],
86
+ desc: "calculate block's range size & char count?"
87
+
88
+ # Display a plane matching a plane name or plane number
89
+ # @param plane_arg [String|Integer] name or number of the plane
90
+ def call(plane_arg: nil, **options)
91
+ plane_arg = plane_arg.to_i if /\A\d+\Z/.match?(plane_arg) # cast decimal string to integer
92
+ Unisec::Planes.plane_display(plane_arg, with_blocks: options[:with_blocks].to_bool,
93
+ with_count: options[:with_count].to_bool)
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end
99
+ end
@@ -21,7 +21,7 @@ module Unisec
21
21
  argument :regexp, required: true,
22
22
  desc: 'regular expression'
23
23
 
24
- # Hexdump of all Unicode encodings.
24
+ # Unicode code point names matching regexp.
25
25
  # @param regexp [Regexp] Regular expression without delimiters or modifiers.
26
26
  # Supports everything Ruby Regexp supports
27
27
  def call(regexp: nil, **)
@@ -2,6 +2,8 @@
2
2
 
3
3
  require 'unicode/confusable'
4
4
  require 'twitter_cldr'
5
+ require 'paint'
6
+ require 'unisec/utils'
5
7
 
6
8
  module Unisec
7
9
  # Operations about Unicode confusable characters (homoglyphs).
@@ -22,7 +24,7 @@ module Unisec
22
24
  # @param map [Boolean] allows partial mapping, includes confusable where the given chart is a part of
23
25
  def self.list_display(chr, map: true)
24
26
  Confusables.list(chr, map: map).each do |confu|
25
- puts "#{Properties.char2codepoint(confu).ljust(9)} #{confu.ljust(4)} " \
27
+ puts "#{Utils::String.char2codepoint(confu).ljust(9)} #{confu.ljust(4)} " \
26
28
  "#{TwitterCldr::Shared::CodePoint.get(confu.codepoints.first).name}"
27
29
  end
28
30
  nil
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ctf_party'
4
+ require 'paint'
5
+
6
+ module Unisec
7
+ # Decimal dump (decdump) of all Unicode encodings.
8
+ class Decdump
9
+ # UTF-8 decdump
10
+ # @return [String] UTF-8 decdump
11
+ attr_reader :utf8
12
+
13
+ # UTF-16BE decdump
14
+ # @return [String] UTF-16BE decdump
15
+ attr_reader :utf16be
16
+
17
+ # UTF-16LE decdump
18
+ # @return [String] UTF-16LE decdump
19
+ attr_reader :utf16le
20
+
21
+ # UTF-32BE decdump
22
+ # @return [String] UTF-32BE decdump
23
+ attr_reader :utf32be
24
+
25
+ # UTF-32LE decdump
26
+ # @return [String] UTF-32LE decdump
27
+ attr_reader :utf32le
28
+
29
+ # Init the decdump.
30
+ # @param str [String] Input string to encode
31
+ # @example
32
+ # ded = Unisec::Decdump.new('I 💕 Ruby 💎')
33
+ # ded.utf8 # => "073 032 240 159 146 149 032 082 117 098 121 032 240 159 146 142"
34
+ # ded.utf16be # => "|000 073| |000 032| |216 061| |220 149| |000 032| |000 082| |000 117| |000 098| |000 121| |000 032| |216 061| |220 142|"
35
+ # ded.utf32be # => "|000 000 000 073| |000 000 000 032| |000 001 244 149| |000 000 000 032| |000 000 000 082| |000 000 000 117| |000 000 000 098| |000 000 000 121| |000 000 000 032| |000 001 244 142|"
36
+ def initialize(str)
37
+ @utf8 = Decdump.utf8(str)
38
+ @utf16be = Decdump.utf16be(str)
39
+ @utf16le = Decdump.utf16le(str)
40
+ @utf32be = Decdump.utf32be(str)
41
+ @utf32le = Decdump.utf32le(str)
42
+ end
43
+
44
+ # Encode to UTF-8 in decdump format (spaced at every code unit = every byte)
45
+ # @param str [String] Input string to encode
46
+ # @return [String] decdump (UTF-8 encoded)
47
+ # @example
48
+ # Unisec::Decdump.utf8('🐋') # => "240 159 144 139"
49
+ def self.utf8(str)
50
+ str.encode('UTF-8').to_hex.scan(/.{2}/).map { |x| x.hex2dec(padding: 3) }.join(' ')
51
+ end
52
+
53
+ # Encode to UTF-16BE in decdump format (packed by code unit = every 2 bytes)
54
+ # @param str [String] Input string to encode
55
+ # @return [String] decdump (UTF-16BE encoded)
56
+ # @example
57
+ # Unisec::Decdump.utf16be('🐋') # => "|216 061| |220 011|"
58
+ def self.utf16be(str)
59
+ dec_chuncks = str.encode('UTF-16BE').to_hex.scan(/.{2}/).map do |x|
60
+ x.hex2dec(padding: 3)
61
+ end
62
+ dec_chuncks.join(' ').scan(/\d+ \d+/).map { |x| "|#{x}|" }.join(' ')
63
+ end
64
+
65
+ # Encode to UTF-16LE in decdump format (packed by code unit = every 2 bytes)
66
+ # @param str [String] Input string to encode
67
+ # @return [String] decdump (UTF-16LE encoded)
68
+ # @example
69
+ # Unisec::Decdump.utf16le('🐋') # => "|061 216| |011 220|"
70
+ def self.utf16le(str)
71
+ dec_chuncks = str.encode('UTF-16LE').to_hex.scan(/.{2}/).map do |x|
72
+ x.hex2dec(padding: 3)
73
+ end
74
+ dec_chuncks.join(' ').scan(/\d+ \d+/).map { |x| "|#{x}|" }.join(' ')
75
+ end
76
+
77
+ # Encode to UTF-32BE in decdump format (packed by code unit = every 4 bytes)
78
+ # @param str [String] Input string to encode
79
+ # @return [String] decdump (UTF-32BE encoded)
80
+ # @example
81
+ # Unisec::Decdump.utf32be('🐋') # => "|000 001 244 011|"
82
+ def self.utf32be(str)
83
+ dec_chuncks = str.encode('UTF-32BE').to_hex.scan(/.{2}/).map do |x|
84
+ x.hex2dec(padding: 3)
85
+ end
86
+ dec_chuncks.join(' ').scan(/\d+ \d+ \d+ \d+/).map { |x| "|#{x}|" }.join(' ')
87
+ end
88
+
89
+ # Encode to UTF-32LE in decdump format (packed by code unit = every 4 bytes)
90
+ # @param str [String] Input string to encode
91
+ # @return [String] decdump (UTF-32LE encoded)
92
+ # @example
93
+ # Unisec::Decdump.utf32le('🐋') # => "|011 244 001 000|"
94
+ def self.utf32le(str)
95
+ dec_chuncks = str.encode('UTF-32LE').to_hex.scan(/.{2}/).map do |x|
96
+ x.hex2dec(padding: 3)
97
+ end
98
+ dec_chuncks.join(' ').scan(/\d+ \d+ \d+ \d+/).map { |x| "|#{x}|" }.join(' ')
99
+ end
100
+
101
+ # Display a CLI-friendly output summarizing the decdump in all Unicode encodings
102
+ # @return [String] CLI-ready output
103
+ # @example
104
+ # puts Unisec::Decdump.new('K').display # =>
105
+ # # UTF-8: 226 132 170
106
+ # # UTF-16BE: |033 042|
107
+ # # UTF-16LE: |042 033|
108
+ # # UTF-32BE: |000 000 033 042|
109
+ # # UTF-32LE: |042 033 000 000|
110
+ def display
111
+ "UTF-8: #{@utf8}\n" \
112
+ "UTF-16BE: #{@utf16be}\n" \
113
+ "UTF-16LE: #{@utf16le}\n" \
114
+ "UTF-32BE: #{@utf32be}\n" \
115
+ "UTF-32LE: #{@utf32le}".gsub('|', Paint['|', :red])
116
+ end
117
+ end
118
+ end
@@ -3,7 +3,7 @@
3
3
  require 'ctf_party'
4
4
 
5
5
  module Unisec
6
- # Hexdump of all Unicode encodings.
6
+ # Hexadecimal dump (hexdump) of all Unicode encodings.
7
7
  class Hexdump
8
8
  # UTF-8 hexdump
9
9
  # @return [String] UTF-8 hexdump
@@ -1,10 +1,22 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'ctf_party'
4
+ require 'paint'
5
+ require 'unisec/utils'
4
6
 
5
7
  module Unisec
6
8
  # Normalization Forms
7
9
  class Normalization
10
+ # HTML escapable characters mapped with their Unicode counterparts that will
11
+ # cast to themselves after applying normalization forms using compatibility mode.
12
+ HTML_ESCAPE_BYPASS = {
13
+ '<' => ['﹤', '＜'],
14
+ '>' => ['﹥', '＞'],
15
+ '"' => ['＂'],
16
+ "'" => ['＇'],
17
+ '&' => ['﹠', '＆']
18
+ }.freeze
19
+
8
20
  # Original input
9
21
  # @return [String] untouched input
10
22
  attr_reader :original
@@ -64,6 +76,25 @@ module Unisec
64
76
  str.unicode_normalize(:nfkd)
65
77
  end
66
78
 
79
+ # Replace HTML escapable characters with their Unicode counterparts that will
80
+ # cast to themselves after applying normalization forms using compatibility mode.
81
+ # Useful for XSS, to bypass HTML escape.
82
+ # If several values are possible, one is picked randomly.
83
+ # @param str [String] the target string
84
+ # @return [String] copy of the input with each HTML escapable character replaced by one of its Unicode counterparts
85
+ def self.replace_bypass(str)
86
+ str = str.dup
87
+ HTML_ESCAPE_BYPASS.each do |k, v|
88
+ str.gsub!(k, v.sample)
89
+ end
90
+ str
91
+ end
92
+
93
+ # Instance version of {Normalization.replace_bypass}.
94
+ def replace_bypass
95
+ Normalization.replace_bypass(@original)
96
+ end
97
+
67
98
  # Display a CLI-friendly output summurizing all normalization forms
68
99
  # @return [String] CLI-ready output
69
100
  # @example
@@ -82,7 +113,7 @@ module Unisec
82
113
  def display
83
114
  colorize = lambda { |form_title, form_attr|
84
115
  "#{Paint[form_title.to_s, :underline,
85
- :bold]}: #{form_attr}\n #{Paint[Unisec::Properties.chars2codepoints(form_attr), :red]}\n"
116
+ :bold]}: #{form_attr}\n #{Paint[Unisec::Utils::String.chars2codepoints(form_attr), :red]}\n"
86
117
  }
87
118
  colorize.call('Original', @original) +
88
119
  colorize.call('NFC', @nfc) +
@@ -90,5 +121,19 @@ module Unisec
90
121
  colorize.call('NFD', @nfd) +
91
122
  colorize.call('NFKD', @nfkd)
92
123
  end
124
+
125
+ # Display a CLI-friendly output of the XSS payload to bypass HTML escape and
126
+ # what it does once normalized in NFKC & NFKD.
127
+ def display_replace
128
+ colorize = lambda { |form_title, form_attr|
129
+ "#{Paint[form_title.to_s, :underline,
130
+ :bold]}: #{form_attr}\n #{Paint[Unisec::Utils::String.chars2codepoints(form_attr), :red]}\n"
131
+ }
132
+ payload = replace_bypass
133
+ colorize.call('Original', @original) +
134
+ colorize.call('Bypass payload', payload) +
135
+ colorize.call('NFKC', Normalization.nfkc(payload)) +
136
+ colorize.call('NFKD', Normalization.nfkd(payload))
137
+ end
93
138
  end
94
139
  end
@@ -0,0 +1,224 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'paint'
4
+ require 'unisec/utils'
5
+
6
+ module Unisec
7
+ # Operations about Unicode planes
8
+ class Planes # rubocop:disable Metrics/ClassLength
9
+ # Data about the planes
10
+ PLANES = [
11
+ { range: 0x0..0xffff, name: 'Basic Multilingual Plane' },
12
+ { range: 0x10000..0x1ffff, name: 'Supplementary Multilingual Plane' },
13
+ { range: 0x20000..0x2ffff, name: 'Supplementary Ideographic Plane' },
14
+ { range: 0x30000..0x3ffff, name: 'Tertiary Ideographic Plane' },
15
+ { range: 0x40000..0x4ffff, name: 'unassigned' },
16
+ { range: 0x50000..0x5ffff, name: 'unassigned' },
17
+ { range: 0x60000..0x6ffff, name: 'unassigned' },
18
+ { range: 0x70000..0x7ffff, name: 'unassigned' },
19
+ { range: 0x80000..0x8ffff, name: 'unassigned' },
20
+ { range: 0x90000..0x9ffff, name: 'unassigned' },
21
+ { range: 0xa0000..0xaffff, name: 'unassigned' },
22
+ { range: 0xb0000..0xbffff, name: 'unassigned' },
23
+ { range: 0xc0000..0xcffff, name: 'unassigned' },
24
+ { range: 0xd0000..0xdffff, name: 'unassigned' },
25
+ { range: 0xe0000..0xeffff, name: 'Supplement­ary Special-purpose Plane' },
26
+ { range: 0xf0000..0xfffff, name: 'supplement­ary Private Use Area planes' },
27
+ { range: 0x100000..0x10ffff, name: 'supplement­ary Private Use Area planes' }
28
+ ].freeze
29
+
30
+ # List Unicode planes name
31
+ # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (warning: very slow, very unoptimized, see {Unisec::Blocks.list})
32
+ # @return [Array<Hash>] planes, each with its range, name and the list of
33
+ # blocks it contains
34
+ # @example
35
+ # Unisec::Planes.list # =>
36
+ # # [{range: 0..65535,
37
+ # # name: "Basic Multilingual Plane",
38
+ # # blocks:
39
+ # # [{range: 0..127, name: "Basic Latin", range_size: nil, char_count: nil},
40
+ # # {range: 128..255, name: "Latin-1 Supplement", range_size: nil, char_count: nil},
41
+ # # […]
42
+ def self.list(with_count: false)
43
+ PLANES.zip(plane2blocks(PLANES, with_count: with_count)).map do |base, extra|
44
+ base.merge(blocks: extra)
45
+ end
46
+ end
47
+
48
+ # List details about target plane including the list of associated blocks
49
+ # @param plane_arg [String|Integer] name or number of the plane
50
+ # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (see {Unisec::Blocks.list})
51
+ # @return [Hash|Array<Hash>|nil] nil if no match, Hash of the plane if one match,
52
+ # Array of planes' Hash if several matches
53
+ # @example
54
+ # Unisec::Planes.plane(3) # =>
55
+ # # {range: 196608..262143,
56
+ # # name: "Tertiary Ideographic Plane",
57
+ # # blocks:
58
+ # # [{range: 196608..201551, name: "CJK Unified Ideographs Extension G", range_size: nil, char_count: nil},
59
+ # # {range: 201552..205743, name: "CJK Unified Ideographs Extension H", range_size: nil, char_count: nil},
60
+ # # {range: 205744..210047, name: "CJK Unified Ideographs Extension J", range_size: nil, char_count: nil}]}
61
+ # Unisec::Planes.plane('Supplementary Ideographic Plane') # =>
62
+ # # {range: 131072..196607,
63
+ # # name: "Supplementary Ideographic Plane",
64
+ # # blocks:
65
+ # # [{range: 131072..173791, name: "CJK Unified Ideographs Extension B", range_size: nil, char_count: nil},
66
+ # # {range: 173824..177983, name: "CJK Unified Ideographs Extension C", range_size: nil, char_count: nil},
67
+ # # {range: 177984..178207, name: "CJK Unified Ideographs Extension D", range_size: nil, char_count: nil},
68
+ # # {range: 178208..183983, name: "CJK Unified Ideographs Extension E", range_size: nil, char_count: nil},
69
+ # # {range: 183984..191471, name: "CJK Unified Ideographs Extension F", range_size: nil, char_count: nil},
70
+ # # {range: 191472..192095, name: "CJK Unified Ideographs Extension I", range_size: nil, char_count: nil},
71
+ # # {range: 194560..195103, name: "CJK Compatibility Ideographs Supplement", range_size: nil, char_count: nil}]}
72
+ # Unisec::Planes.plane('unassigned') # =>
73
+ # # [{range: 262144..327679, name: "unassigned", blocks: []},
74
+ # # {range: 327680..393215, name: "unassigned", blocks: []},
75
+ # # {range: 393216..458751, name: "unassigned", blocks: []},
76
+ # # {range: 458752..524287, name: "unassigned", blocks: []},
77
+ # # {range: 524288..589823, name: "unassigned", blocks: []},
78
+ # # {range: 589824..655359, name: "unassigned", blocks: []},
79
+ # # {range: 655360..720895, name: "unassigned", blocks: []},
80
+ # # {range: 720896..786431, name: "unassigned", blocks: []},
81
+ # # {range: 786432..851967, name: "unassigned", blocks: []},
82
+ # # {range: 851968..917503, name: "unassigned", blocks: []}]
83
+ def self.plane(plane_arg, with_count: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
84
+ case plane_arg
85
+ when Integer # search by plane number
86
+ res = PLANES[plane_arg]
87
+ when String # search by plane name
88
+ res = PLANES.select { |plane| plane[:name].downcase == plane_arg.downcase }
89
+ return nil if res.empty?
90
+
91
+ res = res.first if res.size == 1 # Hash if one, Array of Hash if multiples
92
+ else
93
+ raise ArgumentError
94
+ end
95
+ case res
96
+ when nil
97
+ nil # handle invalid search term
98
+ # Enrich plane data with blocks
99
+ when Hash # When 1 plane
100
+ res[:blocks] = plane2blocks(res, with_count: with_count)
101
+ res
102
+ when Array # When multiple planes
103
+ res.zip(plane2blocks(res, with_count: with_count)).map do |base, extra|
104
+ base.merge(blocks: extra)
105
+ end
106
+ end
107
+ end
108
+
109
+ # Find the blocks included in a given plane
110
+ # @param plane [Hash|Array<Hash>] plane hash or array of plane hash
111
+ # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (see {Unisec::Blocks.list})
112
+ # @return [Array<Hash>|Array<Array<Hash>>] blocks included in the plane; for an array of planes, one array of blocks per plane
113
+ # @example
114
+ # Unisec::Planes.plane2blocks({ range: 0x20000..0x2ffff, name: 'Supplementary Ideographic Plane' }) # =>
115
+ # # [{range: 131072..173791, name: "CJK Unified Ideographs Extension B", range_size: nil, char_count: nil},
116
+ # # {range: 173824..177983, name: "CJK Unified Ideographs Extension C", range_size: nil, char_count: nil},
117
+ # # {range: 177984..178207, name: "CJK Unified Ideographs Extension D", range_size: nil, char_count: nil},
118
+ # # {range: 178208..183983, name: "CJK Unified Ideographs Extension E", range_size: nil, char_count: nil},
119
+ # # {range: 183984..191471, name: "CJK Unified Ideographs Extension F", range_size: nil, char_count: nil},
120
+ # # {range: 191472..192095, name: "CJK Unified Ideographs Extension I", range_size: nil, char_count: nil},
121
+ # # {range: 194560..195103, name: "CJK Compatibility Ideographs Supplement", range_size: nil, char_count: nil}]
122
+ def self.plane2blocks(plane, with_count: false)
123
+ blocks = []
124
+ case plane
125
+ when Hash
126
+ Unisec::Blocks.list(with_count: with_count).each do |block|
127
+ blocks << block if plane[:range].include_range?(block[:range])
128
+ end
129
+ when Array
130
+ plane.each do |pl|
131
+ blocks << plane2blocks(pl, with_count: with_count)
132
+ end
133
+ else
134
+ raise ArgumentError
135
+ end
136
+ blocks
137
+ end
138
+
139
+ # Abbreviate a plane name (based on uppercase letters)
140
+ # @param name [String] plane name (as in {PLANES} `:name`)
141
+ # @return [String] plane abbreviation
142
+ # @example
143
+ # Unisec::Planes.abbr('Basic Multilingual Plane') # => "BMP"
144
+ # Unisec::Planes.abbr('supplement­ary Private Use Area planes') # => "PUA"
145
+ def self.abbr(name)
146
+ name.scan(/\p{Upper}/).join
147
+ end
148
+
149
+ # Display a CLI-friendly output listing all planes
150
+ # @param with_blocks [TrueClass|FalseClass] display the blocks associated with each plane
151
+ # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (see {Unisec::Blocks.list})
152
+ # @return [nil]
153
+ # @example
154
+ # Unisec::Planes.list_display(with_blocks: true, with_count: false)
155
+ # # Range: U+0000 - U+FFFF Name: Basic Multilingual Plane
156
+ # # Blocks:
157
+ # # Range: U+0000 - U+007F Name: Basic Latin
158
+ # # Range: U+0080 - U+00FF Name: Latin-1 Supplement
159
+ # # Range: U+0100 - U+017F Name: Latin Extended-A
160
+ # # […]
161
+ def self.list_display(with_blocks: false, with_count: false) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
162
+ planes = list(with_count: with_count)
163
+ display = ->(key, value, just) { print Paint[key, :red, :bold] + " #{value}".ljust(just) }
164
+ display_blk = ->(key, value, just) { print Paint[key, :magenta, :bold] + " #{value}".ljust(just) }
165
+ planes.each do |pla|
166
+ display.call('Range:', Utils::Range.range2codepoint_range(pla[:range]), 22)
167
+ display.call('Name:', pla[:name], 50)
168
+ if with_blocks
169
+ puts
170
+ display.call(' Blocks:', "\n", 0)
171
+ pla[:blocks].each do |block|
172
+ display_blk.call(' Range:', Utils::Range.range2codepoint_range(block[:range]), 22)
173
+ display_blk.call('Name:', block[:name], 50)
174
+ if with_count
175
+ display_blk.call('Range size:', block[:range_size], 8)
176
+ display_blk.call('Char count:', block[:char_count], 0)
177
+ end
178
+ puts
179
+ end
180
+ end
181
+ puts
182
+ end
183
+ nil
184
+ end
185
+
186
+ # Display a CLI-friendly output searching for a plane
187
+ # @param plane_arg [String|Integer] name or number of the plane
188
+ # @param with_blocks [TrueClass|FalseClass] display the blocks associated with each plane
189
+ # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (see {Unisec::Blocks.list})
190
+ # @return [nil]
191
+ # @example
192
+ # Unisec::Planes.plane_display(3, with_blocks: true)
193
+ # # Range: U+30000 - U+3FFFF Name: Tertiary Ideographic Plane
194
+ # # Blocks:
195
+ # # Range: U+30000 - U+3134F Name: CJK Unified Ideographs Extension G
196
+ # # Range: U+31350 - U+323AF Name: CJK Unified Ideographs Extension H
197
+ # # Range: U+323B0 - U+3347F Name: CJK Unified Ideographs Extension J
198
+ def self.plane_display(plane_arg, with_blocks: false, with_count: false) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
199
+ planes = plane(plane_arg, with_count: with_count)
200
+ planes = [planes] if planes.is_a?(Hash)
201
+ display = ->(key, value, just) { print Paint[key, :red, :bold] + " #{value}".ljust(just) }
202
+ display_blk = ->(key, value, just) { print Paint[key, :magenta, :bold] + " #{value}".ljust(just) }
203
+ planes.each do |pla|
204
+ display.call('Range:', Utils::Range.range2codepoint_range(pla[:range]), 22)
205
+ display.call('Name:', pla[:name], 50)
206
+ if with_blocks
207
+ puts
208
+ display.call(' Blocks:', "\n", 0)
209
+ pla[:blocks].each do |block|
210
+ display_blk.call(' Range:', Utils::Range.range2codepoint_range(block[:range]), 22)
211
+ display_blk.call('Name:', block[:name], 50)
212
+ if with_count
213
+ display_blk.call('Range size:', block[:range_size], 8)
214
+ display_blk.call('Char count:', block[:char_count], 0)
215
+ end
216
+ puts
217
+ end
218
+ end
219
+ puts
220
+ end
221
+ nil
222
+ end
223
+ end
224
+ end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'twitter_cldr'
4
4
  require 'paint'
5
+ require 'unisec/utils'
5
6
 
6
7
  module Unisec
7
8
  # Manipulate Unicode properties
@@ -50,7 +51,7 @@ module Unisec
50
51
  def self.codepoints_display(prop)
51
52
  codepoints = Properties.codepoints(prop)
52
53
  codepoints.each do |cp|
53
- puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
54
+ puts "#{Utils::Integer.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
54
55
  end
55
56
  nil
56
57
  end
@@ -77,7 +78,7 @@ module Unisec
77
78
  block: props.block.join,
78
79
  category: categories[1],
79
80
  subcategory: categories[0],
80
- codepoint: Properties.char2codepoint(chr),
81
+ codepoint: Utils::String.char2codepoint(chr),
81
82
  name: cp.name,
82
83
  script: props.script.join,
83
84
  case: {
@@ -127,22 +128,22 @@ module Unisec
127
128
  display.call('Since (age):', "Version #{data[:age]}")
128
129
  puts
129
130
  x = data.dig(:case, :twitter, :uppercase)
130
- display.call('Uppercase:', x + " (#{Properties.char2codepoint(x)})")
131
+ display.call('Uppercase:', x + " (#{Utils::String.char2codepoint(x)})")
131
132
  x = data.dig(:case, :twitter, :lowercase)
132
- display.call('Lowercase:', x + " (#{Properties.char2codepoint(x)})")
133
+ display.call('Lowercase:', x + " (#{Utils::String.char2codepoint(x)})")
133
134
  x = data.dig(:case, :twitter, :titlecase)
134
- display.call('Titlecase:', x + " (#{Properties.char2codepoint(x)})")
135
+ display.call('Titlecase:', x + " (#{Utils::String.char2codepoint(x)})")
135
136
  x = data.dig(:case, :twitter, :casefold)
136
- display.call('Casefold:', x + " (#{Properties.char2codepoint(x)})")
137
+ display.call('Casefold:', x + " (#{Utils::String.char2codepoint(x)})")
137
138
  puts
138
139
  x = data.dig(:normalization, :twitter, :nfkd)
139
- display.call('Normalization NFKD:', x + " (#{Properties.chars2codepoints(x)})")
140
+ display.call('Normalization NFKD:', x + " (#{Utils::String.chars2codepoints(x)})")
140
141
  x = data.dig(:normalization, :twitter, :nfkc)
141
- display.call('Normalization NFKC:', x + " (#{Properties.chars2codepoints(x)})")
142
+ display.call('Normalization NFKC:', x + " (#{Utils::String.chars2codepoints(x)})")
142
143
  x = data.dig(:normalization, :twitter, :nfd)
143
- display.call('Normalization NFD:', x + " (#{Properties.chars2codepoints(x)})")
144
+ display.call('Normalization NFD:', x + " (#{Utils::String.chars2codepoints(x)})")
144
145
  x = data.dig(:normalization, :twitter, :nfc)
145
- display.call('Normalization NFC:', x + " (#{Properties.chars2codepoints(x)})")
146
+ display.call('Normalization NFC:', x + " (#{Utils::String.chars2codepoints(x)})")
146
147
  if extended
147
148
  puts
148
149
  data[:other_properties].each do |k, v|
@@ -151,37 +152,5 @@ module Unisec
151
152
  end
152
153
  nil
153
154
  end
154
-
155
- # Display the code point in Unicode format for a given character (code point as string)
156
- # @param chr [String] Unicode code point (as character / string)
157
- # @return [String] code point in Unicode format
158
- # @example
159
- # Unisec::Properties.char2codepoint('💎') # => "U+1F48E"
160
- def self.char2codepoint(chr)
161
- Properties.deccp2stdhexcp(chr.codepoints.first)
162
- end
163
-
164
- # Display the code points in Unicode format for the given characters (code points as string)
165
- # @param chrs [String] Unicode code points (as characters / string)
166
- # @return [String] code points in Unicode format
167
- # @example
168
- # Unisec::Properties.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
169
- # Unisec::Properties.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"
170
- def self.chars2codepoints(chrs)
171
- out = []
172
- chrs.each_char do |chr|
173
- out << Properties.char2codepoint(chr)
174
- end
175
- out.join(' ')
176
- end
177
-
178
- # Convert from decimal code point to standardized format hexadecimal code point
179
- # @param int_cp [Integer] Code point in decimal format
180
- # @return [String] code point in Unicode format
181
- # @example
182
- # Unisec::Properties.intcp2stdhexcp(128640) # => "U+1F680"
183
- def self.deccp2stdhexcp(int_cp)
184
- "U+#{format('%.4x', int_cp).upcase}"
185
- end
186
155
  end
187
156
  end
data/lib/unisec/rugrep.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'twitter_cldr'
4
4
  require 'paint'
5
+ require 'unisec/utils'
5
6
 
6
7
  module Unisec
7
8
  # Ruby grep : Ruby regular expression search for Unicode code point names
@@ -64,7 +65,7 @@ module Unisec
64
65
  def self.regrep_display(regexp)
65
66
  codepoints = regrep(regexp)
66
67
  codepoints.each do |cp|
67
- puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
68
+ puts "#{Utils::Integer.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
68
69
  end
69
70
  nil
70
71
  end
@@ -118,7 +119,7 @@ module Unisec
118
119
  def self.regrep_display_slow(regexp)
119
120
  codepoints = regrep_slow(regexp)
120
121
  codepoints.each do |cp|
121
- puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
122
+ puts "#{Utils::Integer.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
122
123
  end
123
124
  nil
124
125
  end