unisec 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/unisec/bidi.rb CHANGED
@@ -18,10 +18,10 @@ module Unisec
18
18
  # @param input [String] the target string
19
19
  # @param opts [Hash] optional parameters, see {Spoof.bidi_affix}
20
20
  # @return [String] the target string
21
- def set_target_display(input, **opts)
21
+ def set_target_display(input, **)
22
22
  @target_display = input
23
- @spoof_string = reverse(**opts)
24
- @spoof_payload = bidi_affix(**opts)
23
+ @spoof_string = reverse(**)
24
+ @spoof_payload = bidi_affix(**)
25
25
  @target_display
26
26
  end
27
27
 
@@ -66,8 +66,8 @@ module Unisec
66
66
  end
67
67
 
68
68
  # Call {Spoof.reverse} with `@target_display` as default input (target).
69
- def reverse(**opts)
70
- Spoof.reverse(@target_display, **opts)
69
+ def reverse(**)
70
+ Spoof.reverse(@target_display, **)
71
71
  end
72
72
 
73
73
  # Inject BiDi characters into the input string
@@ -121,8 +121,8 @@ module Unisec
121
121
  end
122
122
 
123
123
  # Call {Spoof.bidi_affix} with `@spoof_string` as input.
124
- def bidi_affix(**opts)
125
- Spoof.bidi_affix(@spoof_string, **opts)
124
+ def bidi_affix(**)
125
+ Spoof.bidi_affix(@spoof_string, **)
126
126
  end
127
127
 
128
128
  # Display a CLI-friendly output summarizing the spoof payload
@@ -157,7 +157,7 @@ module Unisec
157
157
  "Spoof payload (hex, escaped): #{@spoof_payload.to_hex(prefixall: '\\x')}\n" \
158
158
  "Spoof payload (base64): #{@spoof_payload.to_b64}\n" \
159
159
  "Spoof payload (urlencode): #{@spoof_payload.urlencode}\n" \
160
- "Spoof payload (code points): #{Unisec::Properties.chars2codepoints(@spoof_payload)}\n" \
160
+ "Spoof payload (code points): #{Unisec::Utils::String.chars2codepoints(@spoof_payload)}\n" \
161
161
  "\n\n\n" \
162
162
  '⚠: for the spoof payload to display correctly, be sure your VTE has RTL support, ' \
163
163
  "e.g. see https://wiki.archlinux.org/title/Bidirectional_text#Terminal.\n" \
@@ -0,0 +1,209 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'paint'
4
+ require 'unisec/utils'
5
+
6
+ module Unisec
7
+ # Operations about Unicode blocks
8
+ class Blocks # rubocop:disable Metrics/ClassLength
9
+ # UCD Blocks file location
10
+ # @see https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt
11
+ UCD_BLOCKS = File.join(__dir__, '../../data/Blocks.txt')
12
+
13
+ # List of invalid, private, reserved ranges. Unassigned, unallocated ranges are calculated dynamically in {list_unassigned}.
14
+ INVALID_RANGES = [
15
+ { range: 0xd800..0xdfff, name: 'Surrogates (invalid outside UTF-16)' },
16
+ { range: 0xe000..0xf8ff, name: 'Private Use Area (located in BMP)' },
17
+ { range: 0xf0000..0xfffff, name: 'Supplementary Private Use Area-A' },
18
+ { range: 0x100000..0x10ffff, name: 'Supplementary Private Use Area-B' }
19
+ ].freeze
20
+
21
+ # Returns the version of Unicode used in UCD local file (data/Blocks.txt)
22
+ # @return [String] Unicode version
23
+ # @example
24
+ # Unisec::Blocks.ucd_blocks_version # => "17.0.0"
25
+ def self.ucd_blocks_version
26
+ first_line = File.open(UCD_BLOCKS, &:readline)
27
+ first_line.match(/-(\d+\.\d+\.\d+)\.txt/).captures.first
28
+ end
29
+
30
+ # List Unicode blocks name
31
+ # ⚠️ Char count value may be wrong for CJK UNIFIED IDEOGRAPH because they are poorly described in DerivedName.txt.
32
+ # ⚠️ Populating char_count is slow and can take a few seconds.
33
+ # @param with_count [TrueClass|FalseClass] calculate block's range size & char count?
34
+ # @return [Array<Hash>] List of blocks (block name, range and count)
35
+ # @example
36
+ # Unisec::Blocks.list # => [{range: 0..127, name: "Basic Latin", range_size: nil, char_count: nil}, … ]
37
+ # Unisec::Blocks.list(with_count: true) # => [{range: 0..127, name: "Basic Latin", range_size: 128, char_count: 95}, … ]
38
+ def self.list(with_count: false)
39
+ out = []
40
+ file = File.new(UCD_BLOCKS)
41
+ file.each_line(chomp: true) do |line|
42
+ # Skip if the line is empty or a comment
43
+ next if line.empty? || line[0] == '#'
44
+
45
+ # parse the line to extract code point range and the name
46
+ blk_range, blk_name = line.split(';')
47
+ blk_range = Unisec::Utils::String.to_range(blk_range)
48
+ blk_name.lstrip!
49
+ out << {
50
+ range: blk_range,
51
+ name: blk_name,
52
+ range_size: with_count ? blk_range.size : nil,
53
+ char_count: with_count ? count_char_in_block(blk_range) : nil
54
+ }
55
+ end
56
+ out
57
+ end
58
+
59
+ # Count the number of characters allocated in a block.
60
+ # ⚠️ Char count value may be wrong for CJK UNIFIED IDEOGRAPH because they are poorly described in DerivedName.txt.
61
+ # @param range [Range] Block code point range
62
+ # @return [Integer] number of code points in the block
63
+ # @example
64
+ # Unisec::Blocks::count_char_in_block(0xAC00..0xD7AF) # => 11172
65
+ def self.count_char_in_block(range) # rubocop:disable Metrics/AbcSize
66
+ counter = 0
67
+ file = File.new(Rugrep::UCD_DERIVEDNAME)
68
+ file.each_line(chomp: true) do |line|
69
+ # Skip if the line is empty or a comment
70
+ next if line.empty? || line[0] == '#'
71
+
72
+ # parse the line to extract code point as integer and the name
73
+ cp_int, _name = line.split(';')
74
+ if cp_int.include?('..') # handle ranges in DerivedName.txt
75
+ ucd_range = Utils::String.to_range(cp_int)
76
+ next unless range.include_range?(ucd_range)
77
+
78
+ counter += ucd_range.size
79
+ next
80
+ end
81
+ cp_int = cp_int.chomp.to_i(16)
82
+ next unless range.include?(cp_int)
83
+
84
+ counter += 1
85
+ break if cp_int == range.end
86
+ end
87
+ counter
88
+ end
89
+
90
+ # Find the block including the target character or code point, or matching the provided name.
91
+ # @param block_arg [Integer|String] Decimal code point or standardized hexadecimal codepoint or string character (only one, so be careful with emojis, composed or joint characters using several units) or directly look for the block name (case insensitive).
92
+ # @param with_count [TrueClass|FalseClass] calculate block's range size & char count?
93
+ # @return [Hash|nil] Matching block (block name, range and count) or nil if not found
94
+ # @example
95
+ # Unisec::Blocks.block(65, with_count:true) # => {range: 0..127, name: "Basic Latin", range_size: 128, char_count: 95}
96
+ # Unisec::Blocks.block("U+1f4a9") # => {range: 127744..128511, name: "Miscellaneous Symbols and Pictographs", range_size: nil, char_count: nil}
97
+ # Unisec::Blocks.block("…", with_count:true) # => {range: 8192..8303, name: "General Punctuation", range_size: 112, char_count: 111}
98
+ # Unisec::Blocks.block("javanese") # => {range: 43392..43487, name: "Javanese", range_size: nil, char_count: nil}
99
+ def self.block(block_arg, with_count: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
100
+ file = File.new(UCD_BLOCKS)
101
+ found = false
102
+ file.each_line(chomp: true) do |line|
103
+ # Skip if the line is empty or a comment
104
+ next if line.empty? || line[0] == '#'
105
+
106
+ # parse the line to extract code point range and the name
107
+ blk_range, blk_name = line.split(';')
108
+ blk_range = Unisec::Utils::String.to_range(blk_range)
109
+ blk_name.lstrip!
110
+ case block_arg
111
+ when Integer # block_arg is an intgeger code point
112
+ found = true if blk_range.include?(block_arg)
113
+ when String # can be a char or block name or a string code point
114
+ if block_arg.size == 1 # is a char (1 code unit, not one grapheme)
115
+ found = true if blk_range.include?(Utils::String.convert_to_integer(block_arg))
116
+ elsif block_arg.start_with?('U+') # string code point
117
+ found = true if blk_range.include?(Utils::String.stdhexcp2deccp(block_arg))
118
+ elsif blk_name.downcase == block_arg.downcase # block name
119
+ found = true
120
+ end
121
+ end
122
+ if found
123
+ return {
124
+ range: blk_range,
125
+ name: blk_name,
126
+ range_size: with_count ? blk_range.size : nil,
127
+ char_count: with_count ? count_char_in_block(blk_range) : nil
128
+ }
129
+ end
130
+ end
131
+ nil # not found
132
+ end
133
+
134
+ # List unassigned, unallocated ranges.
135
+ # @return [Array<Range>] List of unassigned (code-point) ranges
136
+ # @example
137
+ # Unisec::Blocks.list_unassigned # => [12256..12271, 66048..66175, …]
138
+ def self.list_unassigned # rubocop:disable Metrics/AbcSize
139
+ base = (0x0000..0x10ffff)
140
+ assigned = Unisec::Blocks.list.map { |b| b[:range] }
141
+
142
+ unassigned = []
143
+ cursor = base.begin
144
+
145
+ assigned.each do |r|
146
+ unassigned << (cursor..(r.begin - 1)) if cursor < r.begin
147
+ cursor = r.end + 1
148
+ break if cursor > base.end
149
+ end
150
+
151
+ unassigned << (cursor..base.end) if cursor <= base.end
152
+
153
+ unassigned
154
+ end
155
+
156
+ # Display a CLI-friendly output listing all blocks
157
+ # @param with_count [TrueClass|FalseClass] calculate block's range size & char count?
158
+ def self.list_display(with_count: false) # rubocop:disable Metrics/AbcSize
159
+ blocks = list(with_count: with_count)
160
+ display = ->(key, value, just) { print Paint[key, :red, :bold] + " #{value}".ljust(just) }
161
+ blocks.each do |blk|
162
+ display.call('Range:', Utils::Range.range2codepoint_range(blk[:range]), 22)
163
+ display.call('Name:', blk[:name], 50)
164
+ if with_count
165
+ display.call('Range size:', blk[:range_size], 8)
166
+ display.call('Char count:', blk[:char_count], 0)
167
+ end
168
+ puts
169
+ end
170
+ nil
171
+ end
172
+
173
+ # Display a CLI-friendly output detailing the searched block
174
+ # @param block_arg [Integer|String] Decimal code point or standardized hexadecimal codepoint or string character (only one, so be careful with emojis, composed or joint characters using several units) or directly look for the block name (case insensitive).
175
+ # @param with_count [TrueClass|FalseClass] calculate block's range size & char count?
176
+ def self.block_display(block_arg, with_count: false)
177
+ blk = block(block_arg, with_count: with_count)
178
+ if blk.nil?
179
+ puts "no block found with #{block_arg}"
180
+ else
181
+ display = ->(key, value) { puts Paint[key, :red, :bold] + " #{value}" }
182
+ display.call('Range:', Utils::Range.range2codepoint_range(blk[:range]))
183
+ display.call('Name:', blk[:name])
184
+ if with_count
185
+ display.call('Range size:', blk[:range_size])
186
+ display.call('Char count:', blk[:char_count])
187
+ end
188
+ end
189
+ nil
190
+ end
191
+
192
+ # Display a CLI-friendly output listing all invalid and unassigned ranges.
193
+ def self.list_invalid_display # rubocop:disable Metrics/AbcSize
194
+ display = ->(key, value, just) { print Paint[key, :red, :bold] + " #{value}".ljust(just) }
195
+ puts '(Assigned) invalid, private, reserved ranges:'
196
+ INVALID_RANGES.each do |blk|
197
+ display.call('Range:', Utils::Range.range2codepoint_range(blk[:range]), 22)
198
+ display.call('Name:', blk[:name], 50)
199
+ puts
200
+ end
201
+ puts "\nUnasigned, unallocated ranges:"
202
+ list_unassigned.each do |blk|
203
+ display.call('Range:', Utils::Range.range2codepoint_range(blk), 22)
204
+ puts
205
+ end
206
+ nil
207
+ end
208
+ end
209
+ end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+ require 'unisec/utils'
6
+
7
+ module Unisec
8
+ module CLI
9
+ module Commands
10
+ # CLI sub-commands `unisec blocks xxx` for the class {Unisec::Blocks} from the lib.
11
+ module Blocks
12
+ # Command `unisec blocks list`
13
+ #
14
+ # Example:
15
+ #
16
+ # ```plaintext
17
+ # $ unisec blocks list
18
+ # Range: U+0000 - U+007F Name: Basic Latin
19
+ # Range: U+0080 - U+00FF Name: Latin-1 Supplement
20
+ # …
21
+ # ```
22
+ class List < Dry::CLI::Command
23
+ desc 'List all Unicode blocks'
24
+
25
+ option :with_count, default: 'false', values: %w[true false],
26
+ desc: "calculate block's range size & char count?"
27
+
28
+ # List Unicode blocks
29
+ def call(**options)
30
+ Unisec::Blocks.list_display(with_count: options[:with_count].to_bool)
31
+ end
32
+ end
33
+
34
+ # Command `unisec blocks search`
35
+ #
36
+ # Example:
37
+ #
38
+ # ```plaintext
39
+ # $ unisec blocks search 127745
40
+ # $ unisec blocks search U+1f4a9
41
+ # $ unisec blocks search …
42
+ # $ unisec blocks search javanese
43
+ # ```
44
+ class Search < Dry::CLI::Command
45
+ desc 'Search for a specific block'
46
+
47
+ argument :block_arg, required: true,
48
+ desc: 'Decimal code point | standardized hexadecimal codepoint | string character ' \
49
+ '(only one, so be careful with emojis, composed or joint characters using ' \
50
+ 'several units) | block name (case insensitive)'
51
+
52
+ option :with_count, default: 'false', values: %w[true false],
53
+ desc: "calculate block's range size & char count?"
54
+
55
+ # Display a block matching a decimal code point, standardized hexadecimal codepoint, string character or block name
56
+ # @param block_arg [Integer|String] Decimal code point or standardized hexadecimal codepoint or string character (only one, so be careful with emojis, composed or joint characters using several units) or directly look for the block name (case insensitive).
57
+ def call(block_arg: nil, **options)
58
+ block_arg = block_arg.to_i if /\A\d+\Z/.match?(block_arg) # cast decimal string to integer
59
+ Unisec::Blocks.block_display(block_arg, with_count: options[:with_count].to_bool)
60
+ end
61
+ end
62
+
63
+ # Command `unisec blocks invalid`
64
+ #
65
+ # Example:
66
+ #
67
+ # ```plaintext
68
+ # $ unisec blocks invalid
69
+ # (Assigned) invalid, private, reserved ranges:
70
+ # Range: U+D800 - U+DFFF Name: Surrogates (invalid outside UTF-16)
71
+ # Range: U+E000 - U+F8FF Name: Private Use Area (located in BMP)
72
+ # Range: U+F0000 - U+FFFFF Name: Supplementary Private Use Area-A
73
+ # Range: U+100000 - U+10FFFF Name: Supplementary Private Use Area-B
74
+ #
75
+ # Unasigned, unallocated ranges:
76
+ # Range: U+2FE0 - U+2FEF
77
+ # Range: U+10200 - U+1027F
78
+ # Range: U+103E0 - U+103FF
79
+ # Range: U+107C0 - U+107FF
80
+ # …
81
+ # ```
82
+ class Invalid < Dry::CLI::Command
83
+ desc 'List all invalid and unsassigned ranges'
84
+
85
+ # List all invalid and unassigned ranges
86
+ def call(**)
87
+ Unisec::Blocks.list_invalid_display
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
93
+ end
@@ -1,9 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'unisec/cli/bidi'
4
+ require 'unisec/cli/blocks'
4
5
  require 'unisec/cli/confusables'
5
- require 'unisec/cli/hexdump'
6
+ require 'unisec/cli/dump'
6
7
  require 'unisec/cli/normalization'
8
+ require 'unisec/cli/planes'
7
9
  require 'unisec/cli/properties'
8
10
  require 'unisec/cli/rugrep'
9
11
  require 'unisec/cli/size'
@@ -20,12 +22,18 @@ module Unisec
20
22
  # Mapping between the (sub-)commands as seen by the user
21
23
  # on the command-line interface and the CLI modules in the lib
22
24
  register 'bidi spoof', Bidi::Spoof
25
+ register 'blocks invalid', Blocks::Invalid
26
+ register 'blocks list', Blocks::List
27
+ register 'blocks search', Blocks::Search
23
28
  register 'confusables list', Confusables::List
24
29
  register 'confusables randomize', Confusables::Randomize
30
+ register 'dump dec', Dump::Dec
31
+ register 'dump hex', Dump::Hex
25
32
  register 'grep', Grep
26
- register 'hexdump', Hexdump
27
33
  register 'normalize all', Normalize::All
28
34
  register 'normalize replace', Normalize::Replace
35
+ register 'planes list', Planes::List
36
+ register 'planes search', Planes::Search
29
37
  register 'properties char', Properties::Char
30
38
  register 'properties codepoints', Properties::Codepoints
31
39
  register 'properties list', Properties::List
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+
6
+ module Unisec
7
+ module CLI
8
+ module Commands
9
+ # CLI sub-commands `unisec dump xxx` for several dump classes like {Unisec::Hexdump} or {Unisec::Decdump} from the lib.
10
+ module Dump
11
+ # CLI command `unisec dump hex` for the class {Unisec::Hexdump} from the lib.
12
+ #
13
+ # Example:
14
+ #
15
+ # ```plaintext
16
+ # $ unisec dump hex "ACCEIS"
17
+ # UTF-8: 41 43 43 45 49 53
18
+ # UTF-16BE: 0041 0043 0043 0045 0049 0053
19
+ # UTF-16LE: 4100 4300 4300 4500 4900 5300
20
+ # UTF-32BE: 00000041 00000043 00000043 00000045 00000049 00000053
21
+ # UTF-32LE: 41000000 43000000 43000000 45000000 49000000 53000000
22
+ #
23
+ # $ unisec dump hex "ACCEIS" --enc utf16le
24
+ # 4100 4300 4300 4500 4900 5300
25
+ # ```
26
+ class Hex < Dry::CLI::Command
27
+ desc 'Hexadecimal dump (hexdump) in all Unicode encodings'
28
+
29
+ argument :input, required: true,
30
+ desc: 'String input. Read from STDIN if equal to -.'
31
+
32
+ option :enc, default: nil, values: %w[utf8 utf16be utf16le utf32be utf32le],
33
+ desc: 'Output only in the specified encoding.'
34
+
35
+ # Hexdump of all Unicode encodings.
36
+ # @param input [String] Input string to encode
37
+ def call(input: nil, **options)
38
+ input = $stdin.read.chomp if input == '-'
39
+ if options[:enc].nil?
40
+ puts Unisec::Hexdump.new(input).display
41
+ else
42
+ # using send() is safe here thanks to the value whitelist
43
+ puts Unisec::Hexdump.send(options[:enc], input)
44
+ end
45
+ end
46
+ end
47
+
48
+ # CLI command `unisec dump dec` for the class {Unisec::Decdump} from the lib.
49
+ #
50
+ # Example:
51
+ #
52
+ # ```plaintext
53
+ # $ unisec dump dec "noraj"
54
+ # UTF-8: 110 111 114 097 106
55
+ # UTF-16BE: |000 110| |000 111| |000 114| |000 097| |000 106|
56
+ # UTF-16LE: |110 000| |111 000| |114 000| |097 000| |106 000|
57
+ # UTF-32BE: |000 000 000 110| |000 000 000 111| |000 000 000 114| |000 000 000 097| |000 000 000 106|
58
+ # UTF-32LE: |110 000 000 000| |111 000 000 000| |114 000 000 000| |097 000 000 000| |106 000 000 000|
59
+ #
60
+ # $ unisec dump dec "noraj" --enc utf16le
61
+ # |110 000| |111 000| |114 000| |097 000| |106 000|
62
+ # ```
63
+ class Dec < Dry::CLI::Command
64
+ desc 'Decimal dump (decdump) in all Unicode encodings'
65
+
66
+ argument :input, required: true,
67
+ desc: 'String input. Read from STDIN if equal to -.'
68
+
69
+ option :enc, default: nil, values: %w[utf8 utf16be utf16le utf32be utf32le],
70
+ desc: 'Output only in the specified encoding.'
71
+
72
+ # Decdump of all Unicode encodings.
73
+ # @param input [String] Input string to encode
74
+ def call(input: nil, **options)
75
+ input = $stdin.read.chomp if input == '-'
76
+ if options[:enc].nil?
77
+ puts Unisec::Decdump.new(input).display
78
+ else
79
+ # using send() is safe here thanks to the value whitelist
80
+ puts Unisec::Decdump.send(options[:enc], input)
81
+ end
82
+ end
83
+ end
84
+ end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+ require 'unisec/utils'
6
+
7
+ module Unisec
8
+ module CLI
9
+ module Commands
10
+ # CLI sub-commands `unisec planes xxx` for the class {Unisec::Planes} from the lib.
11
+ module Planes
12
+ # Command `unisec planes list`
13
+ #
14
+ # Example:
15
+ #
16
+ # ```plaintext
17
+ # $ unisec planes list
18
+ # Range: U+0000 - U+FFFF Name: Basic Multilingual Plane
19
+ # Range: U+10000 - U+1FFFF Name: Supplementary Multilingual Plane
20
+ # Range: U+20000 - U+2FFFF Name: Supplementary Ideographic Plane
21
+ # Range: U+30000 - U+3FFFF Name: Tertiary Ideographic Plane
22
+ # …
23
+ # $ unisec planes list --with-blocks=true
24
+ # Range: U+0000 - U+FFFF Name: Basic Multilingual Plane
25
+ # Blocks:
26
+ # Range: U+0000 - U+007F Name: Basic Latin
27
+ # Range: U+0080 - U+00FF Name: Latin-1 Supplement
28
+ # Range: U+0100 - U+017F Name: Latin Extended-A
29
+ # Range: U+0180 - U+024F Name: Latin Extended-B
30
+ # ```
31
+ class List < Dry::CLI::Command
32
+ desc 'List all Unicode planes'
33
+
34
+ option :with_blocks, default: 'false', values: %w[true false],
35
+ desc: 'display the blocks associated with each plane?'
36
+ option :with_count, default: 'false', values: %w[true false],
37
+ desc: "calculate block's range size & char count?"
38
+
39
+ # List Unicode planes
40
+ def call(**options)
41
+ Unisec::Planes.list_display(with_blocks: options[:with_blocks].to_bool,
42
+ with_count: options[:with_count].to_bool)
43
+ end
44
+ end
45
+
46
+ # Command `unisec planes search`
47
+ #
48
+ # Example:
49
+ #
50
+ # ```plaintext
51
+ # $ unisec planes search 3
52
+ # Range: U+30000 - U+3FFFF Name: Tertiary Ideographic Plane
53
+ # $ unisec planes search 2 --with-blocks=true
54
+ # Range: U+20000 - U+2FFFF Name: Supplementary Ideographic Plane
55
+ # Blocks:
56
+ # Range: U+20000 - U+2A6DF Name: CJK Unified Ideographs Extension B
57
+ # Range: U+2A700 - U+2B73F Name: CJK Unified Ideographs Extension C
58
+ # Range: U+2B740 - U+2B81F Name: CJK Unified Ideographs Extension D
59
+ # Range: U+2B820 - U+2CEAF Name: CJK Unified Ideographs Extension E
60
+ # Range: U+2CEB0 - U+2EBEF Name: CJK Unified Ideographs Extension F
61
+ # Range: U+2EBF0 - U+2EE5F Name: CJK Unified Ideographs Extension I
62
+ # Range: U+2F800 - U+2FA1F Name: CJK Compatibility Ideographs Supplement
63
+ # $ unisec planes search 'basic multilingual plane'
64
+ # Range: U+0000 - U+FFFF Name: Basic Multilingual Plane
65
+ # $ unisec planes search 'unassigned'
66
+ # Range: U+40000 - U+4FFFF Name: unassigned
67
+ # Range: U+50000 - U+5FFFF Name: unassigned
68
+ # Range: U+60000 - U+6FFFF Name: unassigned
69
+ # Range: U+70000 - U+7FFFF Name: unassigned
70
+ # Range: U+80000 - U+8FFFF Name: unassigned
71
+ # Range: U+90000 - U+9FFFF Name: unassigned
72
+ # Range: U+A0000 - U+AFFFF Name: unassigned
73
+ # Range: U+B0000 - U+BFFFF Name: unassigned
74
+ # Range: U+C0000 - U+CFFFF Name: unassigned
75
+ # Range: U+D0000 - U+DFFFF Name: unassigned
76
+ # ```
77
+ class Search < Dry::CLI::Command
78
+ desc 'Search for a specific plane'
79
+
80
+ argument :plane_arg, required: true,
81
+ desc: 'Name or number of the plane'
82
+
83
+ option :with_blocks, default: 'false', values: %w[true false],
84
+ desc: 'display the blocks associated with each plane?'
85
+ option :with_count, default: 'false', values: %w[true false],
86
+ desc: "calculate block's range size & char count?"
87
+
88
+ # Display a plane matching a plane name or plane number
89
+ # @param plane_arg [String|Integer] name or number of the plane
90
+ def call(plane_arg: nil, **options)
91
+ plane_arg = plane_arg.to_i if /\A\d+\Z/.match?(plane_arg) # cast decimal string to integer
92
+ Unisec::Planes.plane_display(plane_arg, with_blocks: options[:with_blocks].to_bool,
93
+ with_count: options[:with_count].to_bool)
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end
99
+ end
@@ -21,7 +21,7 @@ module Unisec
21
21
  argument :regexp, required: true,
22
22
  desc: 'regular expression'
23
23
 
24
- # Hexdump of all Unicode encodings.
24
+ # Unicode code point names matching regexp.
25
25
  # @param regexp [Regexp] Regular expression without delimiters or modifiers.
26
26
  # Supports everything Ruby Regexp supports
27
27
  def call(regexp: nil, **)
@@ -2,6 +2,8 @@
2
2
 
3
3
  require 'unicode/confusable'
4
4
  require 'twitter_cldr'
5
+ require 'paint'
6
+ require 'unisec/utils'
5
7
 
6
8
  module Unisec
7
9
  # Operations about Unicode confusable characters (homoglyphs).
@@ -22,7 +24,7 @@ module Unisec
22
24
  # @param map [Boolean] allows partial mapping, includes confusable where the given char is a part of
23
25
  def self.list_display(chr, map: true)
24
26
  Confusables.list(chr, map: map).each do |confu|
25
- puts "#{Properties.char2codepoint(confu).ljust(9)} #{confu.ljust(4)} " \
27
+ puts "#{Utils::String.char2codepoint(confu).ljust(9)} #{confu.ljust(4)} " \
26
28
  "#{TwitterCldr::Shared::CodePoint.get(confu.codepoints.first).name}"
27
29
  end
28
30
  nil