unisec 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/unisec/bidi.rb CHANGED
@@ -18,10 +18,10 @@ module Unisec
18
18
  # @param input [String] the target string
19
19
  # @param opts [Hash] optional parameters, see {Spoof.bidi_affix}
20
20
  # @return [String] the target string
21
- def set_target_display(input, **opts)
21
+ def set_target_display(input, **)
22
22
  @target_display = input
23
- @spoof_string = reverse(**opts)
24
- @spoof_payload = bidi_affix(**opts)
23
+ @spoof_string = reverse(**)
24
+ @spoof_payload = bidi_affix(**)
25
25
  @target_display
26
26
  end
27
27
 
@@ -66,8 +66,8 @@ module Unisec
66
66
  end
67
67
 
68
68
  # Call {Spoof.reverse} with `@target_display` as default input (target).
69
- def reverse(**opts)
70
- Spoof.reverse(@target_display, **opts)
69
+ def reverse(**)
70
+ Spoof.reverse(@target_display, **)
71
71
  end
72
72
 
73
73
  # Inject BiDi characters into the input string
@@ -121,8 +121,8 @@ module Unisec
121
121
  end
122
122
 
123
123
  # Call {Spoof.bidi_affix} with `@spoof_string` as input.
124
- def bidi_affix(**opts)
125
- Spoof.bidi_affix(@spoof_string, **opts)
124
+ def bidi_affix(**)
125
+ Spoof.bidi_affix(@spoof_string, **)
126
126
  end
127
127
 
128
128
  # Display a CLI-friendly output summurizing the spoof payload
@@ -157,7 +157,7 @@ module Unisec
157
157
  "Spoof payload (hex, escaped): #{@spoof_payload.to_hex(prefixall: '\\x')}\n" \
158
158
  "Spoof payload (base64): #{@spoof_payload.to_b64}\n" \
159
159
  "Spoof payload (urlencode): #{@spoof_payload.urlencode}\n" \
160
- "Spoof payload (code points): #{Unisec::Properties.chars2codepoints(@spoof_payload)}\n" \
160
+ "Spoof payload (code points): #{Unisec::Utils::String.chars2codepoints(@spoof_payload)}\n" \
161
161
  "\n\n\n" \
162
162
  '⚠: for the spoof payload to display correctly, be sure your VTE has RTL support, ' \
163
163
  "e.g. see https://wiki.archlinux.org/title/Bidirectional_text#Terminal.\n" \
@@ -0,0 +1,209 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'paint'
4
+ require 'unisec/utils'
5
+
6
+ module Unisec
7
+ # Operations about Unicode blocks
8
+ class Blocks # rubocop:disable Metrics/ClassLength
9
+ # UCD Blocks file location
10
+ # @see https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt
11
+ UCD_BLOCKS = File.join(__dir__, '../../data/Blocks.txt')
12
+
13
+ # List of invalid, private, reserved ranges. Unassigned, unallocated ranges are calculated dynamically in {list_unassigned}.
14
+ INVALID_RANGES = [
15
+ { range: 0xd800..0xdfff, name: 'Surrogates (invalid outside UTF-16)' },
16
+ { range: 0xe000..0xf8ff, name: 'Private Use Area (located in BMP)' },
17
+ { range: 0xf0000..0xfffff, name: 'Supplementary Private Use Area-A' },
18
+ { range: 0x100000..0x10ffff, name: 'Supplementary Private Use Area-B' }
19
+ ].freeze
20
+
21
+ # Returns the version of Unicode used in UCD local file (data/Blocks.txt)
22
+ # @return [String] Unicode version
23
+ # @example
24
+ # Unisec::Blocks.ucd_blocks_version # => "17.0.0"
25
+ def self.ucd_blocks_version
26
+ first_line = File.open(UCD_BLOCKS, &:readline)
27
+ first_line.match(/-(\d+\.\d+\.\d+)\.txt/).captures.first
28
+ end
29
+
30
+ # List Unicode blocks name
31
+ # ⚠️ Char count value may be wrong for CJK UNIFIED IDEOGRAPH because they are poorly described in DerivedName.txt.
32
+ # ⚠️ Populating char_count is slow and can take a few seconds.
33
+ # @param with_count [TrueClass|FalseClass] calculate block's range size & char count?
34
+ # @return [Array<Hash>] List of blocks (block name, range and count)
35
+ # @example
36
+ # Unisec::Blocks.list # => [{range: 0..127, name: "Basic Latin", range_size: nil, char_count: nil}, … ]
37
+ # Unisec::Blocks.list(with_count: true) # => [{range: 0..127, name: "Basic Latin", range_size: 128, char_count: 95}, … ]
38
+ def self.list(with_count: false)
39
+ out = []
40
+ file = File.new(UCD_BLOCKS)
41
+ file.each_line(chomp: true) do |line|
42
+ # Skip if the line is empty or a comment
43
+ next if line.empty? || line[0] == '#'
44
+
45
+ # parse the line to extract code point range and the name
46
+ blk_range, blk_name = line.split(';')
47
+ blk_range = Unisec::Utils::String.to_range(blk_range)
48
+ blk_name.lstrip!
49
+ out << {
50
+ range: blk_range,
51
+ name: blk_name,
52
+ range_size: with_count ? blk_range.size : nil,
53
+ char_count: with_count ? count_char_in_block(blk_range) : nil
54
+ }
55
+ end
56
+ out
57
+ end
58
+
59
+ # Count the number of characters allocated in a block.
60
+ # ⚠️ Char count value may be wrong for CJK UNIFIED IDEOGRAPH because they are poorly described in DerivedName.txt.
61
+ # @param range [Range] Block code point range
62
+ # @return [Integer] number of code points in the block
63
+ # @example
64
+ # Unisec::Blocks::count_char_in_block(0xAC00..0xD7AF) # => 11172
65
+ def self.count_char_in_block(range) # rubocop:disable Metrics/AbcSize
66
+ counter = 0
67
+ file = File.new(Rugrep::UCD_DERIVEDNAME)
68
+ file.each_line(chomp: true) do |line|
69
+ # Skip if the line is empty or a comment
70
+ next if line.empty? || line[0] == '#'
71
+
72
+ # parse the line to extract code point as integer and the name
73
+ cp_int, _name = line.split(';')
74
+ if cp_int.include?('..') # handle ranges in DerivedName.txt
75
+ ucd_range = Utils::String.to_range(cp_int)
76
+ next unless range.include_range?(ucd_range)
77
+
78
+ counter += ucd_range.size
79
+ next
80
+ end
81
+ cp_int = cp_int.chomp.to_i(16)
82
+ next unless range.include?(cp_int)
83
+
84
+ counter += 1
85
+ break if cp_int == range.end
86
+ end
87
+ counter
88
+ end
89
+
90
+ # Find the block including the target character or code point, or matching the provided name.
91
+ # @param block_arg [Integer|String] Decimal code point or standardized hexadecimal codepoint or string character (only one, so be careful with emojis, composed or joint characters using several units) or directly look for the block name (case insensitive).
92
+ # @param with_count [TrueClass|FalseClass] calculate block's range size & char count?
93
+ # @return [Hash|nil] Matching block (block name, range and count) or nil if not found
94
+ # @example
95
+ # Unisec::Blocks.block(65, with_count:true) # => {range: 0..127, name: "Basic Latin", range_size: 128, char_count: 95}
96
+ # Unisec::Blocks.block("U+1f4a9") # => {range: 127744..128511, name: "Miscellaneous Symbols and Pictographs", range_size: nil, char_count: nil}
97
+ # Unisec::Blocks.block("…", with_count:true) # => {range: 8192..8303, name: "General Punctuation", range_size: 112, char_count: 111}
98
+ # Unisec::Blocks.block("javanese") # => {range: 43392..43487, name: "Javanese", range_size: nil, char_count: nil}
99
+ def self.block(block_arg, with_count: false) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
100
+ file = File.new(UCD_BLOCKS)
101
+ found = false
102
+ file.each_line(chomp: true) do |line|
103
+ # Skip if the line is empty or a comment
104
+ next if line.empty? || line[0] == '#'
105
+
106
+ # parse the line to extract code point range and the name
107
+ blk_range, blk_name = line.split(';')
108
+ blk_range = Unisec::Utils::String.to_range(blk_range)
109
+ blk_name.lstrip!
110
+ case block_arg
111
+ when Integer # block_arg is an integer code point
112
+ found = true if blk_range.include?(block_arg)
113
+ when String # can be a char or block name or a string code point
114
+ if block_arg.size == 1 # is a char (1 code unit, not one grapheme)
115
+ found = true if blk_range.include?(Utils::String.convert_to_integer(block_arg))
116
+ elsif block_arg.start_with?('U+') # string code point
117
+ found = true if blk_range.include?(Utils::String.stdhexcp2deccp(block_arg))
118
+ elsif blk_name.downcase == block_arg.downcase # block name
119
+ found = true
120
+ end
121
+ end
122
+ if found
123
+ return {
124
+ range: blk_range,
125
+ name: blk_name,
126
+ range_size: with_count ? blk_range.size : nil,
127
+ char_count: with_count ? count_char_in_block(blk_range) : nil
128
+ }
129
+ end
130
+ end
131
+ nil # not found
132
+ end
133
+
134
+ # List unassigned, unallocated ranges.
135
+ # @return [Array<Range>] List of unassigned (code-point) ranges
136
+ # @example
137
+ # Unisec::Blocks.list_unassigned # => [12256..12271, 66048..66175, …]
138
+ def self.list_unassigned # rubocop:disable Metrics/AbcSize
139
+ base = (0x0000..0x10ffff)
140
+ assigned = Unisec::Blocks.list.map { |b| b[:range] }
141
+
142
+ unassigned = []
143
+ cursor = base.begin
144
+
145
+ assigned.each do |r|
146
+ unassigned << (cursor..(r.begin - 1)) if cursor < r.begin
147
+ cursor = r.end + 1
148
+ break if cursor > base.end
149
+ end
150
+
151
+ unassigned << (cursor..base.end) if cursor <= base.end
152
+
153
+ unassigned
154
+ end
155
+
156
+ # Display a CLI-friendly output listing all blocks
157
+ # @param with_count [TrueClass|FalseClass] calculate block's range size & char count?
158
+ def self.list_display(with_count: false) # rubocop:disable Metrics/AbcSize
159
+ blocks = list(with_count: with_count)
160
+ display = ->(key, value, just) { print Paint[key, :red, :bold] + " #{value}".ljust(just) }
161
+ blocks.each do |blk|
162
+ display.call('Range:', Utils::Range.range2codepoint_range(blk[:range]), 22)
163
+ display.call('Name:', blk[:name], 50)
164
+ if with_count
165
+ display.call('Range size:', blk[:range_size], 8)
166
+ display.call('Char count:', blk[:char_count], 0)
167
+ end
168
+ puts
169
+ end
170
+ nil
171
+ end
172
+
173
+ # Display a CLI-friendly output detailing the searched block
174
+ # @param block_arg [Integer|String] Decimal code point or standardized hexadecimal codepoint or string character (only one, so be careful with emojis, composed or joint characters using several units) or directly look for the block name (case insensitive).
175
+ # @param with_count [TrueClass|FalseClass] calculate block's range size & char count?
176
+ def self.block_display(block_arg, with_count: false)
177
+ blk = block(block_arg, with_count: with_count)
178
+ if blk.nil?
179
+ puts "no block found with #{block_arg}"
180
+ else
181
+ display = ->(key, value) { puts Paint[key, :red, :bold] + " #{value}" }
182
+ display.call('Range:', Utils::Range.range2codepoint_range(blk[:range]))
183
+ display.call('Name:', blk[:name])
184
+ if with_count
185
+ display.call('Range size:', blk[:range_size])
186
+ display.call('Char count:', blk[:char_count])
187
+ end
188
+ end
189
+ nil
190
+ end
191
+
192
+ # Display a CLI-friendly output listing all invalid and unassigned ranges.
193
+ def self.list_invalid_display # rubocop:disable Metrics/AbcSize
194
+ display = ->(key, value, just) { print Paint[key, :red, :bold] + " #{value}".ljust(just) }
195
+ puts '(Assigned) invalid, private, reserved ranges:'
196
+ INVALID_RANGES.each do |blk|
197
+ display.call('Range:', Utils::Range.range2codepoint_range(blk[:range]), 22)
198
+ display.call('Name:', blk[:name], 50)
199
+ puts
200
+ end
201
+ puts "\nUnasigned, unallocated ranges:"
202
+ list_unassigned.each do |blk|
203
+ display.call('Range:', Utils::Range.range2codepoint_range(blk), 22)
204
+ puts
205
+ end
206
+ nil
207
+ end
208
+ end
209
+ end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+ require 'unisec/utils'
6
+
7
+ module Unisec
8
+ module CLI
9
+ module Commands
10
+ # CLI sub-commands `unisec blocks xxx` for the class {Unisec::Blocks} from the lib.
11
+ module Blocks
12
+ # Command `unisec blocks list`
13
+ #
14
+ # Example:
15
+ #
16
+ # ```plaintext
17
+ # $ unisec blocks list
18
+ # Range: U+0000 - U+007F Name: Basic Latin
19
+ # Range: U+0080 - U+00FF Name: Latin-1 Supplement
20
+ # …
21
+ # ```
22
+ class List < Dry::CLI::Command
23
+ desc 'List all Unicode blocks'
24
+
25
+ option :with_count, default: 'false', values: %w[true false],
26
+ desc: "calculate block's range size & char count?"
27
+
28
+ # List Unicode blocks
29
+ def call(**options)
30
+ Unisec::Blocks.list_display(with_count: options[:with_count].to_bool)
31
+ end
32
+ end
33
+
34
+ # Command `unisec blocks search`
35
+ #
36
+ # Example:
37
+ #
38
+ # ```plaintext
39
+ # $ unisec blocks search 127745
40
+ # $ unisec blocks search U+1f4a9
41
+ # $ unisec blocks search …
42
+ # $ unisec blocks search javanese
43
+ # ```
44
+ class Search < Dry::CLI::Command
45
+ desc 'Search for a specific block'
46
+
47
+ argument :block_arg, required: true,
48
+ desc: 'Decimal code point | standardized hexadecimal codepoint | string character ' \
49
+ '(only one, so be careful with emojis, composed or joint characters using ' \
50
+ 'several units) | block name (case insensitive)'
51
+
52
+ option :with_count, default: 'false', values: %w[true false],
53
+ desc: "calculate block's range size & char count?"
54
+
55
+ # Display a block matching a decimal code point, standardized hexadecimal codepoint, string character or block name
56
+ # @param block_arg [Integer|String] Decimal code point or standardized hexadecimal codepoint or string character (only one, so be careful with emojis, composed or joint characters using several units) or directly look for the block name (case insensitive).
57
+ def call(block_arg: nil, **options)
58
+ block_arg = block_arg.to_i if /\A\d+\Z/.match?(block_arg) # cast decimal string to integer
59
+ Unisec::Blocks.block_display(block_arg, with_count: options[:with_count].to_bool)
60
+ end
61
+ end
62
+
63
+ # Command `unisec blocks invalid`
64
+ #
65
+ # Example:
66
+ #
67
+ # ```plaintext
68
+ # $ unisec blocks invalid
69
+ # (Assigned) invalid, private, reserved ranges:
70
+ # Range: U+D800 - U+DFFF Name: Surrogates (invalid outside UTF-16)
71
+ # Range: U+E000 - U+F8FF Name: Private Use Area (located in BMP)
72
+ # Range: U+F0000 - U+FFFFF Name: Supplementary Private Use Area-A
73
+ # Range: U+100000 - U+10FFFF Name: Supplementary Private Use Area-B
74
+ #
75
+ # Unasigned, unallocated ranges:
76
+ # Range: U+2FE0 - U+2FEF
77
+ # Range: U+10200 - U+1027F
78
+ # Range: U+103E0 - U+103FF
79
+ # Range: U+107C0 - U+107FF
80
+ # …
81
+ # ```
82
+ class Invalid < Dry::CLI::Command
83
+ desc 'List all invalid and unsassigned ranges'
84
+
85
+ # List all invalid and unassigned ranges
86
+ def call(**)
87
+ Unisec::Blocks.list_invalid_display
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
93
+ end
@@ -1,9 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'unisec/cli/bidi'
4
+ require 'unisec/cli/blocks'
4
5
  require 'unisec/cli/confusables'
5
- require 'unisec/cli/hexdump'
6
+ require 'unisec/cli/dump'
6
7
  require 'unisec/cli/normalization'
8
+ require 'unisec/cli/planes'
7
9
  require 'unisec/cli/properties'
8
10
  require 'unisec/cli/rugrep'
9
11
  require 'unisec/cli/size'
@@ -20,11 +22,18 @@ module Unisec
20
22
  # Mapping between the (sub-)commands as seen by the user
21
23
  # on the command-line interface and the CLI modules in the lib
22
24
  register 'bidi spoof', Bidi::Spoof
25
+ register 'blocks invalid', Blocks::Invalid
26
+ register 'blocks list', Blocks::List
27
+ register 'blocks search', Blocks::Search
23
28
  register 'confusables list', Confusables::List
24
29
  register 'confusables randomize', Confusables::Randomize
30
+ register 'dump dec', Dump::Dec
31
+ register 'dump hex', Dump::Hex
25
32
  register 'grep', Grep
26
- register 'hexdump', Hexdump
27
- register 'normalize', Normalize
33
+ register 'normalize all', Normalize::All
34
+ register 'normalize replace', Normalize::Replace
35
+ register 'planes list', Planes::List
36
+ register 'planes search', Planes::Search
28
37
  register 'properties char', Properties::Char
29
38
  register 'properties codepoints', Properties::Codepoints
30
39
  register 'properties list', Properties::List
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+
6
+ module Unisec
7
+ module CLI
8
+ module Commands
9
+ # CLI sub-commands `unisec dump xxx` for several dump classes like {Unisec::Hexdump} or {Unisec::Decdump} from the lib.
10
+ module Dump
11
+ # CLI command `unisec dump hex` for the class {Unisec::Hexdump} from the lib.
12
+ #
13
+ # Example:
14
+ #
15
+ # ```plaintext
16
+ # $ unisec dump hex "ACCEIS"
17
+ # UTF-8: 41 43 43 45 49 53
18
+ # UTF-16BE: 0041 0043 0043 0045 0049 0053
19
+ # UTF-16LE: 4100 4300 4300 4500 4900 5300
20
+ # UTF-32BE: 00000041 00000043 00000043 00000045 00000049 00000053
21
+ # UTF-32LE: 41000000 43000000 43000000 45000000 49000000 53000000
22
+ #
23
+ # $ unisec dump hex "ACCEIS" --enc utf16le
24
+ # 4100 4300 4300 4500 4900 5300
25
+ # ```
26
+ class Hex < Dry::CLI::Command
27
+ desc 'Hexadecimal dump (hexdump) in all Unicode encodings'
28
+
29
+ argument :input, required: true,
30
+ desc: 'String input. Read from STDIN if equal to -.'
31
+
32
+ option :enc, default: nil, values: %w[utf8 utf16be utf16le utf32be utf32le],
33
+ desc: 'Output only in the specified encoding.'
34
+
35
+ # Hexdump of all Unicode encodings.
36
+ # @param input [String] Input string to encode
37
+ def call(input: nil, **options)
38
+ input = $stdin.read.chomp if input == '-'
39
+ if options[:enc].nil?
40
+ puts Unisec::Hexdump.new(input).display
41
+ else
42
+ # using send() is safe here thanks to the value whitelist
43
+ puts Unisec::Hexdump.send(options[:enc], input)
44
+ end
45
+ end
46
+ end
47
+
48
+ # CLI command `unisec dump dec` for the class {Unisec::Decdump} from the lib.
49
+ #
50
+ # Example:
51
+ #
52
+ # ```plaintext
53
+ # $ unisec dump dec "noraj"
54
+ # UTF-8: 110 111 114 097 106
55
+ # UTF-16BE: |000 110| |000 111| |000 114| |000 097| |000 106|
56
+ # UTF-16LE: |110 000| |111 000| |114 000| |097 000| |106 000|
57
+ # UTF-32BE: |000 000 000 110| |000 000 000 111| |000 000 000 114| |000 000 000 097| |000 000 000 106|
58
+ # UTF-32LE: |110 000 000 000| |111 000 000 000| |114 000 000 000| |097 000 000 000| |106 000 000 000|
59
+ #
60
+ # $ unisec dump dec "noraj" --enc utf16le
61
+ # |110 000| |111 000| |114 000| |097 000| |106 000|
62
+ # ```
63
+ class Dec < Dry::CLI::Command
64
+ desc 'Decimal dump (decdump) in all Unicode encodings'
65
+
66
+ argument :input, required: true,
67
+ desc: 'String input. Read from STDIN if equal to -.'
68
+
69
+ option :enc, default: nil, values: %w[utf8 utf16be utf16le utf32be utf32le],
70
+ desc: 'Output only in the specified encoding.'
71
+
72
+ # Decdump of all Unicode encodings.
73
+ # @param input [String] Input string to encode
74
+ def call(input: nil, **options)
75
+ input = $stdin.read.chomp if input == '-'
76
+ if options[:enc].nil?
77
+ puts Unisec::Decdump.new(input).display
78
+ else
79
+ # using send() is safe here thanks to the value whitelist
80
+ puts Unisec::Decdump.send(options[:enc], input)
81
+ end
82
+ end
83
+ end
84
+ end
85
+ end
86
+ end
87
+ end
@@ -8,45 +8,77 @@ module Unisec
8
8
  module CLI
9
9
  module Commands
10
10
  # CLI sub-commands `unisec normalize xxx` for the class {Unisec::Normalization} from the lib.
11
- #
12
- # Command `unisec normalize "example"`
13
- #
14
- # Example:
15
- #
16
- # ```plaintext
17
- # ➜ unisec normalize ẛ̣
18
- # Original: ẛ̣
19
- # U+1E9B U+0323
20
- # NFC: ẛ̣
21
- # U+1E9B U+0323
22
- # NFKC: ṩ
23
- # U+1E69
24
- # NFD: ẛ̣
25
- # U+017F U+0323 U+0307
26
- # NFKD: ṩ
27
- # U+0073 U+0323 U+0307
28
- #
29
- # ➜ unisec normalize ẛ̣ --form nfkd
30
- # ṩ
31
- # ```
32
- class Normalize < Dry::CLI::Command
33
- desc 'Normalize in all forms'
34
-
35
- argument :input, required: true,
36
- desc: 'String input. Read from STDIN if equal to -.'
37
-
38
- option :form, default: nil, values: %w[nfc nfkc nfd nfkd],
39
- desc: 'Output only in the specified normalization form.'
40
-
41
- # Normalize in all forms
42
- # @param input [String] Input string to normalize
43
- def call(input: nil, **options)
44
- input = $stdin.read.chomp if input == '-'
45
- if options[:form].nil?
46
- puts Unisec::Normalization.new(input).display
47
- else
48
- # using send() is safe here thanks to the value whitelist
49
- puts Unisec::Normalization.send(options[:form], input)
11
+ module Normalize
12
+ # Command `unisec normalize all "example"`
13
+ #
14
+ # Example:
15
+ #
16
+ # ```plaintext
17
+ # ➜ unisec normalize all ẛ̣
18
+ # Original: ẛ̣
19
+ # U+1E9B U+0323
20
+ # NFC: ẛ̣
21
+ # U+1E9B U+0323
22
+ # NFKC: ṩ
23
+ # U+1E69
24
+ # NFD: ẛ̣
25
+ # U+017F U+0323 U+0307
26
+ # NFKD: ṩ
27
+ # U+0073 U+0323 U+0307
28
+ #
29
+ # ➜ unisec normalize all ẛ̣ --form nfkd
30
+ # ṩ
31
+ # ```
32
+ class All < Dry::CLI::Command
33
+ desc 'Normalize in all forms'
34
+
35
+ argument :input, required: true,
36
+ desc: 'String input. Read from STDIN if equal to -.'
37
+
38
+ option :form, default: nil, values: %w[nfc nfkc nfd nfkd],
39
+ desc: 'Output only in the specified normalization form.'
40
+
41
+ # Normalize in all forms
42
+ # @param input [String] Input string to normalize
43
+ def call(input: nil, **options)
44
+ input = $stdin.read.chomp if input == '-'
45
+ if options[:form].nil?
46
+ puts Unisec::Normalization.new(input).display
47
+ else
48
+ # using send() is safe here thanks to the value whitelist
49
+ puts Unisec::Normalization.send(options[:form], input)
50
+ end
51
+ end
52
+ end
53
+
54
+ # Command `unisec normalize replace "example"`
55
+ #
56
+ # Example:
57
+ #
58
+ # ```plaintext
59
+ # ➜ unisec normalize replace "<svg onload=\"alert('XSS')\">"
60
+ # Original: <svg onload="alert('XSS')">
61
+ # U+003C U+0073 U+0076 U+0067 U+0020 U+006F U+006E U+006C U+006F U+0061 U+0064 U+003D U+0022 U+0061 U+006C U+0065 U+0072 U+0074 U+0028 U+0027 U+0058 U+0053 U+0053 U+0027 U+0029 U+0022 U+003E
62
+ # Bypass payload: ﹤svg onload="alert('XSS')"﹥
63
+ # U+FE64 U+0073 U+0076 U+0067 U+0020 U+006F U+006E U+006C U+006F U+0061 U+0064 U+003D U+FF02 U+0061 U+006C U+0065 U+0072 U+0074 U+0028 U+FF07 U+0058 U+0053 U+0053 U+FF07 U+0029 U+FF02 U+FE65
64
+ # NFKC: <svg onload="alert('XSS')">
65
+ # U+003C U+0073 U+0076 U+0067 U+0020 U+006F U+006E U+006C U+006F U+0061 U+0064 U+003D U+0022 U+0061 U+006C U+0065 U+0072 U+0074 U+0028 U+0027 U+0058 U+0053 U+0053 U+0027 U+0029 U+0022 U+003E
66
+ # NFKD: <svg onload="alert('XSS')">
67
+ # U+003C U+0073 U+0076 U+0067 U+0020 U+006F U+006E U+006C U+006F U+0061 U+0064 U+003D U+0022 U+0061 U+006C U+0065 U+0072 U+0074 U+0028 U+0027 U+0058 U+0053 U+0053 U+0027 U+0029 U+0022 U+003E
68
+ #
69
+ # ➜ echo -n "<svg onload=\"alert('XSS')\">" | unisec normalize replace -
70
+ # ```
71
+ class Replace < Dry::CLI::Command
72
+ desc 'Prepare a XSS payload for HTML escape bypass (HTML escape followed by NFKC / NFKD normalization)'
73
+
74
+ argument :input, required: true,
75
+ desc: 'String input. Read from STDIN if equal to -.'
76
+
77
+ # Prepare a XSS payload for HTML escape bypass (HTML escape followed by NFKC / NFKD normalization)
78
+ # @param input [String] Input string to normalize
79
+ def call(input: nil, **_options)
80
+ input = $stdin.read.chomp if input == '-'
81
+ puts Unisec::Normalization.new(input).display_replace
50
82
  end
51
83
  end
52
84
  end