unisec 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/unisec/bidi.rb +3 -3
- data/lib/unisec/blocks.rb +34 -0
- data/lib/unisec/cli/blocks.rb +28 -0
- data/lib/unisec/cli/cli.rb +7 -0
- data/lib/unisec/cli/dump.rb +35 -0
- data/lib/unisec/cli/normalization.rb +31 -0
- data/lib/unisec/cli/planes.rb +52 -0
- data/lib/unisec/hexdump.rb +51 -0
- data/lib/unisec/normalization.rb +72 -0
- data/lib/unisec/planes.rb +66 -0
- data/lib/unisec/properties.rb +3 -1
- data/lib/unisec/utils.rb +44 -2
- data/lib/unisec/version.rb +1 -1
- metadata +21 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e1c859ae327cc9381cc578456525a9fc0d6e68299f10bce6cd4f6439431a7fc0
|
|
4
|
+
data.tar.gz: 8c091df7ffc3e8f720ca9e5cee3d022e4cba4876530727150cc8277d61509f7c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7981fd667521cbccf1c3fdfda8610722fdf9892392568be8bacdd36719109982e07d906c9c4b5c3aff4c90d10252b93460698a3f404348d5dcbd8783124e77cb
|
|
7
|
+
data.tar.gz: 3b32516d01be17f5d462acade421755c5420f1f2f7d596f972c87d17425a64e06cee0fc7963d916113ea970f6a8882b47aa4e84113d31c992f8cc115c2ea5f59
|
data/lib/unisec/bidi.rb
CHANGED
|
@@ -18,10 +18,10 @@ module Unisec
|
|
|
18
18
|
# @param input [String] the target string
|
|
19
19
|
# @param opts [Hash] optional parameters, see {Spoof.bidi_affix}
|
|
20
20
|
# @return [String] the target string
|
|
21
|
-
def set_target_display(input, **)
|
|
21
|
+
def set_target_display(input, **opts)
|
|
22
22
|
@target_display = input
|
|
23
|
-
@spoof_string = reverse(**)
|
|
24
|
-
@spoof_payload = bidi_affix(**)
|
|
23
|
+
@spoof_string = reverse(**opts)
|
|
24
|
+
@spoof_payload = bidi_affix(**opts)
|
|
25
25
|
@target_display
|
|
26
26
|
end
|
|
27
27
|
|
data/lib/unisec/blocks.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'paint'
|
|
4
|
+
require 'twitter_cldr'
|
|
4
5
|
require 'unisec/utils'
|
|
5
6
|
|
|
6
7
|
module Unisec
|
|
@@ -205,5 +206,38 @@ module Unisec
|
|
|
205
206
|
end
|
|
206
207
|
nil
|
|
207
208
|
end
|
|
209
|
+
|
|
210
|
+
# Returns the name of the Unicode block containing the given character.
|
|
211
|
+
# @param char [String] Single character (only one code unit, so be careful with
|
|
212
|
+
# emojis, composed or joint characters using several units, only the first
|
|
213
|
+
# code unit will be kept).
|
|
214
|
+
# @return [String] Block name or empty string if not found.
|
|
215
|
+
# @example
|
|
216
|
+
# Unisec::Blocks.reverse('…') # => "General Punctuation"
|
|
217
|
+
# Unisec::Blocks.reverse('A') # => "Basic Latin"
|
|
218
|
+
# Unisec::Blocks.reverse('💩') # => "Miscellaneous Symbols and Pictographs"
|
|
219
|
+
# Unisec::Blocks.reverse('🇫🇷') # => "Enclosed Alphanumeric Supplement" (only first unit is kept)
|
|
220
|
+
def self.reverse(char)
|
|
221
|
+
cp_num = TwitterCldr::Utils::CodePoints.from_string(char)
|
|
222
|
+
cp = TwitterCldr::Shared::CodePoint.get(cp_num.first)
|
|
223
|
+
props = cp.properties
|
|
224
|
+
props.block.join
|
|
225
|
+
rescue NoMethodError # in case of invalid character where CodePoint.get() => nil
|
|
226
|
+
''
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
# Display a CLI-friendly output showing the block name for a given character.
|
|
230
|
+
# @param char [String] Single character (only one code unit, so be careful with
|
|
231
|
+
# emojis, composed or joint characters using several units, only the first
|
|
232
|
+
# code unit will be kept).
|
|
233
|
+
def self.reverse_display(char)
|
|
234
|
+
blk_name = reverse(char)
|
|
235
|
+
if blk_name.empty?
|
|
236
|
+
puts "no block found for #{char.inspect}"
|
|
237
|
+
else
|
|
238
|
+
puts blk_name
|
|
239
|
+
end
|
|
240
|
+
nil
|
|
241
|
+
end
|
|
208
242
|
end
|
|
209
243
|
end
|
data/lib/unisec/cli/blocks.rb
CHANGED
|
@@ -60,6 +60,34 @@ module Unisec
|
|
|
60
60
|
end
|
|
61
61
|
end
|
|
62
62
|
|
|
63
|
+
# Command `unisec blocks reverse`
|
|
64
|
+
#
|
|
65
|
+
# Example:
|
|
66
|
+
#
|
|
67
|
+
# ```plaintext
|
|
68
|
+
# $ unisec blocks reverse '…'
|
|
69
|
+
# General Punctuation
|
|
70
|
+
# $ unisec blocks reverse 'A'
|
|
71
|
+
# Basic Latin
|
|
72
|
+
# $ unisec blocks reverse '💩'
|
|
73
|
+
# Miscellaneous Symbols and Pictographs
|
|
74
|
+
# $ unisec blocks reverse '🇫🇷'
|
|
75
|
+
# Enclosed Alphanumeric Supplement
|
|
76
|
+
# ```
|
|
77
|
+
class Reverse < Dry::CLI::Command
|
|
78
|
+
desc 'Search in which Unicode block a given character is'
|
|
79
|
+
|
|
80
|
+
argument :char, required: true,
|
|
81
|
+
desc: 'Single character (only one code unit, so be careful with emojis, composed or ' \
|
|
82
|
+
'joint characters using several units, only the first code unit will be kept)'
|
|
83
|
+
|
|
84
|
+
# Display the Unicode block name for a given character
|
|
85
|
+
# @param char [String] Single character (only one code unit, so be careful with emojis, composed or joint characters using several units, only the first code unit will be kept).
|
|
86
|
+
def call(char: nil, **)
|
|
87
|
+
Unisec::Blocks.reverse_display(char)
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
63
91
|
# Command `unisec blocks invalid`
|
|
64
92
|
#
|
|
65
93
|
# Example:
|
data/lib/unisec/cli/cli.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'dry/cli/completion/command'
|
|
3
4
|
require 'unisec/cli/bidi'
|
|
4
5
|
require 'unisec/cli/blocks'
|
|
5
6
|
require 'unisec/cli/confusables'
|
|
@@ -24,17 +25,23 @@ module Unisec
|
|
|
24
25
|
register 'bidi spoof', Bidi::Spoof
|
|
25
26
|
register 'blocks invalid', Blocks::Invalid
|
|
26
27
|
register 'blocks list', Blocks::List
|
|
28
|
+
register 'blocks reverse', Blocks::Reverse
|
|
27
29
|
register 'blocks search', Blocks::Search
|
|
30
|
+
register 'completion', Dry::CLI::Completion::Command[self]
|
|
28
31
|
register 'confusables list', Confusables::List
|
|
29
32
|
register 'confusables randomize', Confusables::Randomize
|
|
30
33
|
register 'dump codepoints integer', Dump::Codepoints::Integer
|
|
31
34
|
register 'dump codepoints standard', Dump::Codepoints::Standard
|
|
32
35
|
register 'dump dec', Dump::Dec
|
|
33
36
|
register 'dump hex', Dump::Hex
|
|
37
|
+
register 'dump rev', Dump::Reverse
|
|
34
38
|
register 'grep', Grep
|
|
35
39
|
register 'normalize all', Normalize::All
|
|
36
40
|
register 'normalize replace', Normalize::Replace
|
|
41
|
+
register 'normalize reverse', Normalize::Reverse
|
|
42
|
+
register 'planes block', Planes::Block
|
|
37
43
|
register 'planes list', Planes::List
|
|
44
|
+
register 'planes reverse', Planes::Reverse
|
|
38
45
|
register 'planes search', Planes::Search
|
|
39
46
|
register 'properties char', Properties::Char
|
|
40
47
|
register 'properties codepoints', Properties::Codepoints
|
data/lib/unisec/cli/dump.rb
CHANGED
|
@@ -127,6 +127,41 @@ module Unisec
|
|
|
127
127
|
end
|
|
128
128
|
end
|
|
129
129
|
end
|
|
130
|
+
|
|
131
|
+
# CLI command `unisec dump rev` for the method {Unisec::Hexdump.reverse} from the lib.
|
|
132
|
+
#
|
|
133
|
+
# Example:
|
|
134
|
+
#
|
|
135
|
+
# ```plaintext
|
|
136
|
+
# $ unisec dump rev 0a0d --enc=utf16be
|
|
137
|
+
# (U+0A0D) - 0a0d
|
|
138
|
+
#
|
|
139
|
+
# $ unisec dump rev 808080 --enc=utf8 --exact=false
|
|
140
|
+
# (U+40000) - f1 80 80 80
|
|
141
|
+
# (U+80000) - f2 80 80 80
|
|
142
|
+
# (U+C0000) - f3 80 80 80
|
|
143
|
+
# (U+100000) - f4 80 80 80
|
|
144
|
+
# ```
|
|
145
|
+
class Reverse < Dry::CLI::Command
|
|
146
|
+
desc 'Reverse search in hexadecimal dump'
|
|
147
|
+
|
|
148
|
+
argument :hexbytes, required: true,
|
|
149
|
+
desc: 'Byte(s) in hexadecimal to search for. Read from STDIN if equal to -.'
|
|
150
|
+
|
|
151
|
+
option :enc, default: 'utf8', values: %w[utf8 utf16be utf16le utf32be utf32le],
|
|
152
|
+
desc: 'The target encoding in which to search.'
|
|
153
|
+
|
|
154
|
+
option :exact, default: 'true', values: %w[true false],
|
|
155
|
+
desc: 'true (default) = exact search, false = "sub-string" search / the value is included ' \
|
|
156
|
+
'in the encoded value'
|
|
157
|
+
|
|
158
|
+
# Search X byte(s) hexadecimal value in Y encoding, basically which characters will give this resulting encoded value
|
|
159
|
+
# @param hexbytes [String] Byte(s) in hexadecimal to search for. Read from STDIN if equal to -.
|
|
160
|
+
def call(hexbytes: nil, **options)
|
|
161
|
+
hexbytes = $stdin.read.chomp if hexbytes == '-'
|
|
162
|
+
puts Unisec::Hexdump.display_reverse(hexbytes, options[:enc], exact: options[:exact].to_bool)
|
|
163
|
+
end
|
|
164
|
+
end
|
|
130
165
|
end
|
|
131
166
|
end
|
|
132
167
|
end
|
|
@@ -81,6 +81,37 @@ module Unisec
|
|
|
81
81
|
puts Unisec::Normalization.new(input).display_replace
|
|
82
82
|
end
|
|
83
83
|
end
|
|
84
|
+
|
|
85
|
+
# Command `unisec normalize reverse '<'`
|
|
86
|
+
#
|
|
87
|
+
# Example:
|
|
88
|
+
#
|
|
89
|
+
# ```plaintext
|
|
90
|
+
# $ unisec normalize reverse '"' --forms 'nfkc,nfkd'
|
|
91
|
+
# Original:
|
|
92
|
+
# " (U+0022)
|
|
93
|
+
# NFKC
|
|
94
|
+
# " (U+FF02)
|
|
95
|
+
# NFKD
|
|
96
|
+
# " (U+FF02)
|
|
97
|
+
# ```
|
|
98
|
+
class Reverse < Dry::CLI::Command
|
|
99
|
+
desc 'List reverse normalization candidates (what characters will transform into target after normalization)'
|
|
100
|
+
|
|
101
|
+
argument :target, required: true,
|
|
102
|
+
desc: 'Normalization target. Read from STDIN if equal to -.'
|
|
103
|
+
|
|
104
|
+
option :forms, default: %i[nfc nfd nfkc nfkd],
|
|
105
|
+
desc: 'Output only in the specified normalization form(s). ' \
|
|
106
|
+
'Separate by comma if multiple values.'
|
|
107
|
+
|
|
108
|
+
# Reverse normalize
|
|
109
|
+
# @param target [String] Normalization target
|
|
110
|
+
def call(target: nil, **options)
|
|
111
|
+
target = $stdin.read.chomp if target == '-'
|
|
112
|
+
puts Unisec::Normalization.display_reverse_normalize(target, forms: options[:forms])
|
|
113
|
+
end
|
|
114
|
+
end
|
|
84
115
|
end
|
|
85
116
|
end
|
|
86
117
|
end
|
data/lib/unisec/cli/planes.rb
CHANGED
|
@@ -93,6 +93,58 @@ module Unisec
|
|
|
93
93
|
with_count: options[:with_count].to_bool)
|
|
94
94
|
end
|
|
95
95
|
end
|
|
96
|
+
|
|
97
|
+
# Command `unisec planes reverse`
|
|
98
|
+
#
|
|
99
|
+
# Example:
|
|
100
|
+
#
|
|
101
|
+
# ```plaintext
|
|
102
|
+
# $ unisec planes reverse '…'
|
|
103
|
+
# Basic Multilingual Plane
|
|
104
|
+
# $ unisec planes reverse '🨂'
|
|
105
|
+
# Supplementary Multilingual Plane
|
|
106
|
+
# $ unisec planes reverse '𠀀'
|
|
107
|
+
# Supplementary Ideographic Plane
|
|
108
|
+
# $ unisec planes reverse '🇫🇷'
|
|
109
|
+
# Supplementary Multilingual Plane
|
|
110
|
+
# ```
|
|
111
|
+
class Reverse < Dry::CLI::Command
|
|
112
|
+
desc 'Search in which Unicode plane a given character is'
|
|
113
|
+
|
|
114
|
+
argument :char, required: true,
|
|
115
|
+
desc: 'Single character (only one code unit, so be careful with emojis, composed or joint ' \
|
|
116
|
+
'characters using several units), only the first code unit will be kept).'
|
|
117
|
+
|
|
118
|
+
# Display the Unicode plane name for a given character
|
|
119
|
+
# @param char [String] Single character (only one code unit, so be careful with emojis,
|
|
120
|
+
# composed or joint characters using several units, only the first code unit will be kept).
|
|
121
|
+
def call(char: nil, **)
|
|
122
|
+
Unisec::Planes.reverse_display(char)
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Command `unisec planes block`
|
|
127
|
+
#
|
|
128
|
+
# Example:
|
|
129
|
+
#
|
|
130
|
+
# ```plaintext
|
|
131
|
+
# $ unisec planes block 'Basic Latin'
|
|
132
|
+
# Basic Multilingual Plane
|
|
133
|
+
# $ unisec planes block 'Miscellaneous Symbols and Pictographs'
|
|
134
|
+
# Supplementary Multilingual Plane
|
|
135
|
+
# ```
|
|
136
|
+
class Block < Dry::CLI::Command
|
|
137
|
+
desc 'Search in which Unicode plane a block is'
|
|
138
|
+
|
|
139
|
+
argument :block_arg, required: true,
|
|
140
|
+
desc: 'Block name (case insensitive)'
|
|
141
|
+
|
|
142
|
+
# Display the Unicode plane name for a given block
|
|
143
|
+
# @param block_arg [String] Block name (case insensitive).
|
|
144
|
+
def call(block_arg: nil, **)
|
|
145
|
+
Unisec::Planes.block_display(block_arg)
|
|
146
|
+
end
|
|
147
|
+
end
|
|
96
148
|
end
|
|
97
149
|
end
|
|
98
150
|
end
|
data/lib/unisec/hexdump.rb
CHANGED
|
@@ -85,6 +85,33 @@ module Unisec
|
|
|
85
85
|
str.encode('UTF-32LE').to_hex.scan(/.{8}/).join(' ')
|
|
86
86
|
end
|
|
87
87
|
|
|
88
|
+
# Search X byte(s) hexadecimal value in Y encoding, basically which characters will give this resulting encoded value
|
|
89
|
+
# @param hexbytes [String] Byte(s) in hexadecimal to search for
|
|
90
|
+
# @param enc [String] The target encoding in which to search. It uses Unisec CLI argument values (utf8 utf16be utf16le utf32be utf32le).
|
|
91
|
+
# @param exact [TrueClass|FalseClass] true (default) = exact search, false = "sub-string" search / the value is included in the encoded value
|
|
92
|
+
# @return [Array<String>] all matching source characters
|
|
93
|
+
# @example
|
|
94
|
+
# Unisec::Hexdump.reverse('61', 'utf8') # => ["a"]
|
|
95
|
+
# Unisec::Hexdump.reverse('a6', 'utf8', exact: true) # => []
|
|
96
|
+
# Unisec::Hexdump.reverse('a6', 'utf8', exact: false) # => ["¦", "æ", "Ħ", "Ŧ", "Ʀ", "Ǧ", … ]
|
|
97
|
+
# Unisec::Hexdump.reverse('0d0a', 'utf16be', exact: true) # => ["\u0D0A"] (ഊ)
|
|
98
|
+
def self.reverse(hexbytes, enc, exact: true)
|
|
99
|
+
chars = []
|
|
100
|
+
(0x000000..0x10FFFF).each do |i|
|
|
101
|
+
char = i.chr(Unisec::Utils::Arguments.argenc2enc(enc, target: 'class'))
|
|
102
|
+
encoded_value = Unisec::Hexdump.send(enc, char).delete(' ')
|
|
103
|
+
if exact && encoded_value == hexbytes # exact match
|
|
104
|
+
chars << char
|
|
105
|
+
break
|
|
106
|
+
elsif !exact && encoded_value.include?(hexbytes) # includes value
|
|
107
|
+
chars << char
|
|
108
|
+
end
|
|
109
|
+
rescue RangeError # skip invalid code points for selected encoding
|
|
110
|
+
next
|
|
111
|
+
end
|
|
112
|
+
chars
|
|
113
|
+
end
|
|
114
|
+
|
|
88
115
|
# Display a CLI-friendly output summarizing the hexdump in all Unicode encodings
|
|
89
116
|
# @return [String] CLI-ready output
|
|
90
117
|
# @example
|
|
@@ -101,5 +128,29 @@ module Unisec
|
|
|
101
128
|
"UTF-32BE: #{@utf32be}\n" \
|
|
102
129
|
"UTF-32LE: #{@utf32le}"
|
|
103
130
|
end
|
|
131
|
+
|
|
132
|
+
# Display a CLI-friendly output summarizing the reverse hexdump search results
|
|
133
|
+
# @param hexbytes [String] see {Unisec::Hexdump.reverse}
|
|
134
|
+
# @param enc [String] see {Unisec::Hexdump.reverse}
|
|
135
|
+
# @param exact [TrueClass|FalseClass] see {Unisec::Hexdump.reverse}
|
|
136
|
+
# @return [String] CLI-ready output
|
|
137
|
+
# @example
|
|
138
|
+
# puts Unisec::Hexdump.display_reverse('0d0a', 'utf16be', exact: true)
|
|
139
|
+
# # ഊ (U+0D0A) - 0d0a
|
|
140
|
+
# puts Unisec::Hexdump.display_reverse('808080', 'utf8', exact: false)
|
|
141
|
+
# # (U+40000) - f1 80 80 80
|
|
142
|
+
# # (U+80000) - f2 80 80 80
|
|
143
|
+
# # (U+C0000) - f3 80 80 80
|
|
144
|
+
# # (U+100000) - f4 80 80 80
|
|
145
|
+
def self.display_reverse(hexbytes, enc, exact: true)
|
|
146
|
+
res = Unisec::Hexdump.reverse(hexbytes, enc, exact: exact)
|
|
147
|
+
out = ''
|
|
148
|
+
res.each do |char|
|
|
149
|
+
cp = Utils::String.char2codepoint(char)
|
|
150
|
+
hxd = Unisec::Hexdump.send(enc, char)
|
|
151
|
+
out += "#{char.encode('UTF-8')} (#{cp}) - #{hxd}\n"
|
|
152
|
+
end
|
|
153
|
+
out
|
|
154
|
+
end
|
|
104
155
|
end
|
|
105
156
|
end
|
data/lib/unisec/normalization.rb
CHANGED
|
@@ -95,6 +95,35 @@ module Unisec
|
|
|
95
95
|
Normalization.replace_bypass(@original)
|
|
96
96
|
end
|
|
97
97
|
|
|
98
|
+
# Find the list of symbols that will transform into a given symbol after normalization
|
|
99
|
+
# @param target [String]
|
|
100
|
+
# @param forms [String|Symbol|Array<Symbol>]
|
|
101
|
+
# @return [Hash] (results won't include input)
|
|
102
|
+
# @example
|
|
103
|
+
# Unisec::Normalization.reverse_normalize('<') # => {nfc: [], nfd: [], nfkc: ["﹤", "<"], nfkd: ["﹤", "<"]}
|
|
104
|
+
# Unisec::Normalization.reverse_normalize('.', forms: [:nfkc, :nfkd]) # => {nfkc: ["․", "﹒", "."], nfkd: ["․", "﹒", "."]}
|
|
105
|
+
# Unisec::Normalization.reverse_normalize('ffi', forms: :nfkc) # => {nfkc: ["ffi"]}
|
|
106
|
+
# Unisec::Normalization.reverse_normalize('≯', forms: 'nfd') # => {nfd: ["≯"]}
|
|
107
|
+
# Unisec::Normalization.reverse_normalize('ô', forms: 'nfc,nfd') # => {nfc: [], nfd: []}
|
|
108
|
+
def self.reverse_normalize(target, forms: %i[nfc nfd nfkc nfkd])
|
|
109
|
+
forms = Utils::Arguments.to_array_of_sym(forms)
|
|
110
|
+
result = {}
|
|
111
|
+
forms.each do |form|
|
|
112
|
+
result[form] = []
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
(0x000000..0x10FFFF).each do |codepoint|
|
|
116
|
+
char = codepoint.chr(Encoding::UTF_8)
|
|
117
|
+
forms.each do |form|
|
|
118
|
+
result[form] << char if (char.unicode_normalize(form) == target) && (char != target)
|
|
119
|
+
end
|
|
120
|
+
rescue RangeError # skip UTF-16 surrogates and potential other invalid code points
|
|
121
|
+
next
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
result
|
|
125
|
+
end
|
|
126
|
+
|
|
98
127
|
# Display a CLI-friendly output summarizing all normalization forms
|
|
99
128
|
# @return [String] CLI-ready output
|
|
100
129
|
# @example
|
|
@@ -124,6 +153,18 @@ module Unisec
|
|
|
124
153
|
|
|
125
154
|
# Display a CLI-friendly output of the XSS payload to bypass HTML escape and
|
|
126
155
|
# what it does once normalized in NFKC & NFKD.
|
|
156
|
+
# @return [String] CLI-ready output
|
|
157
|
+
# @example
|
|
158
|
+
# $ puts Unisec::Normalization.new('<script>').display_replace
|
|
159
|
+
# # =>
|
|
160
|
+
# # Original: <script>
|
|
161
|
+
# # U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
|
|
162
|
+
# # Bypass payload: <script>
|
|
163
|
+
# # U+FF1C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+FF1E
|
|
164
|
+
# # NFKC: <script>
|
|
165
|
+
# # U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
|
|
166
|
+
# # NFKD: <script>
|
|
167
|
+
# # U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
|
|
127
168
|
def display_replace
|
|
128
169
|
colorize = lambda { |form_title, form_attr|
|
|
129
170
|
"#{Paint[form_title.to_s, :underline,
|
|
@@ -135,5 +176,36 @@ module Unisec
|
|
|
135
176
|
colorize.call('NFKC', Normalization.nfkc(payload)) +
|
|
136
177
|
colorize.call('NFKD', Normalization.nfkd(payload))
|
|
137
178
|
end
|
|
179
|
+
|
|
180
|
+
# Display a CLI-friendly output of the reverse normalization results
|
|
181
|
+
# @param target [String] see {Unisec::Normalization.reverse_normalize}
|
|
182
|
+
# @param forms [String|Symbol|Array<Symbol>] see {Unisec::Normalization.reverse_normalize}
|
|
183
|
+
# @return [String] CLI-ready output
|
|
184
|
+
# @example
|
|
185
|
+
# puts Unisec::Normalization.display_reverse_normalize('<')
|
|
186
|
+
# # =>
|
|
187
|
+
# # Original:
|
|
188
|
+
# # < (U+003C)
|
|
189
|
+
# # NFKC
|
|
190
|
+
# # ﹤ (U+FE64)
|
|
191
|
+
# # < (U+FF1C)
|
|
192
|
+
# # NFKD
|
|
193
|
+
# # ﹤ (U+FE64)
|
|
194
|
+
# # < (U+FF1C)
|
|
195
|
+
def self.display_reverse_normalize(target, forms: %i[nfc nfd nfkc nfkd]) # rubocop:disable Metrics/AbcSize
|
|
196
|
+
colorize_form = ->(form_title) { Paint[form_title, :underline, :bold] }
|
|
197
|
+
colorize_char = ->(char) { " #{char} (#{Paint[Unisec::Utils::String.chars2codepoints(char), :red]})\n" }
|
|
198
|
+
out = "#{colorize_form.call('Original')}:\n#{colorize_char.call(target)}"
|
|
199
|
+
res = Unisec::Normalization.reverse_normalize(target, forms: forms) # => {nfc: [], nfd: [], nfkc: ["﹤", "<"], nfkd: ["﹤", "<"]}
|
|
200
|
+
res.each_key do |k|
|
|
201
|
+
next if res[k].empty?
|
|
202
|
+
|
|
203
|
+
out += "#{colorize_form.call(k.to_s.upcase)}\n"
|
|
204
|
+
res[k].each do |v|
|
|
205
|
+
out += colorize_char.call(v)
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
out
|
|
209
|
+
end
|
|
138
210
|
end
|
|
139
211
|
end
|
data/lib/unisec/planes.rb
CHANGED
|
@@ -220,5 +220,71 @@ module Unisec
|
|
|
220
220
|
end
|
|
221
221
|
nil
|
|
222
222
|
end
|
|
223
|
+
|
|
224
|
+
# Returns the name of the Unicode plane containing the given character.
|
|
225
|
+
# @param char [String] Single character (only one code unit, so be careful with
|
|
226
|
+
# emojis, composed or joint characters using several units, only the first
|
|
227
|
+
# code unit will be kept).
|
|
228
|
+
# @return [String] Plane name or empty string if not found.
|
|
229
|
+
# @example
|
|
230
|
+
# Unisec::Planes.reverse('…') # => "Basic Multilingual Plane"
|
|
231
|
+
# Unisec::Planes.reverse('🨂') # => "Supplementary Multilingual Plane"
|
|
232
|
+
# Unisec::Planes.reverse('𠀀') # => "Supplementary Ideographic Plane"
|
|
233
|
+
# Unisec::Planes.reverse('🇫🇷') # => "Supplementary Multilingual Plane" (first unit kept)
|
|
234
|
+
def self.reverse(char)
|
|
235
|
+
return '' unless char.is_a?(String)
|
|
236
|
+
|
|
237
|
+
cp = Utils::String.convert_to_integer(char[0])
|
|
238
|
+
PLANES.each do |plane|
|
|
239
|
+
return plane[:name] if plane[:range].include?(cp)
|
|
240
|
+
end
|
|
241
|
+
'' # not found
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
# Display a CLI-friendly output showing the plane name for a given character.
|
|
245
|
+
# @param char [String] Single character (only one code unit, so be careful with
|
|
246
|
+
# emojis, composed or joint characters using several units, only the first
|
|
247
|
+
# code unit will be kept).
|
|
248
|
+
def self.reverse_display(char)
|
|
249
|
+
plane_name = reverse(char)
|
|
250
|
+
if plane_name.empty?
|
|
251
|
+
puts "no plane found for #{char.inspect}"
|
|
252
|
+
else
|
|
253
|
+
puts plane_name
|
|
254
|
+
end
|
|
255
|
+
nil
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
# Returns the name of the Unicode plane containing the given block.
|
|
259
|
+
# @param block_arg [String] Block name (case insensitive).
|
|
260
|
+
# @return [String] Plane name or empty string if not found.
|
|
261
|
+
# @example
|
|
262
|
+
# Unisec::Planes.block('Basic Latin') # => "Basic Multilingual Plane"
|
|
263
|
+
# Unisec::Planes.block('Miscellaneous Symbols and Pictographs') # => "Supplementary Multilingual Plane"
|
|
264
|
+
def self.block(block_arg) # rubocop:disable Metrics/CyclomaticComplexity
|
|
265
|
+
# support only search by block name
|
|
266
|
+
return '' if block_arg.is_a?(Integer)
|
|
267
|
+
return '' if block_arg.is_a?(String) && (block_arg.size == 1 || block_arg.start_with?('U+'))
|
|
268
|
+
|
|
269
|
+
blk = Blocks.block(block_arg, with_count: false)
|
|
270
|
+
return '' unless blk # block name not found
|
|
271
|
+
|
|
272
|
+
PLANES.each do |plane|
|
|
273
|
+
return plane[:name] if plane[:range].cover?(blk[:range])
|
|
274
|
+
end
|
|
275
|
+
'' # not found
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
# Display a CLI-friendly output showing the plane name for a given block.
|
|
279
|
+
# @param block_arg [String] Block name (case insensitive).
|
|
280
|
+
def self.block_display(block_arg)
|
|
281
|
+
plane_name = block(block_arg)
|
|
282
|
+
if plane_name.empty?
|
|
283
|
+
puts "no plane found for block #{block_arg.inspect}"
|
|
284
|
+
else
|
|
285
|
+
puts plane_name
|
|
286
|
+
end
|
|
287
|
+
nil
|
|
288
|
+
end
|
|
223
289
|
end
|
|
224
290
|
end
|
data/lib/unisec/properties.rb
CHANGED
|
@@ -75,9 +75,10 @@ module Unisec
|
|
|
75
75
|
end
|
|
76
76
|
{
|
|
77
77
|
age: props.age.join,
|
|
78
|
+
plane: Unisec::Planes.reverse(chr),
|
|
78
79
|
block: props.block.join,
|
|
79
80
|
category: categories[1],
|
|
80
|
-
subcategory: categories[0],
|
|
81
|
+
subcategory: "#{categories[0]} (#{cp.category})",
|
|
81
82
|
codepoint: Utils::String.char2codepoint(chr),
|
|
82
83
|
name: cp.name,
|
|
83
84
|
script: props.script.join,
|
|
@@ -121,6 +122,7 @@ module Unisec
|
|
|
121
122
|
display.call('Name:', data[:name])
|
|
122
123
|
display.call('Code Point:', data[:codepoint] + " (#{Utils::String.convert(chr, :integer)})")
|
|
123
124
|
puts
|
|
125
|
+
display.call('Plane', data[:plane])
|
|
124
126
|
display.call('Block:', data[:block])
|
|
125
127
|
display.call('Category:', data[:category])
|
|
126
128
|
display.call('Sub-Category:', data[:subcategory])
|
data/lib/unisec/utils.rb
CHANGED
|
@@ -207,7 +207,7 @@ module Unisec
|
|
|
207
207
|
# @example
|
|
208
208
|
# Unisec::Utils::String.chars2intcodepoints('I 💕 Ruby 💎') # => "73 32 128149 32 82 117 98 121 32 128142"
|
|
209
209
|
def self.chars2intcodepoints(chrs)
|
|
210
|
-
chrs.codepoints.
|
|
210
|
+
chrs.codepoints.join(' ')
|
|
211
211
|
end
|
|
212
212
|
|
|
213
213
|
# Convert a string of hex encoded Unicode code points range to actual
|
|
@@ -236,12 +236,54 @@ module Unisec
|
|
|
236
236
|
module Range
|
|
237
237
|
# Convert a (integer) range to a range of Unicode code points
|
|
238
238
|
# @param range [::Range]
|
|
239
|
-
# @return [String]
|
|
239
|
+
# @return [::String]
|
|
240
240
|
# @example
|
|
241
241
|
# Unisec::Utils::Range.range2codepoint_range(1048576..1114111) # => "U+100000 - U+10FFFF"
|
|
242
242
|
def self.range2codepoint_range(range)
|
|
243
243
|
"#{Integer.deccp2stdhexcp(range.begin)} - #{Integer.deccp2stdhexcp(range.end)}"
|
|
244
244
|
end
|
|
245
245
|
end
|
|
246
|
+
|
|
247
|
+
module Arguments
|
|
248
|
+
# Converts an argument that is a string, a string of comma-separated arguments, or a symbol into an array of symbols.
|
|
249
|
+
# Useful for methods that are expected to work on an array of symbols but can receive various formats of inputs (e.g. from CLI).
|
|
250
|
+
# @param input [::String|Symbol] (anything else will be returned untransformed)
|
|
251
|
+
# @return [Array<Symbol>] (or anything else if input type is not respected)
|
|
252
|
+
# @example
|
|
253
|
+
# Unisec::Utils::Arguments.to_array_of_sym("arg") # => [:arg]
|
|
254
|
+
# Unisec::Utils::Arguments.to_array_of_sym("a,b,c") # => [:a, :b, :c]
|
|
255
|
+
# Unisec::Utils::Arguments.to_array_of_sym(:snake) # => [:snake]
|
|
256
|
+
# Unisec::Utils::Arguments.to_array_of_sym([:a, :b, :c]) # => [:a, :b, :c]
|
|
257
|
+
def self.to_array_of_sym(input)
|
|
258
|
+
case input
|
|
259
|
+
when ::String # a,b,c => [:a, :b, :c]
|
|
260
|
+
input.split(',').map(&:to_sym)
|
|
261
|
+
when ::Symbol # :a => [:a]
|
|
262
|
+
[input]
|
|
263
|
+
else
|
|
264
|
+
input
|
|
265
|
+
end
|
|
266
|
+
end
|
|
267
|
+
|
|
268
|
+
# Converts encoding name from CLI to encoding name in standard format or Ruby Class
|
|
269
|
+
# @param argenc [::String] Encoding name as used as argument in Unisec CLI (authorized values are: utf8 utf16be utf16le utf32be utf32le).
|
|
270
|
+
# @param target [::String] 'standard' for standard encoding name, 'class' for Ruby class naming
|
|
271
|
+
# @return [::String|Class]
|
|
272
|
+
# @example
|
|
273
|
+
# Unisec::Utils::Arguments.argenc2enc('utf8', target: 'standard') # => "UTF-8"
|
|
274
|
+
# Unisec::Utils::Arguments.argenc2enc('utf16be', target: 'class') # => #<Encoding:UTF-16BE (autoload)>
|
|
275
|
+
def self.argenc2enc(argenc, target: 'standard')
|
|
276
|
+
argument_encodings = %w[utf8 utf16be utf16le utf32be utf32le]
|
|
277
|
+
raise ArgumentError unless argument_encodings.include?(argenc)
|
|
278
|
+
|
|
279
|
+
if target == 'standard'
|
|
280
|
+
argenc.upcase.insert(3, '-')
|
|
281
|
+
elsif target == 'class'
|
|
282
|
+
Encoding.const_get(argenc.upcase.insert(3, '_')) # const_get safe thanks to input whitelist
|
|
283
|
+
else
|
|
284
|
+
raise ArgumentError
|
|
285
|
+
end
|
|
286
|
+
end
|
|
287
|
+
end
|
|
246
288
|
end
|
|
247
289
|
end
|
data/lib/unisec/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: unisec
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Alexandre ZANNI
|
|
@@ -29,14 +29,28 @@ dependencies:
|
|
|
29
29
|
requirements:
|
|
30
30
|
- - "~>"
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
|
-
version: '1.
|
|
32
|
+
version: '1.4'
|
|
33
33
|
type: :runtime
|
|
34
34
|
prerelease: false
|
|
35
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
36
36
|
requirements:
|
|
37
37
|
- - "~>"
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
|
-
version: '1.
|
|
39
|
+
version: '1.4'
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: dry-cli-completion
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - "~>"
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: 2.0.0
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - "~>"
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: 2.0.0
|
|
40
54
|
- !ruby/object:Gem::Dependency
|
|
41
55
|
name: paint
|
|
42
56
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -71,14 +85,14 @@ dependencies:
|
|
|
71
85
|
requirements:
|
|
72
86
|
- - "~>"
|
|
73
87
|
- !ruby/object:Gem::Version
|
|
74
|
-
version: '1.
|
|
88
|
+
version: '1.13'
|
|
75
89
|
type: :runtime
|
|
76
90
|
prerelease: false
|
|
77
91
|
version_requirements: !ruby/object:Gem::Requirement
|
|
78
92
|
requirements:
|
|
79
93
|
- - "~>"
|
|
80
94
|
- !ruby/object:Gem::Version
|
|
81
|
-
version: '1.
|
|
95
|
+
version: '1.13'
|
|
82
96
|
description: 'Toolkit for security research manipulating Unicode: confusables, homoglyphs,
|
|
83
97
|
hexdump, code point, UTF-8, UTF-16, UTF-32, properties, regexp search, size, grapheme,
|
|
84
98
|
surrogates, version, ICU, CLDR, UCD, BiDi, normalization'
|
|
@@ -137,7 +151,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
137
151
|
requirements:
|
|
138
152
|
- - ">="
|
|
139
153
|
- !ruby/object:Gem::Version
|
|
140
|
-
version: 3.
|
|
154
|
+
version: 3.3.0
|
|
141
155
|
- - "<"
|
|
142
156
|
- !ruby/object:Gem::Version
|
|
143
157
|
version: '5.0'
|
|
@@ -147,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
147
161
|
- !ruby/object:Gem::Version
|
|
148
162
|
version: '0'
|
|
149
163
|
requirements: []
|
|
150
|
-
rubygems_version: 4.0.
|
|
164
|
+
rubygems_version: 4.0.10
|
|
151
165
|
specification_version: 4
|
|
152
166
|
summary: Unicode Security Toolkit
|
|
153
167
|
test_files: []
|