unisec 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/unisec/bidi.rb +171 -0
- data/lib/unisec/cli/bidi.rb +63 -0
- data/lib/unisec/cli/cli.rb +4 -0
- data/lib/unisec/cli/hexdump.rb +15 -3
- data/lib/unisec/cli/normalization.rb +55 -0
- data/lib/unisec/hexdump.rb +1 -0
- data/lib/unisec/normalization.rb +94 -0
- data/lib/unisec/surrogates.rb +1 -0
- data/lib/unisec/utils.rb +10 -0
- data/lib/unisec/version.rb +1 -1
- data/lib/unisec/versions.rb +13 -13
- data/lib/unisec.rb +2 -0
- metadata +10 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0b6f74de3dc3d0f9aebac59ff68bc5731b7185e9e73e86e87477c23408900452
|
4
|
+
data.tar.gz: 910be7b95b71022f352cc6d612c7752fe2ea13a0e6e3dc89aca4566cb1879569
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 47b730a884a30979be5968d90a30bc214ec93725383f20c48a02ec1090ce64e6043f95427c7ed79ebc31b3b4e1d3b66e01c7b0d63f936793f6c8eb008ce5f9fe
|
7
|
+
data.tar.gz: be9ca5cc40baaf9cd56141a3a8e41f5709c6071d5e7797ebd64a1daf231671e6835c168bce53f540e72ead4f68fc93bfd2d211764ba474e1cd8a4ca1bf5e7a65
|
data/lib/unisec/bidi.rb
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'unisec/utils'
|
4
|
+
require 'ctf_party'
|
5
|
+
|
6
|
+
module Unisec
|
7
|
+
# Manipulation of bidirectional related content
|
8
|
+
class Bidi
|
9
|
+
# Attack using BiDi code points like RtLO, for example, for spoofing a domain name or a file name
|
10
|
+
class Spoof
|
11
|
+
# The target string to spoof (eg. URL, domain or file name)
|
12
|
+
# @return [String] the target string
|
13
|
+
attr_reader :target_display
|
14
|
+
|
15
|
+
# Set a new target string to spoof
|
16
|
+
#
|
17
|
+
# It will automatically set `@spoof_string` and `@spoof_payload` as well.
|
18
|
+
# @param input [String] the target string
|
19
|
+
# @param opts [Hash] optional parameters, see {Spoof.bidi_affix}
|
20
|
+
# @return [String] the target string
|
21
|
+
def set_target_display(input, **opts)
|
22
|
+
@target_display = input
|
23
|
+
@spoof_string = reverse(**opts)
|
24
|
+
@spoof_payload = bidi_affix(**opts)
|
25
|
+
@target_display
|
26
|
+
end
|
27
|
+
|
28
|
+
# The string for the spoofing attack without the BiDi characters
|
29
|
+
# @return [String] the spoof string (without BiDi)
|
30
|
+
attr_reader :spoof_string
|
31
|
+
|
32
|
+
# The string for the spoofing attack with the BiDi characters. (Spoof payload = spoof string + BiDi)
|
33
|
+
# @return [String] the spoof string (with BiDi)
|
34
|
+
attr_reader :spoof_payload
|
35
|
+
|
36
|
+
# @param input [String] the target string
|
37
|
+
# @param opts [Hash] optional parameters, see {Spoof.bidi_affix}
|
38
|
+
# @example
|
39
|
+
# bd = Unisec::Bidi::Spoof.new('https://moc.example.org//:sptth')
|
40
|
+
# bd.target_display # => "https://moc.example.org//:sptth"
|
41
|
+
# bd.spoof_string # => "https://gro.elpmaxe.com//:sptth"
|
42
|
+
# bd.spoof_payload => "https://gro.elpmaxe.com//:sptth"
|
43
|
+
def initialize(input, **opts)
|
44
|
+
opts[:index] ||= opts[:infix_pos]
|
45
|
+
|
46
|
+
@target_display = input
|
47
|
+
@spoof_string = reverse(**opts)
|
48
|
+
@spoof_payload = bidi_affix(**opts)
|
49
|
+
end
|
50
|
+
|
51
|
+
# Reverse the (sub)-string (grapheme cluster aware)
|
52
|
+
# @param target [String] string to reverse
|
53
|
+
# @param opts [Hash] optional parameters
|
54
|
+
# @option opts [Integer] :index Index at which the reverse starts (the part before this position is left untouched)
|
55
|
+
# @return [String] the reversed string
|
56
|
+
# @example
|
57
|
+
# Unisec::Bidi::Spoof.reverse('document_anntxt.exe', index: 12)
|
58
|
+
# # => "document_annexe.txt"
|
59
|
+
#
|
60
|
+
# Unisec::Bidi::Spoof.reverse("🇫🇷🐓")
|
61
|
+
# # => "🐓🇫🇷"
|
62
|
+
def self.reverse(target, **opts)
|
63
|
+
opts[:index] ||= 0
|
64
|
+
|
65
|
+
target[0...opts[:index]] + Unisec::Utils::String.grapheme_reverse(target[opts[:index]..])
|
66
|
+
end
|
67
|
+
|
68
|
+
# Call {Spoof.reverse} with `@target_display` as default input (target).
|
69
|
+
def reverse(**opts)
|
70
|
+
Spoof.reverse(@target_display, **opts)
|
71
|
+
end
|
72
|
+
|
73
|
+
# Inject BiDi characters into the input string
|
74
|
+
# @param input [String] input string
|
75
|
+
# @param opts [Hash] optional parameters
|
76
|
+
# @option opts [String] :prefix Prefix Bidi. Default: RLO (U+202E).
|
77
|
+
# @option opts [String] :suffix Suffix Bidi. Default: PDF (U+202C).
|
78
|
+
# @option opts [String] :infix_bidi Bidi injected at a chosen position. Default: none (empty string).
|
79
|
+
# @option opts [Integer] :infix_pos Position (index) where to inject an extra BiDi. Default: 0.
|
80
|
+
# @return [String] spoof payload (input string with injected BiDi)
|
81
|
+
# @example
|
82
|
+
# # By default inject a RLO prefix, a PDF suffix and no infix.
|
83
|
+
# Unisec::Bidi::Spoof.bidi_affix('acceis')
|
84
|
+
# # => "acceis"
|
85
|
+
#
|
86
|
+
# # RLI ... PDI
|
87
|
+
# Unisec::Bidi::Spoof.bidi_affix('acceis', prefix: "\u{2067}", suffix: "\u{2069}")
|
88
|
+
# # => "acceis"
|
89
|
+
#
|
90
|
+
# # RLE ... PDF
|
91
|
+
# Unisec::Bidi::Spoof.bidi_affix('acceis', prefix: "\u{202B}", suffix: "\u{202C}")
|
92
|
+
# # => "acceis"
|
93
|
+
#
|
94
|
+
# # RLO ... PDF
|
95
|
+
# Unisec::Bidi::Spoof.bidi_affix('https://moc.example.org//:sptth', prefix: "\u{202E}", suffix: "\u{202C}")
|
96
|
+
# # => "https://moc.example.org//:sptth"
|
97
|
+
#
|
98
|
+
# # FSI RLO ... PDF PDI
|
99
|
+
# Unisec::Bidi::Spoof.bidi_affix('https://moc.example.org//:sptth', prefix: "\u{2068 202E}", suffix: "\u{202C 2069}")
|
100
|
+
# # => "https://moc.example.org//:sptth"
|
101
|
+
#
|
102
|
+
# # RLM ...
|
103
|
+
# Unisec::Bidi::Spoof.bidi_affix('unicode', prefix: "\u{200F}", suffix: '')
|
104
|
+
# # => "unicode"
|
105
|
+
#
|
106
|
+
# # For file name spoofing, it is useful to be able to inject just a RLO before the fake extension
|
107
|
+
# # so we can void the prefix and suffix and just set the position of an infix
|
108
|
+
# ex = Unisec::Bidi::Spoof.bidi_affix('document_anntxt.exe', prefix: '', suffix: '', infix_bidi: "\u{202E}", infix_pos: 12)
|
109
|
+
# # => "document_anntxt.exe"
|
110
|
+
# puts ex
|
111
|
+
# # document_anntxt.exe
|
112
|
+
def self.bidi_affix(input, **opts)
|
113
|
+
opts[:prefix] ||= "\u{202E}" # RLO
|
114
|
+
opts[:suffix] ||= "\u{202C}" # PDF
|
115
|
+
opts[:infix_bidi] ||= ''
|
116
|
+
opts[:infix_pos] ||= 0
|
117
|
+
|
118
|
+
out = "#{opts[:prefix]}#{input}#{opts[:suffix]}"
|
119
|
+
out.insert(opts[:infix_pos], opts[:infix_bidi])
|
120
|
+
out
|
121
|
+
end
|
122
|
+
|
123
|
+
# Call {Spoof.bidi_affix} with `@spoof_string` as input.
|
124
|
+
def bidi_affix(**opts)
|
125
|
+
Spoof.bidi_affix(@spoof_string, **opts)
|
126
|
+
end
|
127
|
+
|
128
|
+
# Display a CLI-friendly output summarizing the spoof payload
|
129
|
+
#
|
130
|
+
# The light version displays only the spoof payload for easy piping with other commands.
|
131
|
+
# @param light [Boolean] `true` = light display (displays only the spoof payload for easy piping with other commands), `false` (default) = full display.
|
132
|
+
# @return [String] CLI-ready output
|
133
|
+
# @example
|
134
|
+
# puts Unisec::Bidi::Spoof.new('noraj').display
|
135
|
+
# # Target string: noraj
|
136
|
+
# # Spoof payload (display) ⚠: jaron
|
137
|
+
# # Spoof string 🛈: jaron
|
138
|
+
# # Spoof payload (hex): e280ae6a61726f6ee280ac
|
139
|
+
# # Spoof payload (hex, escaped): \xe2\x80\xae\x6a\x61\x72\x6f\x6e\xe2\x80\xac
|
140
|
+
# # Spoof payload (base64): 4oCuamFyb27igKw=
|
141
|
+
# # Spoof payload (urlencode): %E2%80%AEjaron%E2%80%AC
|
142
|
+
# # Spoof payload (code points): U+202E U+006A U+0061 U+0072 U+006F U+006E U+202C
|
143
|
+
# #
|
144
|
+
# #
|
145
|
+
# #
|
146
|
+
# # ⚠: for the spoof payload to display correctly, be sure your VTE has RTL support, e.g. see https://wiki.archlinux.org/title/Bidirectional_text#Terminal.
|
147
|
+
# # 🛈: Does not contain the BiDi character (e.g. RtLO).
|
148
|
+
#
|
149
|
+
# puts Unisec::Bidi::Spoof.new('noraj').display(light: true)
|
150
|
+
# # jaron
|
151
|
+
def display(light: false)
|
152
|
+
if light == false # full display
|
153
|
+
"Target string: #{@target_display}\n" \
|
154
|
+
"Spoof payload (display) ⚠: #{@spoof_payload}\n" \
|
155
|
+
"Spoof string 🛈: #{@spoof_string}\n" \
|
156
|
+
"Spoof payload (hex): #{@spoof_payload.to_hex}\n" \
|
157
|
+
"Spoof payload (hex, escaped): #{@spoof_payload.to_hex(prefixall: '\\x')}\n" \
|
158
|
+
"Spoof payload (base64): #{@spoof_payload.to_b64}\n" \
|
159
|
+
"Spoof payload (urlencode): #{@spoof_payload.urlencode}\n" \
|
160
|
+
"Spoof payload (code points): #{Unisec::Properties.chars2codepoints(@spoof_payload)}\n" \
|
161
|
+
"\n\n\n" \
|
162
|
+
'⚠: for the spoof payload to display correctly, be sure your VTE has RTL support, ' \
|
163
|
+
"e.g. see https://wiki.archlinux.org/title/Bidirectional_text#Terminal.\n" \
|
164
|
+
'🛈: Does not contain the BiDi character (e.g. RtLO).'
|
165
|
+
else # light display
|
166
|
+
@spoof_payload
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
data/lib/unisec/cli/bidi.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'dry/cli'
|
4
|
+
require 'unisec'
|
5
|
+
require 'unisec/utils'
|
6
|
+
|
7
|
+
module Unisec
|
8
|
+
module CLI
|
9
|
+
module Commands
|
10
|
+
# CLI sub-commands `unisec bidi xxx` for the class {Unisec::Bidi} from the lib.
|
11
|
+
module Bidi
|
12
|
+
# Command `unisec bidi spoof`
|
13
|
+
#
|
14
|
+
# Example:
|
15
|
+
#
|
16
|
+
# ```plaintext
|
17
|
+
# $ unisec bidi spoof noraj
|
18
|
+
# Target string: noraj
|
19
|
+
# Spoof payload (display) ⚠: jaron
|
20
|
+
# Spoof string 🛈: jaron
|
21
|
+
# Spoof payload (hex): e280ae6a61726f6ee280ac
|
22
|
+
# Spoof payload (hex, escaped): \xe2\x80\xae\x6a\x61\x72\x6f\x6e\xe2\x80\xac
|
23
|
+
# Spoof payload (base64): 4oCuamFyb27igKw=
|
24
|
+
# Spoof payload (urlencode): %E2%80%AEjaron%E2%80%AC
|
25
|
+
# Spoof payload (code points): U+202E U+006A U+0061 U+0072 U+006F U+006E U+202C
|
26
|
+
#
|
27
|
+
#
|
28
|
+
#
|
29
|
+
# ⚠: for the spoof payload to display correctly, be sure your VTE has RTL support, e.g. see https://wiki.archlinux.org/title/Bidirectional_text#Terminal.
|
30
|
+
# 🛈: Does not contain the BiDi character (e.g. RtLO).
|
31
|
+
#
|
32
|
+
# $ unisec bidi spoof 'document_annexe.txt' --prefix '' --suffix '' --infix-bidi $'\U202E' --infix-pos 12 --light=true
|
33
|
+
# document_anntxt.exe
|
34
|
+
# ```
|
35
|
+
class Spoof < Dry::CLI::Command
|
36
|
+
desc 'Craft a payload for BiDi attacks (for example, for spoofing a domain name or a file name)'
|
37
|
+
|
38
|
+
argument :input, required: true,
|
39
|
+
desc: 'String input'
|
40
|
+
option :light, default: false, values: %w[true false],
|
41
|
+
desc: 'true = light display (displays only the spoof payload for easy piping with other ' \
|
42
|
+
'commands), false = full display'
|
43
|
+
option :prefix, default: nil, desc: 'Prefix Bidi. Default: RLO (U+202E).'
|
44
|
+
option :suffix, default: nil, desc: 'Suffix Bidi. Default: PDF (U+202C).'
|
45
|
+
option :infix_bidi, default: nil, desc: 'Bidi injected at a chosen position. Default: none (empty string).'
|
46
|
+
option :infix_pos, default: nil, desc: 'Spoof payload (input string with injected BiDi)'
|
47
|
+
|
48
|
+
# Craft a payload for BiDi attacks
|
49
|
+
# @param input [String] Input string to spoof
|
50
|
+
# @param options [Hash] optional parameters, see {Unisec::Bidi::Spoof.bidi_affix}
|
51
|
+
def call(input: nil, **options)
|
52
|
+
to_bool = ->(str) { ['true', true].include?(str) }
|
53
|
+
light = to_bool.call(options.fetch(:light))
|
54
|
+
infix_pos = options[:infix_pos].to_i unless options[:infix_pos].nil?
|
55
|
+
puts Unisec::Bidi::Spoof.new(input, prefix: options[:prefix], suffix: options[:suffix],
|
56
|
+
infix_bidi: options[:infix_bidi],
|
57
|
+
infix_pos: infix_pos).display(light: light)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
data/lib/unisec/cli/cli.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'unisec/cli/bidi'
|
3
4
|
require 'unisec/cli/confusables'
|
4
5
|
require 'unisec/cli/hexdump'
|
6
|
+
require 'unisec/cli/normalization'
|
5
7
|
require 'unisec/cli/properties'
|
6
8
|
require 'unisec/cli/rugrep'
|
7
9
|
require 'unisec/cli/size'
|
@@ -17,10 +19,12 @@ module Unisec
|
|
17
19
|
|
18
20
|
# Mapping between the (sub-)commands as seen by the user
|
19
21
|
# on the command-line interface and the CLI modules in the lib
|
22
|
+
register 'bidi spoof', Bidi::Spoof
|
20
23
|
register 'confusables list', Confusables::List
|
21
24
|
register 'confusables randomize', Confusables::Randomize
|
22
25
|
register 'grep', Grep
|
23
26
|
register 'hexdump', Hexdump
|
27
|
+
register 'normalize', Normalize
|
24
28
|
register 'properties char', Properties::Char
|
25
29
|
register 'properties codepoints', Properties::Codepoints
|
26
30
|
register 'properties list', Properties::List
|
data/lib/unisec/cli/hexdump.rb
CHANGED
@@ -17,17 +17,29 @@ module Unisec
|
|
17
17
|
# UTF-16LE: 4100 4300 4300 4500 4900 5300
|
18
18
|
# UTF-32BE: 00000041 00000043 00000043 00000045 00000049 00000053
|
19
19
|
# UTF-32LE: 41000000 43000000 43000000 45000000 49000000 53000000
|
20
|
+
#
|
21
|
+
# $ unisec hexdump "ACCEIS" --enc utf16le
|
22
|
+
# 4100 4300 4300 4500 4900 5300
|
20
23
|
# ```
|
21
24
|
class Hexdump < Dry::CLI::Command
|
22
25
|
desc 'Hexdump in all Unicode encodings'
|
23
26
|
|
24
27
|
argument :input, required: true,
|
25
|
-
desc: 'String input'
|
28
|
+
desc: 'String input. Read from STDIN if equal to -.'
|
29
|
+
|
30
|
+
option :enc, default: nil, values: %w[utf8 utf16be utf16le utf32be utf32le],
|
31
|
+
desc: 'Output only in the specified encoding.'
|
26
32
|
|
27
33
|
# Hexdump of all Unicode encodings.
|
28
34
|
# @param input [String] Input string to encode
|
29
|
-
def call(input: nil, **)
|
30
|
-
|
35
|
+
def call(input: nil, **options)
|
36
|
+
input = $stdin.read.chomp if input == '-'
|
37
|
+
if options[:enc].nil?
|
38
|
+
puts Unisec::Hexdump.new(input).display
|
39
|
+
else
|
40
|
+
# using send() is safe here thanks to the value whitelist
|
41
|
+
puts Unisec::Hexdump.send(options[:enc], input)
|
42
|
+
end
|
31
43
|
end
|
32
44
|
end
|
33
45
|
end
|
data/lib/unisec/cli/normalization.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'dry/cli'
|
4
|
+
require 'unisec'
|
5
|
+
require 'unisec/utils'
|
6
|
+
|
7
|
+
module Unisec
|
8
|
+
module CLI
|
9
|
+
module Commands
|
10
|
+
# CLI sub-commands `unisec normalize xxx` for the class {Unisec::Normalization} from the lib.
|
11
|
+
#
|
12
|
+
# Command `unisec normalize "example"`
|
13
|
+
#
|
14
|
+
# Example:
|
15
|
+
#
|
16
|
+
# ```plaintext
|
17
|
+
# ➜ unisec normalize ẛ̣
|
18
|
+
# Original: ẛ̣
|
19
|
+
# U+1E9B U+0323
|
20
|
+
# NFC: ẛ̣
|
21
|
+
# U+1E9B U+0323
|
22
|
+
# NFKC: ṩ
|
23
|
+
# U+1E69
|
24
|
+
# NFD: ẛ̣
|
25
|
+
# U+017F U+0323 U+0307
|
26
|
+
# NFKD: ṩ
|
27
|
+
# U+0073 U+0323 U+0307
|
28
|
+
#
|
29
|
+
# ➜ unisec normalize ẛ̣ --form nfkd
|
30
|
+
# ṩ
|
31
|
+
# ```
|
32
|
+
class Normalize < Dry::CLI::Command
|
33
|
+
desc 'Normalize in all forms'
|
34
|
+
|
35
|
+
argument :input, required: true,
|
36
|
+
desc: 'String input. Read from STDIN if equal to -.'
|
37
|
+
|
38
|
+
option :form, default: nil, values: %w[nfc nfkc nfd nfkd],
|
39
|
+
desc: 'Output only in the specified normalization form.'
|
40
|
+
|
41
|
+
# Normalize in all forms
|
42
|
+
# @param input [String] Input string to normalize
|
43
|
+
def call(input: nil, **options)
|
44
|
+
input = $stdin.read.chomp if input == '-'
|
45
|
+
if options[:form].nil?
|
46
|
+
puts Unisec::Normalization.new(input).display
|
47
|
+
else
|
48
|
+
# using send() is safe here thanks to the value whitelist
|
49
|
+
puts Unisec::Normalization.send(options[:form], input)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/lib/unisec/hexdump.rb
CHANGED
data/lib/unisec/normalization.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'ctf_party'
|
4
|
+
|
5
|
+
module Unisec
|
6
|
+
# Normalization Forms
|
7
|
+
class Normalization
|
8
|
+
# Original input
|
9
|
+
# @return [String] untouched input
|
10
|
+
attr_reader :original
|
11
|
+
|
12
|
+
# Normalization Form C (NFC) - Canonical Decomposition, followed by Canonical Composition
|
13
|
+
# @return [String] input normalized with NFC
|
14
|
+
attr_reader :nfc
|
15
|
+
|
16
|
+
# Normalization Form KC (NFKC) - Compatibility Decomposition, followed by Canonical Composition
|
17
|
+
# @return [String] input normalized with NFKC
|
18
|
+
attr_reader :nfkc
|
19
|
+
|
20
|
+
# Normalization Form D (NFD) - Canonical Decomposition
|
21
|
+
# @return [String] input normalized with NFD
|
22
|
+
attr_reader :nfd
|
23
|
+
|
24
|
+
# Normalization Form KD (NFKD) - Compatibility Decomposition
|
25
|
+
# @return [String] input normalized with NFKD
|
26
|
+
attr_reader :nfkd
|
27
|
+
|
28
|
+
# Generate all normalization forms for a given input
|
29
|
+
# @param str [String] the target string
|
30
|
+
# @return [nil]
|
31
|
+
def initialize(str)
|
32
|
+
@original = str
|
33
|
+
@nfc = Normalization.nfc(str)
|
34
|
+
@nfkc = Normalization.nfkc(str)
|
35
|
+
@nfd = Normalization.nfd(str)
|
36
|
+
@nfkd = Normalization.nfkd(str)
|
37
|
+
end
|
38
|
+
|
39
|
+
# Normalization Form C (NFC) - Canonical Decomposition, followed by Canonical Composition
|
40
|
+
# @param str [String] the target string
|
41
|
+
# @return [String] input normalized with NFC
|
42
|
+
def self.nfc(str)
|
43
|
+
str.unicode_normalize(:nfc)
|
44
|
+
end
|
45
|
+
|
46
|
+
# Normalization Form KC (NFKC) - Compatibility Decomposition, followed by Canonical Composition
|
47
|
+
# @param str [String] the target string
|
48
|
+
# @return [String] input normalized with NFKC
|
49
|
+
def self.nfkc(str)
|
50
|
+
str.unicode_normalize(:nfkc)
|
51
|
+
end
|
52
|
+
|
53
|
+
# Normalization Form D (NFD) - Canonical Decomposition
|
54
|
+
# @param str [String] the target string
|
55
|
+
# @return [String] input normalized with NFD
|
56
|
+
def self.nfd(str)
|
57
|
+
str.unicode_normalize(:nfd)
|
58
|
+
end
|
59
|
+
|
60
|
+
# Normalization Form KD (NFKD) - Compatibility Decomposition
|
61
|
+
# @param str [String] the target string
|
62
|
+
# @return [String] input normalized with NFKD
|
63
|
+
def self.nfkd(str)
|
64
|
+
str.unicode_normalize(:nfkd)
|
65
|
+
end
|
66
|
+
|
67
|
+
# Display a CLI-friendly output summarizing all normalization forms
|
68
|
+
# @return [String] CLI-ready output
|
69
|
+
# @example
|
70
|
+
# puts Unisec::Normalization.new("\u{1E9B 0323}").display
|
71
|
+
# # =>
|
72
|
+
# # Original: ẛ̣
|
73
|
+
# # U+1E9B U+0323
|
74
|
+
# # NFC: ẛ̣
|
75
|
+
# # U+1E9B U+0323
|
76
|
+
# # NFKC: ṩ
|
77
|
+
# # U+1E69
|
78
|
+
# # NFD: ẛ̣
|
79
|
+
# # U+017F U+0323 U+0307
|
80
|
+
# # NFKD: ṩ
|
81
|
+
# # U+0073 U+0323 U+0307
|
82
|
+
def display
|
83
|
+
colorize = lambda { |form_title, form_attr|
|
84
|
+
"#{Paint[form_title.to_s, :underline,
|
85
|
+
:bold]}: #{form_attr}\n #{Paint[Unisec::Properties.chars2codepoints(form_attr), :red]}\n"
|
86
|
+
}
|
87
|
+
colorize.call('Original', @original) +
|
88
|
+
colorize.call('NFC', @nfc) +
|
89
|
+
colorize.call('NFKC', @nfkc) +
|
90
|
+
colorize.call('NFD', @nfd) +
|
91
|
+
colorize.call('NFKD', @nfkd)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
data/lib/unisec/surrogates.rb
CHANGED
@@ -102,6 +102,7 @@ module Unisec
|
|
102
102
|
# Display a CLI-friendly output summarizing everything about the surrogates:
|
103
103
|
# the corresponding character, code point, high and low surrogates
|
104
104
|
# (each displayed as hexadecimal, decimal and binary).
|
105
|
+
# @return [String] CLI-ready output
|
105
106
|
# @example
|
106
107
|
# surr = Unisec::Surrogates.new(128169)
|
107
108
|
# puts surr.display # =>
|
data/lib/unisec/utils.rb
CHANGED
@@ -98,6 +98,16 @@ module Unisec
|
|
98
98
|
:string
|
99
99
|
end
|
100
100
|
end
|
101
|
+
|
102
|
+
# Reverse a string by graphemes (not by code points)
|
103
|
+
# @return [String] the reversed string
|
104
|
+
# @example
|
105
|
+
# b = "\u{1f1eb}\u{1f1f7}\u{1F413}" # => "🇫🇷🐓"
|
106
|
+
# b.reverse # => "🐓🇷🇫"
|
107
|
+
# Unisec::Utils::String.grapheme_reverse(b) # => "🐓🇫🇷"
|
108
|
+
def self.grapheme_reverse(str)
|
109
|
+
str.grapheme_clusters.reverse.join
|
110
|
+
end
|
101
111
|
end
|
102
112
|
end
|
103
113
|
end
|
data/lib/unisec/version.rb
CHANGED
data/lib/unisec/versions.rb
CHANGED
@@ -72,19 +72,19 @@ module Unisec
|
|
72
72
|
# # …
|
73
73
|
def self.display # rubocop:disable Metrics/AbcSize
|
74
74
|
data = versions
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
75
|
+
colorize = ->(node) { Paint[data[node][:label], :red, :bold].ljust(44) + " #{data[node][:version]}\n" }
|
76
|
+
Paint["Unicode:\n", :underline] +
|
77
|
+
colorize.call(:ruby_unicode) +
|
78
|
+
colorize.call(:twittercldr_unicode) +
|
79
|
+
colorize.call(:unicodeconfusable_unicode) +
|
80
|
+
colorize.call(:twittercldr_icu) +
|
81
|
+
colorize.call(:twittercldr_cldr) +
|
82
|
+
colorize.call(:ruby_unicode_emoji) +
|
83
|
+
colorize.call(:ucd_derivedname) +
|
84
|
+
Paint["\nGems:\n", :underline] +
|
85
|
+
colorize.call(:unisec) +
|
86
|
+
colorize.call(:twittercldr) +
|
87
|
+
colorize.call(:unicodeconfusable)
|
88
88
|
end
|
89
89
|
end
|
90
90
|
end
|
data/lib/unisec.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unisec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexandre ZANNI
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ctf-party
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '3.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '3.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: dry-cli
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -88,7 +88,7 @@ dependencies:
|
|
88
88
|
version: '1.9'
|
89
89
|
description: 'Toolkit for security research manipulating Unicode: confusables, homoglyphs,
|
90
90
|
hexdump, code point, UTF-8, UTF-16, UTF-32, properties, regexp search, size, grapheme,
|
91
|
-
surrogates, version, ICU, CLDR, UCD'
|
91
|
+
surrogates, version, ICU, CLDR, UCD, BiDi, normalization'
|
92
92
|
email: alexandre.zanni@europe.com
|
93
93
|
executables:
|
94
94
|
- unisec
|
@@ -99,9 +99,12 @@ files:
|
|
99
99
|
- bin/unisec
|
100
100
|
- data/DerivedName.txt
|
101
101
|
- lib/unisec.rb
|
102
|
+
- lib/unisec/bidi.rb
|
103
|
+
- lib/unisec/cli/bidi.rb
|
102
104
|
- lib/unisec/cli/cli.rb
|
103
105
|
- lib/unisec/cli/confusables.rb
|
104
106
|
- lib/unisec/cli/hexdump.rb
|
107
|
+
- lib/unisec/cli/normalization.rb
|
105
108
|
- lib/unisec/cli/properties.rb
|
106
109
|
- lib/unisec/cli/rugrep.rb
|
107
110
|
- lib/unisec/cli/size.rb
|
@@ -109,6 +112,7 @@ files:
|
|
109
112
|
- lib/unisec/cli/versions.rb
|
110
113
|
- lib/unisec/confusables.rb
|
111
114
|
- lib/unisec/hexdump.rb
|
115
|
+
- lib/unisec/normalization.rb
|
112
116
|
- lib/unisec/properties.rb
|
113
117
|
- lib/unisec/rugrep.rb
|
114
118
|
- lib/unisec/size.rb
|
@@ -145,7 +149,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
145
149
|
- !ruby/object:Gem::Version
|
146
150
|
version: '0'
|
147
151
|
requirements: []
|
148
|
-
rubygems_version: 3.
|
152
|
+
rubygems_version: 3.5.3
|
149
153
|
signing_key:
|
150
154
|
specification_version: 4
|
151
155
|
summary: Unicode Security Toolkit
|