unisec 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/unisec/bidi.rb +3 -3
- data/lib/unisec/blocks.rb +35 -1
- data/lib/unisec/cli/blocks.rb +28 -0
- data/lib/unisec/cli/cli.rb +9 -0
- data/lib/unisec/cli/dump.rb +81 -0
- data/lib/unisec/cli/normalization.rb +31 -0
- data/lib/unisec/cli/planes.rb +52 -0
- data/lib/unisec/hexdump.rb +51 -0
- data/lib/unisec/normalization.rb +72 -0
- data/lib/unisec/planes.rb +66 -0
- data/lib/unisec/properties.rb +4 -2
- data/lib/unisec/utils.rb +105 -23
- data/lib/unisec/version.rb +1 -1
- metadata +21 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e1c859ae327cc9381cc578456525a9fc0d6e68299f10bce6cd4f6439431a7fc0
|
|
4
|
+
data.tar.gz: 8c091df7ffc3e8f720ca9e5cee3d022e4cba4876530727150cc8277d61509f7c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7981fd667521cbccf1c3fdfda8610722fdf9892392568be8bacdd36719109982e07d906c9c4b5c3aff4c90d10252b93460698a3f404348d5dcbd8783124e77cb
|
|
7
|
+
data.tar.gz: 3b32516d01be17f5d462acade421755c5420f1f2f7d596f972c87d17425a64e06cee0fc7963d916113ea970f6a8882b47aa4e84113d31c992f8cc115c2ea5f59
|
data/lib/unisec/bidi.rb
CHANGED
|
@@ -18,10 +18,10 @@ module Unisec
|
|
|
18
18
|
# @param input [String] the target string
|
|
19
19
|
# @param opts [Hash] optional parameters, see {Spoof.bidi_affix}
|
|
20
20
|
# @return [String] the target string
|
|
21
|
-
def set_target_display(input, **)
|
|
21
|
+
def set_target_display(input, **opts)
|
|
22
22
|
@target_display = input
|
|
23
|
-
@spoof_string = reverse(**)
|
|
24
|
-
@spoof_payload = bidi_affix(**)
|
|
23
|
+
@spoof_string = reverse(**opts)
|
|
24
|
+
@spoof_payload = bidi_affix(**opts)
|
|
25
25
|
@target_display
|
|
26
26
|
end
|
|
27
27
|
|
data/lib/unisec/blocks.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'paint'
|
|
4
|
+
require 'twitter_cldr'
|
|
4
5
|
require 'unisec/utils'
|
|
5
6
|
|
|
6
7
|
module Unisec
|
|
@@ -114,7 +115,7 @@ module Unisec
|
|
|
114
115
|
if block_arg.size == 1 # is a char (1 code unit, not one grapheme)
|
|
115
116
|
found = true if blk_range.include?(Utils::String.convert_to_integer(block_arg))
|
|
116
117
|
elsif block_arg.start_with?('U+') # string code point
|
|
117
|
-
found = true if blk_range.include?(Utils::String.
|
|
118
|
+
found = true if blk_range.include?(Utils::String.convert(block_arg, :integer))
|
|
118
119
|
elsif blk_name.downcase == block_arg.downcase # block name
|
|
119
120
|
found = true
|
|
120
121
|
end
|
|
@@ -205,5 +206,38 @@ module Unisec
|
|
|
205
206
|
end
|
|
206
207
|
nil
|
|
207
208
|
end
|
|
209
|
+
|
|
210
|
+
# Returns the name of the Unicode block containing the given character.
|
|
211
|
+
# @param char [String] Single character (only one code unit, so be careful with
|
|
212
|
+
# emojis, composed or joint characters using several units, only the first
|
|
213
|
+
# code unit will be kept).
|
|
214
|
+
# @return [String] Block name or empty string if not found.
|
|
215
|
+
# @example
|
|
216
|
+
# Unisec::Blocks.reverse('…') # => "General Punctuation"
|
|
217
|
+
# Unisec::Blocks.reverse('A') # => "Basic Latin"
|
|
218
|
+
# Unisec::Blocks.reverse('💩') # => "Miscellaneous Symbols and Pictographs"
|
|
219
|
+
# Unisec::Blocks.reverse('🇫🇷') # => "Enclosed Alphanumeric Supplement" (only first unit is kept)
|
|
220
|
+
def self.reverse(char)
|
|
221
|
+
cp_num = TwitterCldr::Utils::CodePoints.from_string(char)
|
|
222
|
+
cp = TwitterCldr::Shared::CodePoint.get(cp_num.first)
|
|
223
|
+
props = cp.properties
|
|
224
|
+
props.block.join
|
|
225
|
+
rescue NoMethodError # in case of invalid character where CodePoint.get() => nil
|
|
226
|
+
''
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
# Display a CLI-friendly output showing the block name for a given character.
|
|
230
|
+
# @param char [String] Single character (only one code unit, so be careful with
|
|
231
|
+
# emojis, composed or joint characters using several units, only the first
|
|
232
|
+
# code unit will be kept).
|
|
233
|
+
def self.reverse_display(char)
|
|
234
|
+
blk_name = reverse(char)
|
|
235
|
+
if blk_name.empty?
|
|
236
|
+
puts "no block found for #{char.inspect}"
|
|
237
|
+
else
|
|
238
|
+
puts blk_name
|
|
239
|
+
end
|
|
240
|
+
nil
|
|
241
|
+
end
|
|
208
242
|
end
|
|
209
243
|
end
|
data/lib/unisec/cli/blocks.rb
CHANGED
|
@@ -60,6 +60,34 @@ module Unisec
|
|
|
60
60
|
end
|
|
61
61
|
end
|
|
62
62
|
|
|
63
|
+
# Command `unisec blocks reverse`
|
|
64
|
+
#
|
|
65
|
+
# Example:
|
|
66
|
+
#
|
|
67
|
+
# ```plaintext
|
|
68
|
+
# $ unisec blocks reverse '…'
|
|
69
|
+
# General Punctuation
|
|
70
|
+
# $ unisec blocks reverse 'A'
|
|
71
|
+
# Basic Latin
|
|
72
|
+
# $ unisec blocks reverse '💩'
|
|
73
|
+
# Miscellaneous Symbols and Pictographs
|
|
74
|
+
# $ unisec blocks reverse '🇫🇷'
|
|
75
|
+
# Enclosed Alphanumeric Supplement
|
|
76
|
+
# ```
|
|
77
|
+
class Reverse < Dry::CLI::Command
|
|
78
|
+
desc 'Search in which Unicode block a given character is'
|
|
79
|
+
|
|
80
|
+
argument :char, required: true,
|
|
81
|
+
desc: 'Single character (only one code unit, so be careful with emojis, composed or ' \
|
|
82
|
+
'joint characters using several units, only the first code unit will be kept)'
|
|
83
|
+
|
|
84
|
+
# Display the Unicode block name for a given character
|
|
85
|
+
# @param char [String] Single character (only one code unit, so be careful with emojis, composed or joint characters using several units, only the first code unit will be kept).
|
|
86
|
+
def call(char: nil, **)
|
|
87
|
+
Unisec::Blocks.reverse_display(char)
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
63
91
|
# Command `unisec blocks invalid`
|
|
64
92
|
#
|
|
65
93
|
# Example:
|
data/lib/unisec/cli/cli.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'dry/cli/completion/command'
|
|
3
4
|
require 'unisec/cli/bidi'
|
|
4
5
|
require 'unisec/cli/blocks'
|
|
5
6
|
require 'unisec/cli/confusables'
|
|
@@ -24,15 +25,23 @@ module Unisec
|
|
|
24
25
|
register 'bidi spoof', Bidi::Spoof
|
|
25
26
|
register 'blocks invalid', Blocks::Invalid
|
|
26
27
|
register 'blocks list', Blocks::List
|
|
28
|
+
register 'blocks reverse', Blocks::Reverse
|
|
27
29
|
register 'blocks search', Blocks::Search
|
|
30
|
+
register 'completion', Dry::CLI::Completion::Command[self]
|
|
28
31
|
register 'confusables list', Confusables::List
|
|
29
32
|
register 'confusables randomize', Confusables::Randomize
|
|
33
|
+
register 'dump codepoints integer', Dump::Codepoints::Integer
|
|
34
|
+
register 'dump codepoints standard', Dump::Codepoints::Standard
|
|
30
35
|
register 'dump dec', Dump::Dec
|
|
31
36
|
register 'dump hex', Dump::Hex
|
|
37
|
+
register 'dump rev', Dump::Reverse
|
|
32
38
|
register 'grep', Grep
|
|
33
39
|
register 'normalize all', Normalize::All
|
|
34
40
|
register 'normalize replace', Normalize::Replace
|
|
41
|
+
register 'normalize reverse', Normalize::Reverse
|
|
42
|
+
register 'planes block', Planes::Block
|
|
35
43
|
register 'planes list', Planes::List
|
|
44
|
+
register 'planes reverse', Planes::Reverse
|
|
36
45
|
register 'planes search', Planes::Search
|
|
37
46
|
register 'properties char', Properties::Char
|
|
38
47
|
register 'properties codepoints', Properties::Codepoints
|
data/lib/unisec/cli/dump.rb
CHANGED
|
@@ -81,6 +81,87 @@ module Unisec
|
|
|
81
81
|
end
|
|
82
82
|
end
|
|
83
83
|
end
|
|
84
|
+
|
|
85
|
+
module Codepoints
|
|
86
|
+
# CLI command `unisec dump codepoints standard`.
|
|
87
|
+
#
|
|
88
|
+
# Example:
|
|
89
|
+
#
|
|
90
|
+
# ```plaintext
|
|
91
|
+
# $ unisec dump codepoints standard "unicode"
|
|
92
|
+
# U+0075 U+006E U+0069 U+0063 U+006F U+0064 U+0065
|
|
93
|
+
# ```
|
|
94
|
+
class Standard < Dry::CLI::Command
|
|
95
|
+
desc 'Code point dump (standard format)'
|
|
96
|
+
|
|
97
|
+
argument :input, required: true,
|
|
98
|
+
desc: 'String input. Read from STDIN if equal to -.'
|
|
99
|
+
|
|
100
|
+
# Code point dump (standard format).
|
|
101
|
+
# @param input [String] Input string to encode
|
|
102
|
+
def call(input: nil)
|
|
103
|
+
input = $stdin.read.chomp if input == '-'
|
|
104
|
+
puts Unisec::Utils::String.chars2codepoints(input)
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# CLI command `unisec dump codepoints integer`.
|
|
109
|
+
#
|
|
110
|
+
# Example:
|
|
111
|
+
#
|
|
112
|
+
# ```plaintext
|
|
113
|
+
# $ unisec dump codepoints integer 'I 💕 Ruby 💎'
|
|
114
|
+
# 73 32 128149 32 82 117 98 121 32 128142
|
|
115
|
+
# ```
|
|
116
|
+
class Integer < Dry::CLI::Command
|
|
117
|
+
desc 'Code point dump (integer format)'
|
|
118
|
+
|
|
119
|
+
argument :input, required: true,
|
|
120
|
+
desc: 'String input. Read from STDIN if equal to -.'
|
|
121
|
+
|
|
122
|
+
# Code point dump (integer format).
|
|
123
|
+
# @param input [String] Input string to encode
|
|
124
|
+
def call(input: nil)
|
|
125
|
+
input = $stdin.read.chomp if input == '-'
|
|
126
|
+
puts Unisec::Utils::String.chars2intcodepoints(input)
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
# CLI command `unisec dump rev` for the method {Unisec::Hexdump.reverse} from the lib.
|
|
132
|
+
#
|
|
133
|
+
# Example:
|
|
134
|
+
#
|
|
135
|
+
# ```plaintext
|
|
136
|
+
# $ unisec dump rev 0a0d --enc=utf16be
|
|
137
|
+
# (U+0A0D) - 0a0d
|
|
138
|
+
#
|
|
139
|
+
# $ unisec dump rev 808080 --enc=utf8 --exact=false
|
|
140
|
+
# (U+40000) - f1 80 80 80
|
|
141
|
+
# (U+80000) - f2 80 80 80
|
|
142
|
+
# (U+C0000) - f3 80 80 80
|
|
143
|
+
# (U+100000) - f4 80 80 80
|
|
144
|
+
# ```
|
|
145
|
+
class Reverse < Dry::CLI::Command
|
|
146
|
+
desc 'Reverse search in hexadecimal dump'
|
|
147
|
+
|
|
148
|
+
argument :hexbytes, required: true,
|
|
149
|
+
desc: 'Byte(s) in hexadecimal to search for. Read from STDIN if equal to -.'
|
|
150
|
+
|
|
151
|
+
option :enc, default: 'utf8', values: %w[utf8 utf16be utf16le utf32be utf32le],
|
|
152
|
+
desc: 'The target encoding in which to search.'
|
|
153
|
+
|
|
154
|
+
option :exact, default: 'true', values: %w[true false],
|
|
155
|
+
desc: 'true (default) = exact search, false = "sub-string" search / the value is included ' \
|
|
156
|
+
'in the encoded value'
|
|
157
|
+
|
|
158
|
+
# Search X byte(s) hexadecimal value in Y encoding, basically which characters will give this resulting encoded value
|
|
159
|
+
# @param hexbytes [String] Byte(s) in hexadecimal to search for.
|
|
160
|
+
def call(hexbytes: nil, **options)
|
|
161
|
+
hexbytes = $stdin.read.chomp if hexbytes == '-'
|
|
162
|
+
puts Unisec::Hexdump.display_reverse(hexbytes, options[:enc], exact: options[:exact].to_bool)
|
|
163
|
+
end
|
|
164
|
+
end
|
|
84
165
|
end
|
|
85
166
|
end
|
|
86
167
|
end
|
|
@@ -81,6 +81,37 @@ module Unisec
|
|
|
81
81
|
puts Unisec::Normalization.new(input).display_replace
|
|
82
82
|
end
|
|
83
83
|
end
|
|
84
|
+
|
|
85
|
+
# Command `unisec normalize reverse '<'`
|
|
86
|
+
#
|
|
87
|
+
# Example:
|
|
88
|
+
#
|
|
89
|
+
# ```plaintext
|
|
90
|
+
# $ unisec normalize reverse '"' --forms 'nfkc,nfkd'
|
|
91
|
+
# Original:
|
|
92
|
+
# " (U+0022)
|
|
93
|
+
# NFKC
|
|
94
|
+
# " (U+FF02)
|
|
95
|
+
# NFKD
|
|
96
|
+
# " (U+FF02)
|
|
97
|
+
# ```
|
|
98
|
+
class Reverse < Dry::CLI::Command
|
|
99
|
+
desc 'List reverse normalization candidates (what characters will transform into target after normalization)'
|
|
100
|
+
|
|
101
|
+
argument :target, required: true,
|
|
102
|
+
desc: 'Normalization target. Read from STDIN if equal to -.'
|
|
103
|
+
|
|
104
|
+
option :forms, default: %i[nfc nfd nfkc nfkd],
|
|
105
|
+
desc: 'Output only in the specified normalization form(s). ' \
|
|
106
|
+
'Separate by comma if multiple values.'
|
|
107
|
+
|
|
108
|
+
# Reverse normalize
|
|
109
|
+
# @param target [String] Normalization target
|
|
110
|
+
def call(target: nil, **options)
|
|
111
|
+
target = $stdin.read.chomp if target == '-'
|
|
112
|
+
puts Unisec::Normalization.display_reverse_normalize(target, forms: options[:forms])
|
|
113
|
+
end
|
|
114
|
+
end
|
|
84
115
|
end
|
|
85
116
|
end
|
|
86
117
|
end
|
data/lib/unisec/cli/planes.rb
CHANGED
|
@@ -93,6 +93,58 @@ module Unisec
|
|
|
93
93
|
with_count: options[:with_count].to_bool)
|
|
94
94
|
end
|
|
95
95
|
end
|
|
96
|
+
|
|
97
|
+
# Command `unisec planes reverse`
|
|
98
|
+
#
|
|
99
|
+
# Example:
|
|
100
|
+
#
|
|
101
|
+
# ```plaintext
|
|
102
|
+
# $ unisec planes reverse '…'
|
|
103
|
+
# Basic Multilingual Plane
|
|
104
|
+
# $ unisec planes reverse '🨂'
|
|
105
|
+
# Supplementary Multilingual Plane
|
|
106
|
+
# $ unisec planes reverse '𠀀'
|
|
107
|
+
# Supplementary Ideographic Plane
|
|
108
|
+
# $ unisec planes reverse '🇫🇷'
|
|
109
|
+
# Supplementary Multilingual Plane
|
|
110
|
+
# ```
|
|
111
|
+
class Reverse < Dry::CLI::Command
|
|
112
|
+
desc 'Search in which Unicode plane a given character is'
|
|
113
|
+
|
|
114
|
+
argument :char, required: true,
|
|
115
|
+
desc: 'Single character (only one code unit, so be careful with emojis, composed or joint ' \
|
|
116
|
+
'characters using several units), only the first code unit will be kept).'
|
|
117
|
+
|
|
118
|
+
# Display the Unicode plane name for a given character
|
|
119
|
+
# @param char [String] Single character (only one code unit, so be careful with emojis,
|
|
120
|
+
# composed or joint characters using several units, only the first code unit will be kept).
|
|
121
|
+
def call(char: nil, **)
|
|
122
|
+
Unisec::Planes.reverse_display(char)
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Command `unisec planes block`
|
|
127
|
+
#
|
|
128
|
+
# Example:
|
|
129
|
+
#
|
|
130
|
+
# ```plaintext
|
|
131
|
+
# $ unisec planes block 'Basic Latin'
|
|
132
|
+
# Basic Multilingual Plane
|
|
133
|
+
# $ unisec planes block 'Miscellaneous Symbols and Pictographs'
|
|
134
|
+
# Supplementary Multilingual Plane
|
|
135
|
+
# ```
|
|
136
|
+
class Block < Dry::CLI::Command
|
|
137
|
+
desc 'Search in which Unicode plane a block is'
|
|
138
|
+
|
|
139
|
+
argument :block_arg, required: true,
|
|
140
|
+
desc: 'Block name (case insensitive)'
|
|
141
|
+
|
|
142
|
+
# Display the Unicode plane name for a given block
|
|
143
|
+
# @param block_arg [String] Block name (case insensitive).
|
|
144
|
+
def call(block_arg: nil, **)
|
|
145
|
+
Unisec::Planes.block_display(block_arg)
|
|
146
|
+
end
|
|
147
|
+
end
|
|
96
148
|
end
|
|
97
149
|
end
|
|
98
150
|
end
|
data/lib/unisec/hexdump.rb
CHANGED
|
@@ -85,6 +85,33 @@ module Unisec
|
|
|
85
85
|
str.encode('UTF-32LE').to_hex.scan(/.{8}/).join(' ')
|
|
86
86
|
end
|
|
87
87
|
|
|
88
|
+
# Search X byte(s) hexadecimal value in Y encoding, basically which characters will give this resulting encoded value
|
|
89
|
+
# @param hexbytes [String] Byte(s) in hexadecimal to search for
|
|
90
|
+
# @param enc [String] The target encoding in which to search. It uses Unisec CLI argument values (utf8 utf16be utf16le utf32be utf32le).
|
|
91
|
+
# @param exact [TrueClass|FalseClass] true (default) = exact search, false = "sub-string" search / the value is included in the encoded value
|
|
92
|
+
# @return [Array<String>] all matching source characters
|
|
93
|
+
# @example
|
|
94
|
+
# Unisec::Hexdump.reverse('61', 'utf8') # => ["a"]
|
|
95
|
+
# Unisec::Hexdump.reverse('a6', 'utf8', exact: true) # => []
|
|
96
|
+
# Unisec::Hexdump.reverse('a6', 'utf8', exact: false) # => ["¦", "æ", "Ħ", "Ŧ", "Ʀ", "Ǧ", … ]
|
|
97
|
+
# Unisec::Hexdump.reverse('0d0a', 'utf16be', exact: true) # => ["\u0D0A"] (ഊ)
|
|
98
|
+
def self.reverse(hexbytes, enc, exact: true)
|
|
99
|
+
chars = []
|
|
100
|
+
(0x000000..0x10FFFF).each do |i|
|
|
101
|
+
char = i.chr(Unisec::Utils::Arguments.argenc2enc(enc, target: 'class'))
|
|
102
|
+
encoded_value = Unisec::Hexdump.send(enc, char).delete(' ')
|
|
103
|
+
if exact && encoded_value == hexbytes # exact match
|
|
104
|
+
chars << char
|
|
105
|
+
break
|
|
106
|
+
elsif !exact && encoded_value.include?(hexbytes) # includes value
|
|
107
|
+
chars << char
|
|
108
|
+
end
|
|
109
|
+
rescue RangeError # skip invalid code points for selected encoding
|
|
110
|
+
next
|
|
111
|
+
end
|
|
112
|
+
chars
|
|
113
|
+
end
|
|
114
|
+
|
|
88
115
|
# Display a CLI-friendly output summarizing the hexdump in all Unicode encodings
|
|
89
116
|
# @return [String] CLI-ready output
|
|
90
117
|
# @example
|
|
@@ -101,5 +128,29 @@ module Unisec
|
|
|
101
128
|
"UTF-32BE: #{@utf32be}\n" \
|
|
102
129
|
"UTF-32LE: #{@utf32le}"
|
|
103
130
|
end
|
|
131
|
+
|
|
132
|
+
# Display a CLI-friendly output summarizing the reverse hexdump search results
|
|
133
|
+
# @param hexbytes [String] see {Unisec::Hexdump.reverse}
|
|
134
|
+
# @param enc [String] see {Unisec::Hexdump.reverse}
|
|
135
|
+
# @param exact [TrueClass|FalseClass] see {Unisec::Hexdump.reverse}
|
|
136
|
+
# @return [String] CLI-ready output
|
|
137
|
+
# @example
|
|
138
|
+
# puts Unisec::Hexdump.display_reverse('0d0a', 'utf16be', exact: true)
|
|
139
|
+
# # ഊ (U+0D0A) - 0d0a
|
|
140
|
+
# puts Unisec::Hexdump.display_reverse('808080', 'utf8', exact: false)
|
|
141
|
+
# # (U+40000) - f1 80 80 80
|
|
142
|
+
# # (U+80000) - f2 80 80 80
|
|
143
|
+
# # (U+C0000) - f3 80 80 80
|
|
144
|
+
# # (U+100000) - f4 80 80 80
|
|
145
|
+
def self.display_reverse(hexbytes, enc, exact: true)
|
|
146
|
+
res = Unisec::Hexdump.reverse(hexbytes, enc, exact: exact)
|
|
147
|
+
out = ''
|
|
148
|
+
res.each do |char|
|
|
149
|
+
cp = Utils::String.char2codepoint(char)
|
|
150
|
+
hxd = Unisec::Hexdump.send(enc, char)
|
|
151
|
+
out += "#{char.encode('UTF-8')} (#{cp}) - #{hxd}\n"
|
|
152
|
+
end
|
|
153
|
+
out
|
|
154
|
+
end
|
|
104
155
|
end
|
|
105
156
|
end
|
data/lib/unisec/normalization.rb
CHANGED
|
@@ -95,6 +95,35 @@ module Unisec
|
|
|
95
95
|
Normalization.replace_bypass(@original)
|
|
96
96
|
end
|
|
97
97
|
|
|
98
|
+
# Find the list of symbols that will transform into a given symbol after normalization
|
|
99
|
+
# @param target [String]
|
|
100
|
+
# @param forms [String|Symbol|Array<Symbol>]
|
|
101
|
+
# @return [Hash] (results won't include input)
|
|
102
|
+
# @example
|
|
103
|
+
# Unisec::Normalization.reverse_normalize('<') # => {nfc: [], nfd: [], nfkc: ["﹤", "<"], nfkd: ["﹤", "<"]}
|
|
104
|
+
# Unisec::Normalization.reverse_normalize('.', forms: [:nfkc, :nfkd]) # => {nfkc: ["․", "﹒", "."], nfkd: ["․", "﹒", "."]}
|
|
105
|
+
# Unisec::Normalization.reverse_normalize('ffi', forms: :nfkc) # => {nfkc: ["ffi"]}
|
|
106
|
+
# Unisec::Normalization.reverse_normalize('≯', forms: 'nfd') # => {nfd: ["≯"]}
|
|
107
|
+
# Unisec::Normalization.reverse_normalize('ô', forms: 'nfc,nfd') # => {nfc: [], nfd: []}
|
|
108
|
+
def self.reverse_normalize(target, forms: %i[nfc nfd nfkc nfkd])
|
|
109
|
+
forms = Utils::Arguments.to_array_of_sym(forms)
|
|
110
|
+
result = {}
|
|
111
|
+
forms.each do |form|
|
|
112
|
+
result[form] = []
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
(0x000000..0x10FFFF).each do |codepoint|
|
|
116
|
+
char = codepoint.chr(Encoding::UTF_8)
|
|
117
|
+
forms.each do |form|
|
|
118
|
+
result[form] << char if (char.unicode_normalize(form) == target) && (char != target)
|
|
119
|
+
end
|
|
120
|
+
rescue RangeError # skip UTF-16 surrogates and potential other invalid code points
|
|
121
|
+
next
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
result
|
|
125
|
+
end
|
|
126
|
+
|
|
98
127
|
# Display a CLI-friendly output summarizing all normalization forms
|
|
99
128
|
# @return [String] CLI-ready output
|
|
100
129
|
# @example
|
|
@@ -124,6 +153,18 @@ module Unisec
|
|
|
124
153
|
|
|
125
154
|
# Display a CLI-friendly output of the XSS payload to bypass HTML escape and
|
|
126
155
|
# what it does once normalized in NFKC & NFKD.
|
|
156
|
+
# @return [String] CLI-ready output
|
|
157
|
+
# @example
|
|
158
|
+
# $ puts Unisec::Normalization.new('<script>').display_replace
|
|
159
|
+
# # =>
|
|
160
|
+
# # Original: <script>
|
|
161
|
+
# # U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
|
|
162
|
+
# # Bypass payload: <script>
|
|
163
|
+
# # U+FF1C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+FF1E
|
|
164
|
+
# # NFKC: <script>
|
|
165
|
+
# # U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
|
|
166
|
+
# # NFKD: <script>
|
|
167
|
+
# # U+003C U+0073 U+0063 U+0072 U+0069 U+0070 U+0074 U+003E
|
|
127
168
|
def display_replace
|
|
128
169
|
colorize = lambda { |form_title, form_attr|
|
|
129
170
|
"#{Paint[form_title.to_s, :underline,
|
|
@@ -135,5 +176,36 @@ module Unisec
|
|
|
135
176
|
colorize.call('NFKC', Normalization.nfkc(payload)) +
|
|
136
177
|
colorize.call('NFKD', Normalization.nfkd(payload))
|
|
137
178
|
end
|
|
179
|
+
|
|
180
|
+
# Display a CLI-friendly output of the reverse normalization results
|
|
181
|
+
# @param target [String] see {Unisec::Normalization.reverse_normalize}
|
|
182
|
+
# @param forms [String|Symbol|Array<Symbol>] see {Unisec::Normalization.reverse_normalize}
|
|
183
|
+
# @return [String] CLI-ready output
|
|
184
|
+
# @example
|
|
185
|
+
# puts Unisec::Normalization.display_reverse_normalize('<')
|
|
186
|
+
# # =>
|
|
187
|
+
# # Original:
|
|
188
|
+
# # < (U+003C)
|
|
189
|
+
# # NFKC
|
|
190
|
+
# # ﹤ (U+FE64)
|
|
191
|
+
# # < (U+FF1C)
|
|
192
|
+
# # NFKD
|
|
193
|
+
# # ﹤ (U+FE64)
|
|
194
|
+
# # < (U+FF1C)
|
|
195
|
+
def self.display_reverse_normalize(target, forms: %i[nfc nfd nfkc nfkd]) # rubocop:disable Metrics/AbcSize
|
|
196
|
+
colorize_form = ->(form_title) { Paint[form_title, :underline, :bold] }
|
|
197
|
+
colorize_char = ->(char) { " #{char} (#{Paint[Unisec::Utils::String.chars2codepoints(char), :red]})\n" }
|
|
198
|
+
out = "#{colorize_form.call('Original')}:\n#{colorize_char.call(target)}"
|
|
199
|
+
res = Unisec::Normalization.reverse_normalize(target, forms: forms) # => {nfc: [], nfd: [], nfkc: ["﹤", "<"], nfkd: ["﹤", "<"]}
|
|
200
|
+
res.each_key do |k|
|
|
201
|
+
next if res[k].empty?
|
|
202
|
+
|
|
203
|
+
out += "#{colorize_form.call(k.to_s.upcase)}\n"
|
|
204
|
+
res[k].each do |v|
|
|
205
|
+
out += colorize_char.call(v)
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
out
|
|
209
|
+
end
|
|
138
210
|
end
|
|
139
211
|
end
|
data/lib/unisec/planes.rb
CHANGED
|
@@ -220,5 +220,71 @@ module Unisec
|
|
|
220
220
|
end
|
|
221
221
|
nil
|
|
222
222
|
end
|
|
223
|
+
|
|
224
|
+
# Returns the name of the Unicode plane containing the given character.
|
|
225
|
+
# @param char [String] Single character (only one code unit, so be careful with
|
|
226
|
+
# emojis, composed or joint characters using several units, only the first
|
|
227
|
+
# code unit will be kept).
|
|
228
|
+
# @return [String] Plane name or empty string if not found.
|
|
229
|
+
# @example
|
|
230
|
+
# Unisec::Planes.reverse('…') # => "Basic Multilingual Plane"
|
|
231
|
+
# Unisec::Planes.reverse('🨂') # => "Supplementary Multilingual Plane"
|
|
232
|
+
# Unisec::Planes.reverse('𠀀') # => "Supplementary Ideographic Plane"
|
|
233
|
+
# Unisec::Planes.reverse('🇫🇷') # => "Supplementary Multilingual Plane" (first unit kept)
|
|
234
|
+
def self.reverse(char)
|
|
235
|
+
return '' unless char.is_a?(String)
|
|
236
|
+
|
|
237
|
+
cp = Utils::String.convert_to_integer(char[0])
|
|
238
|
+
PLANES.each do |plane|
|
|
239
|
+
return plane[:name] if plane[:range].include?(cp)
|
|
240
|
+
end
|
|
241
|
+
'' # not found
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
# Display a CLI-friendly output showing the plane name for a given character.
|
|
245
|
+
# @param char [String] Single character (only one code unit, so be careful with
|
|
246
|
+
# emojis, composed or joint characters using several units, only the first
|
|
247
|
+
# code unit will be kept).
|
|
248
|
+
def self.reverse_display(char)
|
|
249
|
+
plane_name = reverse(char)
|
|
250
|
+
if plane_name.empty?
|
|
251
|
+
puts "no plane found for #{char.inspect}"
|
|
252
|
+
else
|
|
253
|
+
puts plane_name
|
|
254
|
+
end
|
|
255
|
+
nil
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
# Returns the name of the Unicode plane containing the given block.
|
|
259
|
+
# @param block_arg [String] Block name (case insensitive).
|
|
260
|
+
# @return [String] Plane name or empty string if not found.
|
|
261
|
+
# @example
|
|
262
|
+
# Unisec::Planes.block('Basic Latin') # => "Basic Multilingual Plane"
|
|
263
|
+
# Unisec::Planes.block('Miscellaneous Symbols and Pictographs') # => "Supplementary Multilingual Plane"
|
|
264
|
+
def self.block(block_arg) # rubocop:disable Metrics/CyclomaticComplexity
|
|
265
|
+
# support only search by block name
|
|
266
|
+
return '' if block_arg.is_a?(Integer)
|
|
267
|
+
return '' if block_arg.is_a?(String) && (block_arg.size == 1 || block_arg.start_with?('U+'))
|
|
268
|
+
|
|
269
|
+
blk = Blocks.block(block_arg, with_count: false)
|
|
270
|
+
return '' unless blk # block name not found
|
|
271
|
+
|
|
272
|
+
PLANES.each do |plane|
|
|
273
|
+
return plane[:name] if plane[:range].cover?(blk[:range])
|
|
274
|
+
end
|
|
275
|
+
'' # not found
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
# Display a CLI-friendly output showing the plane name for a given block.
|
|
279
|
+
# @param block_arg [String] Block name (case insensitive).
|
|
280
|
+
def self.block_display(block_arg)
|
|
281
|
+
plane_name = block(block_arg)
|
|
282
|
+
if plane_name.empty?
|
|
283
|
+
puts "no plane found for block #{block_arg.inspect}"
|
|
284
|
+
else
|
|
285
|
+
puts plane_name
|
|
286
|
+
end
|
|
287
|
+
nil
|
|
288
|
+
end
|
|
223
289
|
end
|
|
224
290
|
end
|
data/lib/unisec/properties.rb
CHANGED
|
@@ -75,9 +75,10 @@ module Unisec
|
|
|
75
75
|
end
|
|
76
76
|
{
|
|
77
77
|
age: props.age.join,
|
|
78
|
+
plane: Unisec::Planes.reverse(chr),
|
|
78
79
|
block: props.block.join,
|
|
79
80
|
category: categories[1],
|
|
80
|
-
subcategory: categories[0],
|
|
81
|
+
subcategory: "#{categories[0]} (#{cp.category})",
|
|
81
82
|
codepoint: Utils::String.char2codepoint(chr),
|
|
82
83
|
name: cp.name,
|
|
83
84
|
script: props.script.join,
|
|
@@ -119,8 +120,9 @@ module Unisec
|
|
|
119
120
|
data = Properties.char(chr)
|
|
120
121
|
display = ->(key, value) { puts Paint[key, :red, :bold].ljust(30) + " #{value}" }
|
|
121
122
|
display.call('Name:', data[:name])
|
|
122
|
-
display.call('Code Point:', data[:codepoint])
|
|
123
|
+
display.call('Code Point:', data[:codepoint] + " (#{Utils::String.convert(chr, :integer)})")
|
|
123
124
|
puts
|
|
125
|
+
display.call('Plane', data[:plane])
|
|
124
126
|
display.call('Block:', data[:block])
|
|
125
127
|
display.call('Category:', data[:category])
|
|
126
128
|
display.call('Sub-Category:', data[:subcategory])
|
data/lib/unisec/utils.rb
CHANGED
|
@@ -55,27 +55,31 @@ module Unisec
|
|
|
55
55
|
# About string conversion and manipulation.
|
|
56
56
|
module String
|
|
57
57
|
# Convert a string input into the chosen type.
|
|
58
|
-
# @param input [String] If the
|
|
59
|
-
#
|
|
60
|
-
#
|
|
58
|
+
# @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
|
|
59
|
+
# The input must represent a character encoded in hexadecimal, decimal, binary or standard code point format.
|
|
60
|
+
# See {convert_to_integer} and {convert_to_char} for detailed examples.
|
|
61
|
+
# @param target_type [Symbol] Convert to the chosen type. Currently only supports `:integer` and `:char`.
|
|
61
62
|
# @return [Variable] The type of the output depends on the chosen `target_type`.
|
|
62
63
|
# @example
|
|
63
64
|
# Unisec::Utils::String.convert('0x1f4a9', :integer) # => 128169
|
|
65
|
+
# Unisec::Utils::String.convert('0x1f4a9', :char) # => "💩"
|
|
64
66
|
def self.convert(input, target_type)
|
|
65
67
|
case target_type
|
|
66
68
|
when :integer
|
|
67
69
|
convert_to_integer(input)
|
|
70
|
+
when :char
|
|
71
|
+
convert_to_char(input)
|
|
68
72
|
else
|
|
69
73
|
raise TypeError, "Target type \"#{target_type}\" not avaible"
|
|
70
74
|
end
|
|
71
75
|
end
|
|
72
76
|
|
|
73
|
-
# Internal method used for {
|
|
77
|
+
# Internal method used for {convert}.
|
|
74
78
|
#
|
|
75
79
|
# Convert a string input into integer.
|
|
76
|
-
# @param input [String]
|
|
77
|
-
#
|
|
78
|
-
# automatically based on the prefix.
|
|
80
|
+
# @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
|
|
81
|
+
# The input must represent a character encoded in hexadecimal, decimal, binary or standard code point format.
|
|
82
|
+
# The input type is determined automatically based on the prefix.
|
|
79
83
|
# @return [Integer]
|
|
80
84
|
# @example
|
|
81
85
|
# # Hexadecimal
|
|
@@ -86,10 +90,14 @@ module Unisec
|
|
|
86
90
|
# Unisec::Utils::String.convert_to_integer('0b11111010010101001') # => 128169
|
|
87
91
|
# # Unicode string
|
|
88
92
|
# Unisec::Utils::String.convert_to_integer('💩') # => 128169
|
|
93
|
+
# # Standardized format of hexadecimal code point
|
|
94
|
+
# Unisec::Utils::String.convert_to_integer('U+1F4A9') # => 128169
|
|
89
95
|
def self.convert_to_integer(input)
|
|
90
96
|
case autodetect(input)
|
|
91
97
|
when :hexadecimal
|
|
92
98
|
input.hex2dec(prefix: '0x').to_i
|
|
99
|
+
when :stdcp
|
|
100
|
+
input.hex2dec(prefix: 'U+').to_i
|
|
93
101
|
when :decimal
|
|
94
102
|
input.to_i
|
|
95
103
|
when :binary
|
|
@@ -101,11 +109,38 @@ module Unisec
|
|
|
101
109
|
end
|
|
102
110
|
end
|
|
103
111
|
|
|
112
|
+
# Internal method used for {convert}.
|
|
113
|
+
#
|
|
114
|
+
# Convert a string input into a character.
|
|
115
|
+
# @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
|
|
116
|
+
# The input must represent a character encoded in hexadecimal, decimal, binary or standard code point format.
|
|
117
|
+
# The input type is determined automatically based on the prefix.
|
|
118
|
+
# @return [String]
|
|
119
|
+
# @example
|
|
120
|
+
# # Hexadecimal
|
|
121
|
+
# Unisec::Utils::String.convert_to_char('0x1f4a9') # => "💩"
|
|
122
|
+
# # Decimal
|
|
123
|
+
# Unisec::Utils::String.convert_to_char('0d128169') # => "💩"
|
|
124
|
+
# # Binary
|
|
125
|
+
# Unisec::Utils::String.convert_to_char('0b11111010010101001') # => "💩"
|
|
126
|
+
# # Unicode string
|
|
127
|
+
# Unisec::Utils::String.convert_to_char('💩') # => "💩"
|
|
128
|
+
# # Standardized format of hexadecimal code point
|
|
129
|
+
# Unisec::Utils::String.convert_to_char('U+1F4A9') # => "💩"
|
|
130
|
+
def self.convert_to_char(input)
|
|
131
|
+
case autodetect(input)
|
|
132
|
+
when :hexadecimal, :stdcp, :decimal, :binary, :string
|
|
133
|
+
[convert(input, :integer)].pack('U')
|
|
134
|
+
else
|
|
135
|
+
raise TypeError, "Input \"#{input}\" is not of the expected type"
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
|
|
104
139
|
# Internal method used for {.convert}.
|
|
105
140
|
#
|
|
106
141
|
# Autodetect the representation type of the string input.
|
|
107
142
|
# @param str [String] Input.
|
|
108
|
-
# @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string
|
|
143
|
+
# @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string`, `:stdcp`.
|
|
109
144
|
# @example
|
|
110
145
|
# # Hexadecimal
|
|
111
146
|
# Unisec::Utils::String.autodetect('0x1f4a9') # => :hexadecimal
|
|
@@ -115,10 +150,14 @@ module Unisec
|
|
|
115
150
|
# Unisec::Utils::String.autodetect('0b11111010010101001') # => :binary
|
|
116
151
|
# # Unicode string
|
|
117
152
|
# Unisec::Utils::String.autodetect('💩') # => :string
|
|
153
|
+
# # Standardized format of hexadecimal code point
|
|
154
|
+
# Unisec::Utils::String.autodetect('U+1F4A9') # => :stdcp
|
|
118
155
|
def self.autodetect(str)
|
|
119
156
|
case str
|
|
120
|
-
when /0x[0-9a-fA-F]
|
|
157
|
+
when /0x[0-9a-fA-F]+/
|
|
121
158
|
:hexadecimal
|
|
159
|
+
when /U\+[0-9A-F]+/
|
|
160
|
+
:stdcp
|
|
122
161
|
when /0d[0-9]+/
|
|
123
162
|
:decimal
|
|
124
163
|
when /0b[0-1]+/
|
|
@@ -141,8 +180,9 @@ module Unisec
|
|
|
141
180
|
# Display the code point in Unicode format for a given character (code point as string)
|
|
142
181
|
# @param chr [String] Unicode code point (as character / string)
|
|
143
182
|
# @return [String] code point in Unicode format
|
|
183
|
+
# @todo Replace this method by target type :stdcp in String.convert()
|
|
144
184
|
# @example
|
|
145
|
-
# Unisec::
|
|
185
|
+
# Unisec::Utils::String.char2codepoint('💎') # => "U+1F48E"
|
|
146
186
|
def self.char2codepoint(chr)
|
|
147
187
|
Integer.deccp2stdhexcp(chr.codepoints.first)
|
|
148
188
|
end
|
|
@@ -151,8 +191,8 @@ module Unisec
|
|
|
151
191
|
# @param chrs [String] Unicode code points (as characters / string)
|
|
152
192
|
# @return [String] code points in Unicode format
|
|
153
193
|
# @example
|
|
154
|
-
# Unisec::
|
|
155
|
-
# Unisec::
|
|
194
|
+
# Unisec::Utils::String.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
|
|
195
|
+
# Unisec::Utils::String.chars2codepoints("🧑🌾") # => "U+1F9D1 U+200D U+1F33E"
|
|
156
196
|
def self.chars2codepoints(chrs)
|
|
157
197
|
out = []
|
|
158
198
|
chrs.each_char do |chr|
|
|
@@ -161,6 +201,15 @@ module Unisec
|
|
|
161
201
|
out.join(' ')
|
|
162
202
|
end
|
|
163
203
|
|
|
204
|
+
# Display the code points in integer format for the given characters (code points as string)
|
|
205
|
+
# @param chrs [String] Unicode code points (as characters / string)
|
|
206
|
+
# @return [String] code points in integer format
|
|
207
|
+
# @example
|
|
208
|
+
# Unisec::Utils::String.chars2intcodepoints('I 💕 Ruby 💎') # => "73 32 128149 32 82 117 98 121 32 128142"
|
|
209
|
+
def self.chars2intcodepoints(chrs)
|
|
210
|
+
chrs.codepoints.join(' ')
|
|
211
|
+
end
|
|
212
|
+
|
|
164
213
|
# Convert a string of hex encoded Unicode code points range to actual
|
|
165
214
|
# integer Ruby range.
|
|
166
215
|
# @param range_str [String] Unicode code points range as in data/Blocks.txt
|
|
@@ -170,22 +219,13 @@ module Unisec
|
|
|
170
219
|
def self.to_range(range_str)
|
|
171
220
|
::Range.new(*range_str.split('..').map { |x| x.hex2dec.to_i })
|
|
172
221
|
end
|
|
173
|
-
|
|
174
|
-
# Convert from standardized format hexadecimal code point to decimal code point
|
|
175
|
-
# @param std_hex_cp [String] Code point in standardized hexadecimal format
|
|
176
|
-
# @return [Integer] Code point in decimal format
|
|
177
|
-
# @example
|
|
178
|
-
# Unisec::Utils::String.stdhexcp2deccp('U+2026') # => 8230
|
|
179
|
-
def self.stdhexcp2deccp(std_hex_cp)
|
|
180
|
-
hex = "0x#{std_hex_cp[2..]}" # replace U+ prefix with 0x
|
|
181
|
-
convert_to_integer(hex)
|
|
182
|
-
end
|
|
183
222
|
end
|
|
184
223
|
|
|
185
224
|
module Integer
|
|
186
225
|
# Convert from decimal code point to standardized format hexadecimal code point
|
|
187
226
|
# @param int_cp [Integer] Code point in decimal format
|
|
188
227
|
# @return [String] code point in Unicode format
|
|
228
|
+
# @todo Replace this method by the Integer.convert()
|
|
189
229
|
# @example
|
|
190
230
|
# Unisec::Utils::Integer.deccp2stdhexcp(128640) # => "U+1F680"
|
|
191
231
|
def self.deccp2stdhexcp(int_cp)
|
|
@@ -196,12 +236,54 @@ module Unisec
|
|
|
196
236
|
module Range
|
|
197
237
|
# Convert a (integer) range to a range of Unicode code points
|
|
198
238
|
# @param range [::Range]
|
|
199
|
-
# @return [String]
|
|
239
|
+
# @return [::String]
|
|
200
240
|
# @example
|
|
201
241
|
# Unisec::Utils::Range.range2codepoint_range(1048576..1114111) # => "U+100000 - U+10FFFF"
|
|
202
242
|
def self.range2codepoint_range(range)
|
|
203
243
|
"#{Integer.deccp2stdhexcp(range.begin)} - #{Integer.deccp2stdhexcp(range.end)}"
|
|
204
244
|
end
|
|
205
245
|
end
|
|
246
|
+
|
|
247
|
+
module Arguments
|
|
248
|
+
# Converts an argument that is a string, a string of comma-separated arguments, or a symbol into an array of symbols.
|
|
249
|
+
# Useful for methods that are expected to work on an array of symbols but can receive various input formats (e.g. from the CLI).
|
|
250
|
+
# @param input [::String|Symbol] (anything else will be returned untransformed)
|
|
251
|
+
# @return [Array<Symbol>] (or anything else if input type is not respected)
|
|
252
|
+
# @example
|
|
253
|
+
# Unisec::Utils::Arguments.to_array_of_sym("arg") # => [:arg]
|
|
254
|
+
# Unisec::Utils::Arguments.to_array_of_sym("a,b,c") # => [:a, :b, :c]
|
|
255
|
+
# Unisec::Utils::Arguments.to_array_of_sym(:snake) # => [:snake]
|
|
256
|
+
# Unisec::Utils::Arguments.to_array_of_sym([:a, :b, :c]) # => [:a, :b, :c]
|
|
257
|
+
def self.to_array_of_sym(input)
|
|
258
|
+
case input
|
|
259
|
+
when ::String # a,b,c => [:a, :b, :c]
|
|
260
|
+
input.split(',').map(&:to_sym)
|
|
261
|
+
when ::Symbol # :a => [:a]
|
|
262
|
+
[input]
|
|
263
|
+
else
|
|
264
|
+
input
|
|
265
|
+
end
|
|
266
|
+
end
|
|
267
|
+
|
|
268
|
+
# Converts encoding name from CLI to encoding name in standard format or Ruby Class
|
|
269
|
+
# @param argenc [::String] Encoding name as used as argument in Unisec CLI (authorized values are: utf8 utf16be utf16le utf32be utf32le).
|
|
270
|
+
# @param target [::String] 'standard' for standard encoding name, 'class' for Ruby class naming
|
|
271
|
+
# @return [::String|Encoding] the standard encoding name, or the matching Ruby Encoding object when target is 'class'
|
|
272
|
+
# @example
|
|
273
|
+
# Unisec::Utils::Arguments.argenc2enc('utf8', target: 'standard') # => "UTF-8"
|
|
274
|
+
# Unisec::Utils::Arguments.argenc2enc('utf16be', target: 'class') # => #<Encoding:UTF-16BE (autoload)>
|
|
275
|
+
def self.argenc2enc(argenc, target: 'standard')
|
|
276
|
+
argument_encodings = %w[utf8 utf16be utf16le utf32be utf32le]
|
|
277
|
+
raise ArgumentError unless argument_encodings.include?(argenc)
|
|
278
|
+
|
|
279
|
+
if target == 'standard'
|
|
280
|
+
argenc.upcase.insert(3, '-')
|
|
281
|
+
elsif target == 'class'
|
|
282
|
+
Encoding.const_get(argenc.upcase.insert(3, '_')) # const_get safe thanks to input whitelist
|
|
283
|
+
else
|
|
284
|
+
raise ArgumentError
|
|
285
|
+
end
|
|
286
|
+
end
|
|
287
|
+
end
|
|
206
288
|
end
|
|
207
289
|
end
|
data/lib/unisec/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: unisec
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Alexandre ZANNI
|
|
@@ -29,14 +29,28 @@ dependencies:
|
|
|
29
29
|
requirements:
|
|
30
30
|
- - "~>"
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
|
-
version: '1.
|
|
32
|
+
version: '1.4'
|
|
33
33
|
type: :runtime
|
|
34
34
|
prerelease: false
|
|
35
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
36
36
|
requirements:
|
|
37
37
|
- - "~>"
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
|
-
version: '1.
|
|
39
|
+
version: '1.4'
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: dry-cli-completion
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - "~>"
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: 2.0.0
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - "~>"
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: 2.0.0
|
|
40
54
|
- !ruby/object:Gem::Dependency
|
|
41
55
|
name: paint
|
|
42
56
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -71,14 +85,14 @@ dependencies:
|
|
|
71
85
|
requirements:
|
|
72
86
|
- - "~>"
|
|
73
87
|
- !ruby/object:Gem::Version
|
|
74
|
-
version: '1.
|
|
88
|
+
version: '1.13'
|
|
75
89
|
type: :runtime
|
|
76
90
|
prerelease: false
|
|
77
91
|
version_requirements: !ruby/object:Gem::Requirement
|
|
78
92
|
requirements:
|
|
79
93
|
- - "~>"
|
|
80
94
|
- !ruby/object:Gem::Version
|
|
81
|
-
version: '1.
|
|
95
|
+
version: '1.13'
|
|
82
96
|
description: 'Toolkit for security research manipulating Unicode: confusables, homoglyphs,
|
|
83
97
|
hexdump, code point, UTF-8, UTF-16, UTF-32, properties, regexp search, size, grapheme,
|
|
84
98
|
surrogates, version, ICU, CLDR, UCD, BiDi, normalization'
|
|
@@ -137,7 +151,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
137
151
|
requirements:
|
|
138
152
|
- - ">="
|
|
139
153
|
- !ruby/object:Gem::Version
|
|
140
|
-
version: 3.
|
|
154
|
+
version: 3.3.0
|
|
141
155
|
- - "<"
|
|
142
156
|
- !ruby/object:Gem::Version
|
|
143
157
|
version: '5.0'
|
|
@@ -147,7 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
147
161
|
- !ruby/object:Gem::Version
|
|
148
162
|
version: '0'
|
|
149
163
|
requirements: []
|
|
150
|
-
rubygems_version: 4.0.
|
|
164
|
+
rubygems_version: 4.0.10
|
|
151
165
|
specification_version: 4
|
|
152
166
|
summary: Unicode Security Toolkit
|
|
153
167
|
test_files: []
|