unisec 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +2 -1
- data/data/Blocks.txt +383 -0
- data/data/DerivedName.txt +1659 -12
- data/lib/unisec/bidi.rb +8 -8
- data/lib/unisec/blocks.rb +209 -0
- data/lib/unisec/cli/blocks.rb +93 -0
- data/lib/unisec/cli/cli.rb +12 -3
- data/lib/unisec/cli/dump.rb +87 -0
- data/lib/unisec/cli/normalization.rb +71 -39
- data/lib/unisec/cli/planes.rb +99 -0
- data/lib/unisec/cli/rugrep.rb +1 -1
- data/lib/unisec/confusables.rb +3 -1
- data/lib/unisec/decdump.rb +118 -0
- data/lib/unisec/hexdump.rb +1 -1
- data/lib/unisec/normalization.rb +46 -1
- data/lib/unisec/planes.rb +224 -0
- data/lib/unisec/properties.rb +11 -42
- data/lib/unisec/rugrep.rb +3 -2
- data/lib/unisec/utils.rb +94 -0
- data/lib/unisec/version.rb +1 -1
- data/lib/unisec/versions.rb +5 -0
- data/lib/unisec.rb +3 -0
- metadata +24 -27
- data/lib/unisec/cli/hexdump.rb +0 -47
data/lib/unisec/utils.rb
CHANGED
|
@@ -20,6 +20,35 @@ class Integer
|
|
|
20
20
|
end
|
|
21
21
|
end
|
|
22
22
|
|
|
23
|
+
class String
  # Convert a string to a boolean.
  #
  # The comparison is case-insensitive and ignores one trailing line
  # terminator (the receiver is passed through `chomp` and `downcase`).
  # @return [TrueClass|FalseClass] `true` for "true", "yes", "y" or "1";
  #   `false` for "false", "no", "n" or "0"
  # @raise [ArgumentError] if the string is none of the recognised values
  # @example
  #   "true".to_bool # => true
  #   "No\n".to_bool # => false
  #   "maybe".to_bool # => ArgumentError
  def to_bool
    case chomp.downcase
    when 'true', 'yes', 'y', '1'
      true
    when 'false', 'no', 'n', '0'
      false
    else
      # Report the receiver itself: there is no local named `str` in this method.
      raise ArgumentError, "invalid value for Boolean: #{inspect}"
    end
  end
end
|
|
39
|
+
|
|
40
|
+
class Range
  # Is a range included in another range? Are all values of the given range
  # included in the receiver?
  #
  # Delegates to the core `Range#cover?`, so an excluded end bound
  # (`...`) on either range is taken into account instead of comparing the
  # raw `begin` / `end` values only.
  # @param range [Range] candidate sub-range
  # @return [TrueClass|FalseClass]
  # @example
  #   (1..10).include_range?(2..11) # => false
  #   (1..10).include_range?(2..4) # => true
  #   (1...10).include_range?(5..10) # => false (10 is excluded from the receiver)
  def include_range?(range)
    cover?(range)
  end
end
|
|
51
|
+
|
|
23
52
|
module Unisec
|
|
24
53
|
# Generic stuff not Unicode-related that can be re-used.
|
|
25
54
|
module Utils
|
|
@@ -108,6 +137,71 @@ module Unisec
|
|
|
108
137
|
def self.grapheme_reverse(str)
|
|
109
138
|
str.grapheme_clusters.reverse.join
|
|
110
139
|
end
|
|
140
|
+
|
|
141
|
+
# Display the code point in Unicode format for a given character (code point as string).
# Only the first code point of the string is considered.
# @param chr [String] Unicode code point (as character / string)
# @return [String] code point in Unicode format
# @example
#   Unisec::Utils::String.char2codepoint('💎') # => "U+1F48E"
def self.char2codepoint(chr)
  first_cp = chr.codepoints.first
  Integer.deccp2stdhexcp(first_cp)
end
|
|
149
|
+
|
|
150
|
+
# Display the code points in Unicode format for the given characters (code points as string).
# Each character of the string is converted with {char2codepoint} and the
# results are joined with a single space.
# @param chrs [String] Unicode code points (as characters / string)
# @return [String] space-separated code points in Unicode format
# @example
#   Unisec::Utils::String.chars2codepoints("y\u0300\u0301") # => "U+0079 U+0300 U+0301"
#   Unisec::Utils::String.chars2codepoints("🧑\u200D🌾") # => "U+1F9D1 U+200D U+1F33E"
def self.chars2codepoints(chrs)
  chrs.each_char.map { |chr| char2codepoint(chr) }.join(' ')
end
|
|
163
|
+
|
|
164
|
+
# Convert a string of hex encoded Unicode code points range to actual
# integer Ruby range.
# @param range_str [String] Unicode code points range as in data/Blocks.txt
#   (hexadecimal bounds separated by `..`)
# @return [Range] range of integer code points
# @example
#   Unisec::Utils::String.to_range('0080..00FF') # => 128..255
def self.to_range(range_str)
  # String#hex parses the hexadecimal digits straight to an Integer, so no
  # intermediate decimal string is needed.
  ::Range.new(*range_str.split('..').map(&:hex))
end
|
|
173
|
+
|
|
174
|
+
# Convert from standardized format hexadecimal code point to decimal code point.
# The first two characters of the input (the `U+` prefix) are replaced with
# `0x` and the result is handed to `convert_to_integer`.
# @param std_hex_cp [String] Code point in standardized hexadecimal format
# @return [Integer] Code point in decimal format
# @example
#   Unisec::Utils::String.stdhexcp2deccp('U+2026') # => 8230
def self.stdhexcp2deccp(std_hex_cp)
  digits = std_hex_cp[2..] # everything after the two-character prefix
  convert_to_integer("0x#{digits}")
end
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
module Integer
  # Convert from decimal code point to standardized format hexadecimal code point.
  # The hexadecimal value is upper-cased and zero-padded to at least four digits.
  # @param int_cp [Integer] Code point in decimal format
  # @return [String] code point in Unicode format
  # @example
  #   Unisec::Utils::Integer.deccp2stdhexcp(128640) # => "U+1F680"
  def self.deccp2stdhexcp(int_cp)
    hex_digits = format('%.4x', int_cp).upcase
    "U+#{hex_digits}"
  end
end
|
|
195
|
+
|
|
196
|
+
module Range
  # Convert a (integer) range to a range of Unicode code points.
  # Both bounds are formatted with `Integer.deccp2stdhexcp` and joined with " - ".
  # @param range [::Range] range whose `begin` and `end` are integer code points
  # @return [String] both bounds in Unicode format
  # @example
  #   Unisec::Utils::Range.range2codepoint_range(1048576..1114111) # => "U+100000 - U+10FFFF"
  def self.range2codepoint_range(range)
    first_cp = Integer.deccp2stdhexcp(range.begin)
    last_cp = Integer.deccp2stdhexcp(range.end)
    "#{first_cp} - #{last_cp}"
  end
end
|
|
112
206
|
end
|
|
113
207
|
end
|
data/lib/unisec/version.rb
CHANGED
data/lib/unisec/versions.rb
CHANGED
|
@@ -55,6 +55,10 @@ module Unisec
|
|
|
55
55
|
ucd_derivedname: {
|
|
56
56
|
version: Unisec::Rugrep.ucd_derivedname_version,
|
|
57
57
|
label: 'UCD (data/DerivedName.txt)'
|
|
58
|
+
},
|
|
59
|
+
ucd_blocks: {
|
|
60
|
+
version: Unisec::Blocks.ucd_blocks_version,
|
|
61
|
+
label: 'UCD (data/Blocks.txt)'
|
|
58
62
|
}
|
|
59
63
|
}
|
|
60
64
|
end
|
|
@@ -81,6 +85,7 @@ module Unisec
|
|
|
81
85
|
colorize.call(:twittercldr_cldr) +
|
|
82
86
|
colorize.call(:ruby_unicode_emoji) +
|
|
83
87
|
colorize.call(:ucd_derivedname) +
|
|
88
|
+
colorize.call(:ucd_blocks) +
|
|
84
89
|
Paint["\nGems:\n", :underline] +
|
|
85
90
|
colorize.call(:unisec) +
|
|
86
91
|
colorize.call(:twittercldr) +
|
data/lib/unisec.rb
CHANGED
|
@@ -3,9 +3,12 @@
|
|
|
3
3
|
require 'unisec/version'
|
|
4
4
|
|
|
5
5
|
require 'unisec/bidi'
|
|
6
|
+
require 'unisec/blocks'
|
|
6
7
|
require 'unisec/confusables'
|
|
8
|
+
require 'unisec/decdump'
|
|
7
9
|
require 'unisec/hexdump'
|
|
8
10
|
require 'unisec/normalization'
|
|
11
|
+
require 'unisec/planes'
|
|
9
12
|
require 'unisec/properties'
|
|
10
13
|
require 'unisec/rugrep'
|
|
11
14
|
require 'unisec/size'
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: unisec
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Alexandre ZANNI
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: ctf-party
|
|
@@ -16,14 +15,14 @@ dependencies:
|
|
|
16
15
|
requirements:
|
|
17
16
|
- - "~>"
|
|
18
17
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '
|
|
18
|
+
version: '5.0'
|
|
20
19
|
type: :runtime
|
|
21
20
|
prerelease: false
|
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
22
|
requirements:
|
|
24
23
|
- - "~>"
|
|
25
24
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: '
|
|
25
|
+
version: '5.0'
|
|
27
26
|
- !ruby/object:Gem::Dependency
|
|
28
27
|
name: dry-cli
|
|
29
28
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -58,34 +57,28 @@ dependencies:
|
|
|
58
57
|
requirements:
|
|
59
58
|
- - "~>"
|
|
60
59
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: '6.
|
|
62
|
-
- - ">="
|
|
63
|
-
- !ruby/object:Gem::Version
|
|
64
|
-
version: 6.11.5
|
|
60
|
+
version: '6.13'
|
|
65
61
|
type: :runtime
|
|
66
62
|
prerelease: false
|
|
67
63
|
version_requirements: !ruby/object:Gem::Requirement
|
|
68
64
|
requirements:
|
|
69
65
|
- - "~>"
|
|
70
66
|
- !ruby/object:Gem::Version
|
|
71
|
-
version: '6.
|
|
72
|
-
- - ">="
|
|
73
|
-
- !ruby/object:Gem::Version
|
|
74
|
-
version: 6.11.5
|
|
67
|
+
version: '6.13'
|
|
75
68
|
- !ruby/object:Gem::Dependency
|
|
76
69
|
name: unicode-confusable
|
|
77
70
|
requirement: !ruby/object:Gem::Requirement
|
|
78
71
|
requirements:
|
|
79
72
|
- - "~>"
|
|
80
73
|
- !ruby/object:Gem::Version
|
|
81
|
-
version: '1.
|
|
74
|
+
version: '1.12'
|
|
82
75
|
type: :runtime
|
|
83
76
|
prerelease: false
|
|
84
77
|
version_requirements: !ruby/object:Gem::Requirement
|
|
85
78
|
requirements:
|
|
86
79
|
- - "~>"
|
|
87
80
|
- !ruby/object:Gem::Version
|
|
88
|
-
version: '1.
|
|
81
|
+
version: '1.12'
|
|
89
82
|
description: 'Toolkit for security research manipulating Unicode: confusables, homoglyphs,
|
|
90
83
|
hexdump, code point, UTF-8, UTF-16, UTF-32, properties, regexp search, size, grapheme,
|
|
91
84
|
surrogates, version, ICU, CLDR, UCD, BiDi, normalization'
|
|
@@ -97,22 +90,28 @@ extra_rdoc_files: []
|
|
|
97
90
|
files:
|
|
98
91
|
- LICENSE
|
|
99
92
|
- bin/unisec
|
|
93
|
+
- data/Blocks.txt
|
|
100
94
|
- data/DerivedName.txt
|
|
101
95
|
- lib/unisec.rb
|
|
102
96
|
- lib/unisec/bidi.rb
|
|
97
|
+
- lib/unisec/blocks.rb
|
|
103
98
|
- lib/unisec/cli/bidi.rb
|
|
99
|
+
- lib/unisec/cli/blocks.rb
|
|
104
100
|
- lib/unisec/cli/cli.rb
|
|
105
101
|
- lib/unisec/cli/confusables.rb
|
|
106
|
-
- lib/unisec/cli/
|
|
102
|
+
- lib/unisec/cli/dump.rb
|
|
107
103
|
- lib/unisec/cli/normalization.rb
|
|
104
|
+
- lib/unisec/cli/planes.rb
|
|
108
105
|
- lib/unisec/cli/properties.rb
|
|
109
106
|
- lib/unisec/cli/rugrep.rb
|
|
110
107
|
- lib/unisec/cli/size.rb
|
|
111
108
|
- lib/unisec/cli/surrogates.rb
|
|
112
109
|
- lib/unisec/cli/versions.rb
|
|
113
110
|
- lib/unisec/confusables.rb
|
|
111
|
+
- lib/unisec/decdump.rb
|
|
114
112
|
- lib/unisec/hexdump.rb
|
|
115
113
|
- lib/unisec/normalization.rb
|
|
114
|
+
- lib/unisec/planes.rb
|
|
116
115
|
- lib/unisec/properties.rb
|
|
117
116
|
- lib/unisec/rugrep.rb
|
|
118
117
|
- lib/unisec/size.rb
|
|
@@ -120,18 +119,17 @@ files:
|
|
|
120
119
|
- lib/unisec/utils.rb
|
|
121
120
|
- lib/unisec/version.rb
|
|
122
121
|
- lib/unisec/versions.rb
|
|
123
|
-
homepage: https://github.com/
|
|
122
|
+
homepage: https://github.com/noraj/unisec
|
|
124
123
|
licenses:
|
|
125
124
|
- MIT
|
|
126
125
|
metadata:
|
|
127
126
|
yard.run: yard
|
|
128
|
-
bug_tracker_uri: https://github.com/
|
|
129
|
-
changelog_uri: https://github.com/
|
|
130
|
-
documentation_uri: https://
|
|
131
|
-
homepage_uri: https://github.com/
|
|
132
|
-
source_code_uri: https://github.com/
|
|
127
|
+
bug_tracker_uri: https://github.com/noraj/unisec/issues
|
|
128
|
+
changelog_uri: https://github.com/noraj/unisec/releases
|
|
129
|
+
documentation_uri: https://noraj.github.io/unisec/
|
|
130
|
+
homepage_uri: https://github.com/noraj/unisec
|
|
131
|
+
source_code_uri: https://github.com/noraj/unisec/
|
|
133
132
|
rubygems_mfa_required: 'true'
|
|
134
|
-
post_install_message:
|
|
135
133
|
rdoc_options: []
|
|
136
134
|
require_paths:
|
|
137
135
|
- lib
|
|
@@ -139,18 +137,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
139
137
|
requirements:
|
|
140
138
|
- - ">="
|
|
141
139
|
- !ruby/object:Gem::Version
|
|
142
|
-
version: 3.
|
|
140
|
+
version: 3.2.0
|
|
143
141
|
- - "<"
|
|
144
142
|
- !ruby/object:Gem::Version
|
|
145
|
-
version: '
|
|
143
|
+
version: '5.0'
|
|
146
144
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
147
145
|
requirements:
|
|
148
146
|
- - ">="
|
|
149
147
|
- !ruby/object:Gem::Version
|
|
150
148
|
version: '0'
|
|
151
149
|
requirements: []
|
|
152
|
-
rubygems_version:
|
|
153
|
-
signing_key:
|
|
150
|
+
rubygems_version: 4.0.3
|
|
154
151
|
specification_version: 4
|
|
155
152
|
summary: Unicode Security Toolkit
|
|
156
153
|
test_files: []
|
data/lib/unisec/cli/hexdump.rb
DELETED
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'dry/cli'
|
|
4
|
-
require 'unisec'
|
|
5
|
-
|
|
6
|
-
module Unisec
|
|
7
|
-
module CLI
|
|
8
|
-
module Commands
|
|
9
|
-
# CLI command `unisec hexdumps` for the class {Unisec::Hexdump} from the lib.
|
|
10
|
-
#
|
|
11
|
-
# Example:
|
|
12
|
-
#
|
|
13
|
-
# ```plaintext
|
|
14
|
-
# $ unisec hexdump "ACCEIS"
|
|
15
|
-
# UTF-8: 41 43 43 45 49 53
|
|
16
|
-
# UTF-16BE: 0041 0043 0043 0045 0049 0053
|
|
17
|
-
# UTF-16LE: 4100 4300 4300 4500 4900 5300
|
|
18
|
-
# UTF-32BE: 00000041 00000043 00000043 00000045 00000049 00000053
|
|
19
|
-
# UTF-32LE: 41000000 43000000 43000000 45000000 49000000 53000000
|
|
20
|
-
#
|
|
21
|
-
# $unisec hexdump "ACCEIS" --enc utf16le
|
|
22
|
-
# 4100 4300 4300 4500 4900 5300
|
|
23
|
-
# ```
|
|
24
|
-
class Hexdump < Dry::CLI::Command
|
|
25
|
-
desc 'Hexdump in all Unicode encodings'
|
|
26
|
-
|
|
27
|
-
argument :input, required: true,
|
|
28
|
-
desc: 'String input. Read from STDIN if equal to -.'
|
|
29
|
-
|
|
30
|
-
option :enc, default: nil, values: %w[utf8 utf16be utf16le utf32be utf32le],
|
|
31
|
-
desc: 'Output only in the specified encoding.'
|
|
32
|
-
|
|
33
|
-
# Hexdump of all Unicode encodings.
|
|
34
|
-
# @param input [String] Input string to encode
|
|
35
|
-
def call(input: nil, **options)
|
|
36
|
-
input = $stdin.read.chomp if input == '-'
|
|
37
|
-
if options[:enc].nil?
|
|
38
|
-
puts Unisec::Hexdump.new(input).display
|
|
39
|
-
else
|
|
40
|
-
# using send() is safe here thanks to the value whitelist
|
|
41
|
-
puts Unisec::Hexdump.send(options[:enc], input)
|
|
42
|
-
end
|
|
43
|
-
end
|
|
44
|
-
end
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
end
|