unisec 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/unisec/blocks.rb +1 -1
- data/lib/unisec/cli/cli.rb +2 -0
- data/lib/unisec/cli/dump.rb +46 -0
- data/lib/unisec/properties.rb +1 -1
- data/lib/unisec/utils.rb +62 -22
- data/lib/unisec/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fe68d59956b20311ad5d9f5f64c10a6b0648d1c7c146b9fbacf3f25348385207
|
|
4
|
+
data.tar.gz: 8f62db4e8a2327e0ac36e1be53fb5c168fb856fd81e9cafec0af1297a451bed8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7eb59fcce432494896adc586f168835578da1ab54f6f64080d4ecc86d91bebd39d569f38e72a6c9c79ea23e303a34315c8aebaf9e2fa2b340d25f234731e82ab
|
|
7
|
+
data.tar.gz: 33711c517a93ea3e28b25cde223d94f1cc2cf2edc0db39a3af41d5c6268d5bac51bef4d534306a24df21ea283c325fe052e0723780dcdedd294930e6f1d8eeee
|
data/lib/unisec/blocks.rb
CHANGED
|
@@ -114,7 +114,7 @@ module Unisec
|
|
|
114
114
|
if block_arg.size == 1 # is a char (1 code unit, not one grapheme)
|
|
115
115
|
found = true if blk_range.include?(Utils::String.convert_to_integer(block_arg))
|
|
116
116
|
elsif block_arg.start_with?('U+') # string code point
|
|
117
|
-
found = true if blk_range.include?(Utils::String.stdhexcp2deccp(block_arg))
|
|
117
|
+
found = true if blk_range.include?(Utils::String.convert(block_arg, :integer))
|
|
118
118
|
elsif blk_name.downcase == block_arg.downcase # block name
|
|
119
119
|
found = true
|
|
120
120
|
end
|
data/lib/unisec/cli/cli.rb
CHANGED
|
@@ -27,6 +27,8 @@ module Unisec
|
|
|
27
27
|
register 'blocks search', Blocks::Search
|
|
28
28
|
register 'confusables list', Confusables::List
|
|
29
29
|
register 'confusables randomize', Confusables::Randomize
|
|
30
|
+
register 'dump codepoints integer', Dump::Codepoints::Integer
|
|
31
|
+
register 'dump codepoints standard', Dump::Codepoints::Standard
|
|
30
32
|
register 'dump dec', Dump::Dec
|
|
31
33
|
register 'dump hex', Dump::Hex
|
|
32
34
|
register 'grep', Grep
|
data/lib/unisec/cli/dump.rb
CHANGED
|
@@ -81,6 +81,52 @@ module Unisec
|
|
|
81
81
|
end
|
|
82
82
|
end
|
|
83
83
|
end
|
|
84
|
+
|
|
85
|
+
module Codepoints
|
|
86
|
+
# CLI command `unisec dump codepoints standard`.
|
|
87
|
+
#
|
|
88
|
+
# Example:
|
|
89
|
+
#
|
|
90
|
+
# ```plaintext
|
|
91
|
+
# $ unisec dump codepoints standard "unicode"
|
|
92
|
+
# U+0075 U+006E U+0069 U+0063 U+006F U+0064 U+0065
|
|
93
|
+
# ```
|
|
94
|
+
class Standard < Dry::CLI::Command
|
|
95
|
+
desc 'Code point dump (standard format)'
|
|
96
|
+
|
|
97
|
+
argument :input, required: true,
|
|
98
|
+
desc: 'String input. Read from STDIN if equal to -.'
|
|
99
|
+
|
|
100
|
+
# Code point dump (standard format).
|
|
101
|
+
# @param input [String] Input string to encode
|
|
102
|
+
def call(input: nil)
|
|
103
|
+
input = $stdin.read.chomp if input == '-'
|
|
104
|
+
puts Unisec::Utils::String.chars2codepoints(input)
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# CLI command `unisec dump codepoints integer`.
|
|
109
|
+
#
|
|
110
|
+
# Example:
|
|
111
|
+
#
|
|
112
|
+
# ```plaintext
|
|
113
|
+
# $ unisec dump codepoints integer 'I 💕 Ruby 💎'
|
|
114
|
+
# 73 32 128149 32 82 117 98 121 32 128142
|
|
115
|
+
# ```
|
|
116
|
+
class Integer < Dry::CLI::Command
|
|
117
|
+
desc 'Code point dump (integer format)'
|
|
118
|
+
|
|
119
|
+
argument :input, required: true,
|
|
120
|
+
desc: 'String input. Read from STDIN if equal to -.'
|
|
121
|
+
|
|
122
|
+
# Code point dump (integer format).
|
|
123
|
+
# @param input [String] Input string to encode
|
|
124
|
+
def call(input: nil)
|
|
125
|
+
input = $stdin.read.chomp if input == '-'
|
|
126
|
+
puts Unisec::Utils::String.chars2intcodepoints(input)
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
84
130
|
end
|
|
85
131
|
end
|
|
86
132
|
end
|
data/lib/unisec/properties.rb
CHANGED
|
@@ -119,7 +119,7 @@ module Unisec
|
|
|
119
119
|
data = Properties.char(chr)
|
|
120
120
|
display = ->(key, value) { puts Paint[key, :red, :bold].ljust(30) + " #{value}" }
|
|
121
121
|
display.call('Name:', data[:name])
|
|
122
|
-
display.call('Code Point:', data[:codepoint])
|
|
122
|
+
display.call('Code Point:', data[:codepoint] + " (#{Utils::String.convert(chr, :integer)})")
|
|
123
123
|
puts
|
|
124
124
|
display.call('Block:', data[:block])
|
|
125
125
|
display.call('Category:', data[:category])
|
data/lib/unisec/utils.rb
CHANGED
|
@@ -55,27 +55,31 @@ module Unisec
|
|
|
55
55
|
# About string conversion and manipulation.
|
|
56
56
|
module String
|
|
57
57
|
# Convert a string input into the chosen type.
|
|
58
|
-
# @param input [String] If the
|
|
59
|
-
#
|
|
60
|
-
#
|
|
58
|
+
# @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
|
|
59
|
+
# The input must represent a character encoded in hexadecimal, decimal, binary or standard code point format.
|
|
60
|
+
# See {convert_to_integer} and {convert_to_char} for detailed examples.
|
|
61
|
+
# @param target_type [Symbol] Convert to the chosen type. Currently only supports `:integer` and `:char`.
|
|
61
62
|
# @return [Variable] The type of the output depends on the chosen `target_type`.
|
|
62
63
|
# @example
|
|
63
64
|
# Unisec::Utils::String.convert('0x1f4a9', :integer) # => 128169
|
|
65
|
+
# Unisec::Utils::String.convert('0x1f4a9', :char) # => "💩"
|
|
64
66
|
def self.convert(input, target_type)
|
|
65
67
|
case target_type
|
|
66
68
|
when :integer
|
|
67
69
|
convert_to_integer(input)
|
|
70
|
+
when :char
|
|
71
|
+
convert_to_char(input)
|
|
68
72
|
else
|
|
69
73
|
raise TypeError, "Target type \"#{target_type}\" not avaible"
|
|
70
74
|
end
|
|
71
75
|
end
|
|
72
76
|
|
|
73
|
-
# Internal method used for {
|
|
77
|
+
# Internal method used for {convert}.
|
|
74
78
|
#
|
|
75
79
|
# Convert a string input into integer.
|
|
76
|
-
# @param input [String]
|
|
77
|
-
#
|
|
78
|
-
# automatically based on the prefix.
|
|
80
|
+
# @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
|
|
81
|
+
# The input must represent a character encoded in hexadecimal, decimal, binary or standard code point format.
|
|
82
|
+
# The input type is determined automatically based on the prefix.
|
|
79
83
|
# @return [Integer]
|
|
80
84
|
# @example
|
|
81
85
|
# # Hexadecimal
|
|
@@ -86,10 +90,14 @@ module Unisec
|
|
|
86
90
|
# Unisec::Utils::String.convert_to_integer('0b11111010010101001') # => 128169
|
|
87
91
|
# # Unicode string
|
|
88
92
|
# Unisec::Utils::String.convert_to_integer('💩') # => 128169
|
|
93
|
+
# # Standardized format of hexadecimal code point
|
|
94
|
+
# Unisec::Utils::String.convert_to_integer('U+1F4A9') # => 128169
|
|
89
95
|
def self.convert_to_integer(input)
|
|
90
96
|
case autodetect(input)
|
|
91
97
|
when :hexadecimal
|
|
92
98
|
input.hex2dec(prefix: '0x').to_i
|
|
99
|
+
when :stdcp
|
|
100
|
+
input.hex2dec(prefix: 'U+').to_i
|
|
93
101
|
when :decimal
|
|
94
102
|
input.to_i
|
|
95
103
|
when :binary
|
|
@@ -101,11 +109,38 @@ module Unisec
|
|
|
101
109
|
end
|
|
102
110
|
end
|
|
103
111
|
|
|
112
|
+
# Internal method used for {convert}.
|
|
113
|
+
#
|
|
114
|
+
# Convert a string input into a character.
|
|
115
|
+
# @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
|
|
116
|
+
# The input must represent a character encoded in hexadecimal, decimal, binary or standard code point format.
|
|
117
|
+
# The input type is determined automatically based on the prefix.
|
|
118
|
+
# @return [String]
|
|
119
|
+
# @example
|
|
120
|
+
# # Hexadecimal
|
|
121
|
+
# Unisec::Utils::String.convert_to_char('0x1f4a9') # => "💩"
|
|
122
|
+
# # Decimal
|
|
123
|
+
# Unisec::Utils::String.convert_to_char('0d128169') # => "💩"
|
|
124
|
+
# # Binary
|
|
125
|
+
# Unisec::Utils::String.convert_to_char('0b11111010010101001') # => "💩"
|
|
126
|
+
# # Unicode string
|
|
127
|
+
# Unisec::Utils::String.convert_to_char('💩') # => "💩"
|
|
128
|
+
# # Standardized format of hexadecimal code point
|
|
129
|
+
# Unisec::Utils::String.convert_to_char('U+1F4A9') # => "💩"
|
|
130
|
+
def self.convert_to_char(input)
|
|
131
|
+
case autodetect(input)
|
|
132
|
+
when :hexadecimal, :stdcp, :decimal, :binary, :string
|
|
133
|
+
[convert(input, :integer)].pack('U')
|
|
134
|
+
else
|
|
135
|
+
raise TypeError, "Input \"#{input}\" is not of the expected type"
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
|
|
104
139
|
# Internal method used for {.convert}.
|
|
105
140
|
#
|
|
106
141
|
# Autodetect the representation type of the string input.
|
|
107
142
|
# @param str [String] Input.
|
|
108
|
-
# @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string
|
|
143
|
+
# @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string`, `:stdcp`.
|
|
109
144
|
# @example
|
|
110
145
|
# # Hexadecimal
|
|
111
146
|
# Unisec::Utils::String.autodetect('0x1f4a9') # => :hexadecimal
|
|
@@ -115,10 +150,14 @@ module Unisec
|
|
|
115
150
|
# Unisec::Utils::String.autodetect('0b11111010010101001') # => :binary
|
|
116
151
|
# # Unicode string
|
|
117
152
|
# Unisec::Utils::String.autodetect('💩') # => :string
|
|
153
|
+
# # Standardized format of hexadecimal code point
|
|
154
|
+
# Unisec::Utils::String.autodetect('U+1F4A9') # => :stdcp
|
|
118
155
|
def self.autodetect(str)
|
|
119
156
|
case str
|
|
120
|
-
when /0x[0-9a-fA-F]
|
|
157
|
+
when /0x[0-9a-fA-F]+/
|
|
121
158
|
:hexadecimal
|
|
159
|
+
when /U\+[0-9A-F]+/
|
|
160
|
+
:stdcp
|
|
122
161
|
when /0d[0-9]+/
|
|
123
162
|
:decimal
|
|
124
163
|
when /0b[0-1]+/
|
|
@@ -141,8 +180,9 @@ module Unisec
|
|
|
141
180
|
# Display the code point in Unicode format for a given character (code point as string)
|
|
142
181
|
# @param chr [String] Unicode code point (as character / string)
|
|
143
182
|
# @return [String] code point in Unicode format
|
|
183
|
+
# @todo Replace this method with a `:stdcp` target type in String.convert()
|
|
144
184
|
# @example
|
|
145
|
-
# Unisec::
|
|
185
|
+
# Unisec::Utils::String.char2codepoint('💎') # => "U+1F48E"
|
|
146
186
|
def self.char2codepoint(chr)
|
|
147
187
|
Integer.deccp2stdhexcp(chr.codepoints.first)
|
|
148
188
|
end
|
|
@@ -151,8 +191,8 @@ module Unisec
|
|
|
151
191
|
# @param chrs [String] Unicode code points (as characters / string)
|
|
152
192
|
# @return [String] code points in Unicode format
|
|
153
193
|
# @example
|
|
154
|
-
# Unisec::
|
|
155
|
-
# Unisec::
|
|
194
|
+
# Unisec::Utils::String.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
|
|
195
|
+
# Unisec::Utils::String.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"
|
|
156
196
|
def self.chars2codepoints(chrs)
|
|
157
197
|
out = []
|
|
158
198
|
chrs.each_char do |chr|
|
|
@@ -161,6 +201,15 @@ module Unisec
|
|
|
161
201
|
out.join(' ')
|
|
162
202
|
end
|
|
163
203
|
|
|
204
|
+
# Display the code points in integer format for the given characters (code points as string)
|
|
205
|
+
# @param chrs [String] Unicode code points (as characters / string)
|
|
206
|
+
# @return [String] code points in integer format
|
|
207
|
+
# @example
|
|
208
|
+
# Unisec::Utils::String.chars2intcodepoints('I 💕 Ruby 💎') # => "73 32 128149 32 82 117 98 121 32 128142"
|
|
209
|
+
def self.chars2intcodepoints(chrs)
|
|
210
|
+
chrs.codepoints.map(&:to_s).join(' ')
|
|
211
|
+
end
|
|
212
|
+
|
|
164
213
|
# Convert a string of hex encoded Unicode code points range to actual
|
|
165
214
|
# integer Ruby range.
|
|
166
215
|
# @param range_str [String] Unicode code points range as in data/Blocks.txt
|
|
@@ -170,22 +219,13 @@ module Unisec
|
|
|
170
219
|
def self.to_range(range_str)
|
|
171
220
|
::Range.new(*range_str.split('..').map { |x| x.hex2dec.to_i })
|
|
172
221
|
end
|
|
173
|
-
|
|
174
|
-
# Convert from standardized format hexadecimal code point to decimal code point
|
|
175
|
-
# @param std_hex_cp [String] Code point in standardized hexadecimal format
|
|
176
|
-
# @return [Integer] Code point in decimal format
|
|
177
|
-
# @example
|
|
178
|
-
# Unisec::Utils::String.stdhexcp2deccp('U+2026') # => 8230
|
|
179
|
-
def self.stdhexcp2deccp(std_hex_cp)
|
|
180
|
-
hex = "0x#{std_hex_cp[2..]}" # replace U+ prefix with 0x
|
|
181
|
-
convert_to_integer(hex)
|
|
182
|
-
end
|
|
183
222
|
end
|
|
184
223
|
|
|
185
224
|
module Integer
|
|
186
225
|
# Convert from decimal code point to standardized format hexadecimal code point
|
|
187
226
|
# @param int_cp [Integer] Code point in decimal format
|
|
188
227
|
# @return [String] code point in Unicode format
|
|
228
|
+
# @todo Replace this method with Integer.convert()
|
|
189
229
|
# @example
|
|
190
230
|
# Unisec::Utils::Integer.deccp2stdhexcp(128640) # => "U+1F680"
|
|
191
231
|
def self.deccp2stdhexcp(int_cp)
|
data/lib/unisec/version.rb
CHANGED