unisec 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9081ac95de968e70cd91438a73b185efee13e33ccf16c40a2791da5963d3d67c
4
- data.tar.gz: 99a651d4efc5f6b36ae088ec254e0dc950b5d84db9105dc851f29d51609615f8
3
+ metadata.gz: fe68d59956b20311ad5d9f5f64c10a6b0648d1c7c146b9fbacf3f25348385207
4
+ data.tar.gz: 8f62db4e8a2327e0ac36e1be53fb5c168fb856fd81e9cafec0af1297a451bed8
5
5
  SHA512:
6
- metadata.gz: ab342720e300cd25e167385f70402e00ef240ed6c422f7aeea666774b4e477f423164fe3de49146bc0f4a3f2565f86effb1465f112085568c65b1625b9d911e5
7
- data.tar.gz: bc105f1430c812711727600365db10871e1bd69ad274c7ccc0a2b5e1362676304ce27e5e85c6a988986fe9a82bc6189bf3c14e6ecb577f327dc41bd6923f241d
6
+ metadata.gz: 7eb59fcce432494896adc586f168835578da1ab54f6f64080d4ecc86d91bebd39d569f38e72a6c9c79ea23e303a34315c8aebaf9e2fa2b340d25f234731e82ab
7
+ data.tar.gz: 33711c517a93ea3e28b25cde223d94f1cc2cf2edc0db39a3af41d5c6268d5bac51bef4d534306a24df21ea283c325fe052e0723780dcdedd294930e6f1d8eeee
data/lib/unisec/blocks.rb CHANGED
@@ -114,7 +114,7 @@ module Unisec
114
114
  if block_arg.size == 1 # is a char (1 code unit, not one grapheme)
115
115
  found = true if blk_range.include?(Utils::String.convert_to_integer(block_arg))
116
116
  elsif block_arg.start_with?('U+') # string code point
117
- found = true if blk_range.include?(Utils::String.stdhexcp2deccp(block_arg))
117
+ found = true if blk_range.include?(Utils::String.convert(block_arg, :integer))
118
118
  elsif blk_name.downcase == block_arg.downcase # block name
119
119
  found = true
120
120
  end
@@ -27,6 +27,8 @@ module Unisec
27
27
  register 'blocks search', Blocks::Search
28
28
  register 'confusables list', Confusables::List
29
29
  register 'confusables randomize', Confusables::Randomize
30
+ register 'dump codepoints integer', Dump::Codepoints::Integer
31
+ register 'dump codepoints standard', Dump::Codepoints::Standard
30
32
  register 'dump dec', Dump::Dec
31
33
  register 'dump hex', Dump::Hex
32
34
  register 'grep', Grep
@@ -81,6 +81,52 @@ module Unisec
81
81
  end
82
82
  end
83
83
  end
84
+
85
+ module Codepoints
86
+ # CLI command `unisec dump codepoints standard`.
87
+ #
88
+ # Example:
89
+ #
90
+ # ```plaintext
91
+ # $ unisec dump codepoints standard "unicode"
92
+ # U+0075 U+006E U+0069 U+0063 U+006F U+0064 U+0065
93
+ # ```
94
+ class Standard < Dry::CLI::Command
95
+ desc 'Code point dump (standard format)'
96
+
97
+ argument :input, required: true,
98
+ desc: 'String input. Read from STDIN if equal to -.'
99
+
100
+ # Code point dump (standard format).
101
+ # @param input [String] Input string to encode
102
+ def call(input: nil)
103
+ input = $stdin.read.chomp if input == '-'
104
+ puts Unisec::Utils::String.chars2codepoints(input)
105
+ end
106
+ end
107
+
108
+ # CLI command `unisec dump codepoints integer`.
109
+ #
110
+ # Example:
111
+ #
112
+ # ```plaintext
113
+ # $ unisec dump codepoints integer 'I 💕 Ruby 💎'
114
+ # 73 32 128149 32 82 117 98 121 32 128142
115
+ # ```
116
+ class Integer < Dry::CLI::Command
117
+ desc 'Code point dump (integer format)'
118
+
119
+ argument :input, required: true,
120
+ desc: 'String input. Read from STDIN if equal to -.'
121
+
122
+ # Code point dump (integer format).
123
+ # @param input [String] Input string to encode
124
+ def call(input: nil)
125
+ input = $stdin.read.chomp if input == '-'
126
+ puts Unisec::Utils::String.chars2intcodepoints(input)
127
+ end
128
+ end
129
+ end
84
130
  end
85
131
  end
86
132
  end
@@ -119,7 +119,7 @@ module Unisec
119
119
  data = Properties.char(chr)
120
120
  display = ->(key, value) { puts Paint[key, :red, :bold].ljust(30) + " #{value}" }
121
121
  display.call('Name:', data[:name])
122
- display.call('Code Point:', data[:codepoint])
122
+ display.call('Code Point:', data[:codepoint] + " (#{Utils::String.convert(chr, :integer)})")
123
123
  puts
124
124
  display.call('Block:', data[:block])
125
125
  display.call('Category:', data[:category])
data/lib/unisec/utils.rb CHANGED
@@ -55,27 +55,31 @@ module Unisec
55
55
  # About string conversion and manipulation.
56
56
  module String
57
57
  # Convert a string input into the chosen type.
58
- # @param input [String] If the target type is `:integer`, the string must represent a number encoded in
59
- # hexadecimal, decimal, binary. If it's a Unicode string, only the first code point will be taken into account.
60
- # @param target_type [Symbol] Convert to the chosen type. Currently only supports `:integer`.
58
+ # @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
59
+ # The input must represent a character encoded in hexadecimal, decimal, binary or standard code point format.
60
+ # See {convert_to_integer} and {convert_to_char} for detailed examples.
61
+ # @param target_type [Symbol] Convert to the chosen type. Currently only supports `:integer` and `:char`.
61
62
  # @return [Variable] The type of the output depends on the chosen `target_type`.
62
63
  # @example
63
64
  # Unisec::Utils::String.convert('0x1f4a9', :integer) # => 128169
65
+ # Unisec::Utils::String.convert('0x1f4a9', :char) # => "💩"
64
66
  def self.convert(input, target_type)
65
67
  case target_type
66
68
  when :integer
67
69
  convert_to_integer(input)
70
+ when :char
71
+ convert_to_char(input)
68
72
  else
69
73
  raise TypeError, "Target type \"#{target_type}\" not avaible"
70
74
  end
71
75
  end
72
76
 
73
- # Internal method used for {.convert}.
77
+ # Internal method used for {convert}.
74
78
  #
75
79
  # Convert a string input into integer.
76
- # @param input [String] The string must represent a number encoded in hexadecimal, decimal, binary. If it's a
77
- # Unicode string, only the first code point will be taken into account. The input type is determined
78
- # automatically based on the prefix.
80
+ # @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
81
+ # The input must represent a character encoded in hexadecimal, decimal, binary or standard code point format.
82
+ # The input type is determined automatically based on the prefix.
79
83
  # @return [Integer]
80
84
  # @example
81
85
  # # Hexadecimal
@@ -86,10 +90,14 @@ module Unisec
86
90
  # Unisec::Utils::String.convert_to_integer('0b11111010010101001') # => 128169
87
91
  # # Unicode string
88
92
  # Unisec::Utils::String.convert_to_integer('💩') # => 128169
93
+ # # Standardized format of hexadecimal code point
94
+ # Unisec::Utils::String.convert_to_integer('U+1F4A9') # => 128169
89
95
  def self.convert_to_integer(input)
90
96
  case autodetect(input)
91
97
  when :hexadecimal
92
98
  input.hex2dec(prefix: '0x').to_i
99
+ when :stdcp
100
+ input.hex2dec(prefix: 'U+').to_i
93
101
  when :decimal
94
102
  input.to_i
95
103
  when :binary
@@ -101,11 +109,38 @@ module Unisec
101
109
  end
102
110
  end
103
111
 
112
+ # Internal method used for {convert}.
113
+ #
114
+ # Convert a string input into a character.
115
+ # @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
116
+ # The input must represent a character encoded in hexadecimal, decimal, binary or standard code point format.
117
+ # The input type is determined automatically based on the prefix.
118
+ # @return [String]
119
+ # @example
120
+ # # Hexadecimal
121
+ # Unisec::Utils::String.convert_to_char('0x1f4a9') # => "💩"
122
+ # # Decimal
123
+ # Unisec::Utils::String.convert_to_char('0d128169') # => "💩"
124
+ # # Binary
125
+ # Unisec::Utils::String.convert_to_char('0b11111010010101001') # => "💩"
126
+ # # Unicode string
127
+ # Unisec::Utils::String.convert_to_char('💩') # => "💩"
128
+ # # Standardized format of hexadecimal code point
129
+ # Unisec::Utils::String.convert_to_char('U+1F4A9') # => "💩"
130
+ def self.convert_to_char(input)
131
+ case autodetect(input)
132
+ when :hexadecimal, :stdcp, :decimal, :binary, :string
133
+ [convert(input, :integer)].pack('U')
134
+ else
135
+ raise TypeError, "Input \"#{input}\" is not of the expected type"
136
+ end
137
+ end
138
+
104
139
  # Internal method used for {.convert}.
105
140
  #
106
141
  # Autodetect the representation type of the string input.
107
142
  # @param str [String] Input.
108
- # @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string`.
143
+ # @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string`, `:stdcp`.
109
144
  # @example
110
145
  # # Hexadecimal
111
146
  # Unisec::Utils::String.autodetect('0x1f4a9') # => :hexadecimal
@@ -115,10 +150,14 @@ module Unisec
115
150
  # Unisec::Utils::String.autodetect('0b11111010010101001') # => :binary
116
151
  # # Unicode string
117
152
  # Unisec::Utils::String.autodetect('💩') # => :string
153
+ # # Standardized format of hexadecimal code point
154
+ # Unisec::Utils::String.autodetect('U+1F4A9') # => :stdcp
118
155
  def self.autodetect(str)
119
156
  case str
120
- when /0x[0-9a-fA-F]/
157
+ when /0x[0-9a-fA-F]+/
121
158
  :hexadecimal
159
+ when /U\+[0-9A-F]+/
160
+ :stdcp
122
161
  when /0d[0-9]+/
123
162
  :decimal
124
163
  when /0b[0-1]+/
@@ -141,8 +180,9 @@ module Unisec
141
180
  # Display the code point in Unicode format for a given character (code point as string)
142
181
  # @param chr [String] Unicode code point (as character / string)
143
182
  # @return [String] code point in Unicode format
183
+ # @todo Replace this method with a `:stdcp` target type in String.convert()
144
184
  # @example
145
- # Unisec::Properties.char2codepoint('💎') # => "U+1F48E"
185
+ # Unisec::Utils::String.char2codepoint('💎') # => "U+1F48E"
146
186
  def self.char2codepoint(chr)
147
187
  Integer.deccp2stdhexcp(chr.codepoints.first)
148
188
  end
@@ -151,8 +191,8 @@ module Unisec
151
191
  # @param chrs [String] Unicode code points (as characters / string)
152
192
  # @return [String] code points in Unicode format
153
193
  # @example
154
- # Unisec::Properties.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
155
- # Unisec::Properties.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"
194
+ # Unisec::Utils::String.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
195
+ # Unisec::Utils::String.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"
156
196
  def self.chars2codepoints(chrs)
157
197
  out = []
158
198
  chrs.each_char do |chr|
@@ -161,6 +201,15 @@ module Unisec
161
201
  out.join(' ')
162
202
  end
163
203
 
204
+ # Display the code points in integer format for the given characters (code points as string)
205
+ # @param chrs [String] Unicode code points (as characters / string)
206
+ # @return [String] code points in integer format
207
+ # @example
208
+ # Unisec::Utils::String.chars2intcodepoints('I 💕 Ruby 💎') # => "73 32 128149 32 82 117 98 121 32 128142"
209
+ def self.chars2intcodepoints(chrs)
210
+ chrs.codepoints.map(&:to_s).join(' ')
211
+ end
212
+
164
213
  # Convert a string of hex encoded Unicode code points range to actual
165
214
  # integer Ruby range.
166
215
  # @param range_str [String] Unicode code points range as in data/Blocks.txt
@@ -170,22 +219,13 @@ module Unisec
170
219
  def self.to_range(range_str)
171
220
  ::Range.new(*range_str.split('..').map { |x| x.hex2dec.to_i })
172
221
  end
173
-
174
- # Convert from standardized format hexadecimal code point to decimal code point
175
- # @param std_hex_cp [String] Code point in standardized hexadecimal format
176
- # @return [Integer] Code point in decimal format
177
- # @example
178
- # Unisec::Utils::String.stdhexcp2deccp('U+2026') # => 8230
179
- def self.stdhexcp2deccp(std_hex_cp)
180
- hex = "0x#{std_hex_cp[2..]}" # replace U+ prefix with 0x
181
- convert_to_integer(hex)
182
- end
183
222
  end
184
223
 
185
224
  module Integer
186
225
  # Convert from decimal code point to standardized format hexadecimal code point
187
226
  # @param int_cp [Integer] Code point in decimal format
188
227
  # @return [String] code point in Unicode format
228
+ # @todo Replace this method with Integer.convert()
189
229
  # @example
190
230
  # Unisec::Utils::Integer.deccp2stdhexcp(128640) # => "U+1F680"
191
231
  def self.deccp2stdhexcp(int_cp)
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Unisec
4
4
  # Version of unisec library and app
5
- VERSION = '0.0.7'
5
+ VERSION = '0.0.8'
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unisec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexandre ZANNI