unisec 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ctf_party'
4
+ require 'paint'
5
+
6
module Unisec
  # Decimal dump (decdump) of all Unicode encodings.
  #
  # Every byte of the encoded string is rendered as a zero-padded, 3-digit
  # decimal number. For UTF-16 and UTF-32 the bytes of one code unit are
  # grouped between pipes (`|`).
  class Decdump
    # UTF-8 decdump
    # @return [String] UTF-8 decdump
    attr_reader :utf8

    # UTF-16BE decdump
    # @return [String] UTF-16BE decdump
    attr_reader :utf16be

    # UTF-16LE decdump
    # @return [String] UTF-16LE decdump
    attr_reader :utf16le

    # UTF-32BE decdump
    # @return [String] UTF-32BE decdump
    attr_reader :utf32be

    # UTF-32LE decdump
    # @return [String] UTF-32LE decdump
    attr_reader :utf32le

    # Init the decdump: the dump in every supported encoding is computed eagerly.
    # @param str [String] Input string to encode
    # @example
    #   ded = Unisec::Decdump.new('I 💕 Ruby 💎')
    #   ded.utf8 # => "073 032 240 159 146 149 032 082 117 098 121 032 240 159 146 142"
    #   ded.utf16be # => "|000 073| |000 032| |216 061| |220 149| |000 032| |000 082| |000 117| |000 098| |000 121| |000 032| |216 061| |220 142|"
    #   ded.utf32be # => "|000 000 000 073| |000 000 000 032| |000 001 244 149| |000 000 000 032| |000 000 000 082| |000 000 000 117| |000 000 000 098| |000 000 000 121| |000 000 000 032| |000 001 244 142|"
    def initialize(str)
      @utf8 = Decdump.utf8(str)
      @utf16be = Decdump.utf16be(str)
      @utf16le = Decdump.utf16le(str)
      @utf32be = Decdump.utf32be(str)
      @utf32le = Decdump.utf32le(str)
    end

    # Encode to UTF-8 in decdump format (spaced at every code unit = every byte)
    # @param str [String] Input string to encode
    # @return [String] decdump (UTF-8 encoded)
    # @example
    #   Unisec::Decdump.utf8('🐋') # => "240 159 144 139"
    def self.utf8(str)
      decimal_bytes(str, 'UTF-8').join(' ')
    end

    # Encode to UTF-16BE in decdump format (packed by code unit = every 2 bytes)
    # @param str [String] Input string to encode
    # @return [String] decdump (UTF-16BE encoded)
    # @example
    #   Unisec::Decdump.utf16be('🐋') # => "|216 061| |220 011|"
    def self.utf16be(str)
      packed_code_units(str, 'UTF-16BE', 2)
    end

    # Encode to UTF-16LE in decdump format (packed by code unit = every 2 bytes)
    # @param str [String] Input string to encode
    # @return [String] decdump (UTF-16LE encoded)
    # @example
    #   Unisec::Decdump.utf16le('🐋') # => "|061 216| |011 220|"
    def self.utf16le(str)
      packed_code_units(str, 'UTF-16LE', 2)
    end

    # Encode to UTF-32BE in decdump format (packed by code unit = every 4 bytes)
    # @param str [String] Input string to encode
    # @return [String] decdump (UTF-32BE encoded)
    # @example
    #   Unisec::Decdump.utf32be('🐋') # => "|000 001 244 011|"
    def self.utf32be(str)
      packed_code_units(str, 'UTF-32BE', 4)
    end

    # Encode to UTF-32LE in decdump format (packed by code unit = every 4 bytes)
    # @param str [String] Input string to encode
    # @return [String] decdump (UTF-32LE encoded)
    # @example
    #   Unisec::Decdump.utf32le('🐋') # => "|011 244 001 000|"
    def self.utf32le(str)
      packed_code_units(str, 'UTF-32LE', 4)
    end

    # Display a CLI-friendly output summarizing the decdump in all Unicode encodings.
    # Pipes delimiting the code units are colorized in red.
    # @return [String] CLI-ready output
    # @example
    #   puts Unisec::Decdump.new("\u212A").display # =>
    #   # UTF-8: 226 132 170
    #   # UTF-16BE: |033 042|
    #   # UTF-16LE: |042 033|
    #   # UTF-32BE: |000 000 033 042|
    #   # UTF-32LE: |042 033 000 000|
    def display
      [
        "UTF-8: #{@utf8}",
        "UTF-16BE: #{@utf16be}",
        "UTF-16LE: #{@utf16le}",
        "UTF-32BE: #{@utf32be}",
        "UTF-32LE: #{@utf32le}"
      ].join("\n").gsub('|', Paint['|', :red])
    end

    # Transcode the string and render each resulting byte as a 3-digit decimal.
    # @param str [String] Input string to encode
    # @param encoding [String] target encoding name
    # @return [Array<String>] one zero-padded decimal per byte
    def self.decimal_bytes(str, encoding)
      str.encode(encoding).bytes.map { |byte| format('%03d', byte) }
    end

    # Group the decimal bytes by code unit and wrap every code unit in pipes.
    # @param str [String] Input string to encode
    # @param encoding [String] target encoding name
    # @param unit_size [Integer] number of bytes per code unit
    # @return [String] decdump with `|`-delimited code units
    def self.packed_code_units(str, encoding, unit_size)
      # Slicing the byte list directly avoids joining everything into a string
      # and re-parsing it with a regexp just to regroup the bytes.
      units = decimal_bytes(str, encoding).each_slice(unit_size)
      units.map { |unit| "|#{unit.join(' ')}|" }.join(' ')
    end

    private_class_method :decimal_bytes, :packed_code_units
  end
end
@@ -3,7 +3,7 @@
3
3
  require 'ctf_party'
4
4
 
5
5
  module Unisec
6
- # Hexdump of all Unicode encodings.
6
+ # Hexadecimal dump (hexdump) of all Unicode encodings.
7
7
  class Hexdump
8
8
  # UTF-8 hexdump
9
9
  # @return [String] UTF-8 hexdump
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'ctf_party'
4
+ require 'paint'
5
+ require 'unisec/utils'
4
6
 
5
7
  module Unisec
6
8
  # Normalization Forms
@@ -111,7 +113,7 @@ module Unisec
111
113
  def display
112
114
  colorize = lambda { |form_title, form_attr|
113
115
  "#{Paint[form_title.to_s, :underline,
114
- :bold]}: #{form_attr}\n #{Paint[Unisec::Properties.chars2codepoints(form_attr), :red]}\n"
116
+ :bold]}: #{form_attr}\n #{Paint[Unisec::Utils::String.chars2codepoints(form_attr), :red]}\n"
115
117
  }
116
118
  colorize.call('Original', @original) +
117
119
  colorize.call('NFC', @nfc) +
@@ -125,7 +127,7 @@ module Unisec
125
127
  def display_replace
126
128
  colorize = lambda { |form_title, form_attr|
127
129
  "#{Paint[form_title.to_s, :underline,
128
- :bold]}: #{form_attr}\n #{Paint[Unisec::Properties.chars2codepoints(form_attr), :red]}\n"
130
+ :bold]}: #{form_attr}\n #{Paint[Unisec::Utils::String.chars2codepoints(form_attr), :red]}\n"
129
131
  }
130
132
  payload = replace_bypass
131
133
  colorize.call('Original', @original) +
@@ -0,0 +1,224 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'paint'
4
+ require 'unisec/utils'
5
+
6
module Unisec
  # Operations about Unicode planes
  class Planes # rubocop:disable Metrics/ClassLength
    # Data about the planes. The index in the array is the plane number.
    # Names are plain ASCII (no soft hyphen) so they can be searched as typed.
    PLANES = [
      { range: 0x0..0xffff, name: 'Basic Multilingual Plane' },
      { range: 0x10000..0x1ffff, name: 'Supplementary Multilingual Plane' },
      { range: 0x20000..0x2ffff, name: 'Supplementary Ideographic Plane' },
      { range: 0x30000..0x3ffff, name: 'Tertiary Ideographic Plane' },
      { range: 0x40000..0x4ffff, name: 'unassigned' },
      { range: 0x50000..0x5ffff, name: 'unassigned' },
      { range: 0x60000..0x6ffff, name: 'unassigned' },
      { range: 0x70000..0x7ffff, name: 'unassigned' },
      { range: 0x80000..0x8ffff, name: 'unassigned' },
      { range: 0x90000..0x9ffff, name: 'unassigned' },
      { range: 0xa0000..0xaffff, name: 'unassigned' },
      { range: 0xb0000..0xbffff, name: 'unassigned' },
      { range: 0xc0000..0xcffff, name: 'unassigned' },
      { range: 0xd0000..0xdffff, name: 'unassigned' },
      { range: 0xe0000..0xeffff, name: 'Supplementary Special-purpose Plane' },
      { range: 0xf0000..0xfffff, name: 'supplementary Private Use Area planes' },
      { range: 0x100000..0x10ffff, name: 'supplementary Private Use Area planes' }
    ].freeze

    # Invisible soft hyphen (U+00AD); stripped from names before comparing them
    # so that names copy-pasted with that character still match.
    SOFT_HYPHEN = "\u00AD"
    private_constant :SOFT_HYPHEN

    # List all Unicode planes together with their blocks
    # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (warning: very slow, very unoptimized, see {Unisec::Blocks.list})
    # @return [Array<Hash>] one Hash per plane (`:range`, `:name`) enriched with
    #   a `:blocks` key holding the blocks of that plane
    # @example
    #   Unisec::Planes.list # =>
    #   # [{range: 0..65535,
    #   #   name: "Basic Multilingual Plane",
    #   #   blocks:
    #   #    [{range: 0..127, name: "Basic Latin", range_size: nil, char_count: nil},
    #   #     {range: 128..255, name: "Latin-1 Supplement", range_size: nil, char_count: nil},
    #   #     […]
    def self.list(with_count: false)
      attach_blocks(PLANES, plane2blocks(PLANES, with_count: with_count))
    end

    # List details about target plane including the list of associated blocks.
    # The returned Hashes are copies: {PLANES} itself is never modified.
    # @param plane_arg [String|Integer] name (case-insensitive) or number of the plane
    # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (see {Unisec::Blocks.list})
    # @return [Hash|Array<Hash>|nil] nil if no match, Hash of the plane if one match,
    #   Array of planes' Hash if several matches
    # @raise [ArgumentError] if plane_arg is neither a String nor an Integer
    # @example
    #   Unisec::Planes.plane(3) # =>
    #   # {range: 196608..262143,
    #   #  name: "Tertiary Ideographic Plane",
    #   #  blocks:
    #   #   [{range: 196608..201551, name: "CJK Unified Ideographs Extension G", range_size: nil, char_count: nil},
    #   #    {range: 201552..205743, name: "CJK Unified Ideographs Extension H", range_size: nil, char_count: nil},
    #   #    {range: 205744..210047, name: "CJK Unified Ideographs Extension J", range_size: nil, char_count: nil}]}
    #   Unisec::Planes.plane('Supplementary Ideographic Plane') # =>
    #   # {range: 131072..196607,
    #   #  name: "Supplementary Ideographic Plane",
    #   #  blocks:
    #   #   [{range: 131072..173791, name: "CJK Unified Ideographs Extension B", range_size: nil, char_count: nil},
    #   #    […]
    #   #    {range: 194560..195103, name: "CJK Compatibility Ideographs Supplement", range_size: nil, char_count: nil}]}
    #   Unisec::Planes.plane('unassigned') # =>
    #   # [{range: 262144..327679, name: "unassigned", blocks: []},
    #   #  {range: 327680..393215, name: "unassigned", blocks: []},
    #   #  […]
    #   #  {range: 851968..917503, name: "unassigned", blocks: []}]
    def self.plane(plane_arg, with_count: false)
      found = find_planes(plane_arg)
      case found
      when Hash # one plane: merge into a copy instead of writing into the constant
        found.merge(blocks: plane2blocks(found, with_count: with_count))
      when Array # several planes
        attach_blocks(found, plane2blocks(found, with_count: with_count))
      end # nil (no match) falls through and is returned as nil
    end

    # Find the blocks included in a given plane
    # @param plane [Hash|Array<Hash>] plane hash or array of plane hash
    # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (see {Unisec::Blocks.list})
    # @return [Array<Hash>|Array<Array<Hash>>] blocks of the plane when given a
    #   Hash, one array of blocks per plane (same order) when given an Array
    # @raise [ArgumentError] if plane is neither a Hash nor an Array
    # @example
    #   Unisec::Planes.plane2blocks({ range: 0x20000..0x2ffff, name: 'Supplementary Ideographic Plane' }) # =>
    #   # [{range: 131072..173791, name: "CJK Unified Ideographs Extension B", range_size: nil, char_count: nil},
    #   #  {range: 173824..177983, name: "CJK Unified Ideographs Extension C", range_size: nil, char_count: nil},
    #   #  […]
    #   #  {range: 194560..195103, name: "CJK Compatibility Ideographs Supplement", range_size: nil, char_count: nil}]
    def self.plane2blocks(plane, with_count: false)
      unless plane.is_a?(Hash) || plane.is_a?(Array)
        raise ArgumentError, "plane must be a Hash or an Array, got #{plane.class}"
      end

      # The block list is loaded once and shared by every plane: it is the
      # expensive part, especially when with_count is enabled.
      select_blocks(plane, Unisec::Blocks.list(with_count: with_count))
    end

    # Abbreviate a plane name (based on uppercase letters)
    # @param name [String] plane name (as in {PLANES} `:name`)
    # @return [String] plane abbreviation
    # @example
    #   Unisec::Planes.abbr('Basic Multilingual Plane') # => "BMP"
    #   Unisec::Planes.abbr('supplementary Private Use Area planes') # => "PUA"
    def self.abbr(name)
      name.scan(/\p{Upper}/).join
    end

    # Display a CLI-friendly output listing all planes
    # @param with_blocks [TrueClass|FalseClass] display the blocks associated with each plane
    # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (see {Unisec::Blocks.list})
    # @return [nil]
    # @example
    #   Unisec::Planes.list_display(with_blocks: true, with_count: false)
    #   # Range: U+0000 - U+FFFF     Name: Basic Multilingual Plane
    #   #  Blocks:
    #   #  Range: U+0000 - U+007F     Name: Basic Latin
    #   #  Range: U+0080 - U+00FF     Name: Latin-1 Supplement
    #   #  Range: U+0100 - U+017F     Name: Latin Extended-A
    #   #  […]
    def self.list_display(with_blocks: false, with_count: false)
      display_planes(list(with_count: with_count), with_blocks: with_blocks, with_count: with_count)
    end

    # Display a CLI-friendly output searching for a plane.
    # Nothing is printed when no plane matches.
    # @param plane_arg [String|Integer] name or number of the plane
    # @param with_blocks [TrueClass|FalseClass] display the blocks associated with each plane
    # @param with_count [TrueClass|FalseClass] calculate block's range size & char count? (see {Unisec::Blocks.list})
    # @return [nil]
    # @example
    #   Unisec::Planes.plane_display(3, with_blocks: true)
    #   # Range: U+30000 - U+3FFFF   Name: Tertiary Ideographic Plane
    #   #  Blocks:
    #   #  Range: U+30000 - U+3134F   Name: CJK Unified Ideographs Extension G
    #   #  Range: U+31350 - U+323AF   Name: CJK Unified Ideographs Extension H
    #   #  Range: U+323B0 - U+3347F   Name: CJK Unified Ideographs Extension J
    def self.plane_display(plane_arg, with_blocks: false, with_count: false)
      planes = plane(plane_arg, with_count: with_count)
      return nil if planes.nil? # no match: avoid calling #each on nil

      planes = [planes] if planes.is_a?(Hash)
      display_planes(planes, with_blocks: with_blocks, with_count: with_count)
    end

    # Look a plane up in {PLANES} by number or by name, without block data.
    # @return [Hash|Array<Hash>|nil] nil if no match, Hash if one match, Array if several
    def self.find_planes(plane_arg)
      case plane_arg
      when Integer # search by plane number
        PLANES[plane_arg]
      when String # search by plane name
        wanted = normalize_name(plane_arg)
        matches = PLANES.select { |pla| normalize_name(pla[:name]) == wanted }
        return nil if matches.empty?

        matches.size == 1 ? matches.first : matches
      else
        raise ArgumentError, "plane_arg must be a String or an Integer, got #{plane_arg.class}"
      end
    end

    # Canonical form of a plane name used for comparisons.
    def self.normalize_name(name)
      name.delete(SOFT_HYPHEN).downcase
    end

    # Build copies of the planes with their blocks stored under `:blocks`.
    def self.attach_blocks(planes, blocks_per_plane)
      planes.zip(blocks_per_plane).map { |pla, blocks| pla.merge(blocks: blocks) }
    end

    # Pick, in an already loaded block list, the blocks fully inside the plane(s).
    def self.select_blocks(plane, all_blocks)
      case plane
      when Hash
        all_blocks.select { |block| plane[:range].cover?(block[:range]) }
      when Array
        plane.map { |pla| select_blocks(pla, all_blocks) }
      else
        raise ArgumentError, "plane must be a Hash or an Array, got #{plane.class}"
      end
    end

    # Print the planes (and optionally their blocks) to stdout.
    def self.display_planes(planes, with_blocks:, with_count:)
      planes.each do |pla|
        print_field('Range:', Utils::Range.range2codepoint_range(pla[:range]), 22, :red)
        print_field('Name:', pla[:name], 50, :red)
        if with_blocks
          puts
          print_field(' Blocks:', "\n", 0, :red)
          pla[:blocks].each { |block| display_block(block, with_count: with_count) }
        end
        puts
      end
      nil
    end

    # Print one block line (range, name and optionally the counters) to stdout.
    def self.display_block(block, with_count:)
      print_field(' Range:', Utils::Range.range2codepoint_range(block[:range]), 22, :magenta)
      print_field('Name:', block[:name], 50, :magenta)
      if with_count
        print_field('Range size:', block[:range_size], 8, :magenta)
        print_field('Char count:', block[:char_count], 0, :magenta)
      end
      puts
    end

    # Print a colorized bold key followed by its left-justified value.
    def self.print_field(key, value, just, color)
      print Paint[key, color, :bold] + " #{value}".ljust(just)
    end

    private_class_method :find_planes, :normalize_name, :attach_blocks, :select_blocks,
                         :display_planes, :display_block, :print_field
  end
end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'twitter_cldr'
4
4
  require 'paint'
5
+ require 'unisec/utils'
5
6
 
6
7
  module Unisec
7
8
  # Manipulate Unicode properties
@@ -50,7 +51,7 @@ module Unisec
50
51
  def self.codepoints_display(prop)
51
52
  codepoints = Properties.codepoints(prop)
52
53
  codepoints.each do |cp|
53
- puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
54
+ puts "#{Utils::Integer.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
54
55
  end
55
56
  nil
56
57
  end
@@ -77,7 +78,7 @@ module Unisec
77
78
  block: props.block.join,
78
79
  category: categories[1],
79
80
  subcategory: categories[0],
80
- codepoint: Properties.char2codepoint(chr),
81
+ codepoint: Utils::String.char2codepoint(chr),
81
82
  name: cp.name,
82
83
  script: props.script.join,
83
84
  case: {
@@ -127,22 +128,22 @@ module Unisec
127
128
  display.call('Since (age):', "Version #{data[:age]}")
128
129
  puts
129
130
  x = data.dig(:case, :twitter, :uppercase)
130
- display.call('Uppercase:', x + " (#{Properties.char2codepoint(x)})")
131
+ display.call('Uppercase:', x + " (#{Utils::String.char2codepoint(x)})")
131
132
  x = data.dig(:case, :twitter, :lowercase)
132
- display.call('Lowercase:', x + " (#{Properties.char2codepoint(x)})")
133
+ display.call('Lowercase:', x + " (#{Utils::String.char2codepoint(x)})")
133
134
  x = data.dig(:case, :twitter, :titlecase)
134
- display.call('Titlecase:', x + " (#{Properties.char2codepoint(x)})")
135
+ display.call('Titlecase:', x + " (#{Utils::String.char2codepoint(x)})")
135
136
  x = data.dig(:case, :twitter, :casefold)
136
- display.call('Casefold:', x + " (#{Properties.char2codepoint(x)})")
137
+ display.call('Casefold:', x + " (#{Utils::String.char2codepoint(x)})")
137
138
  puts
138
139
  x = data.dig(:normalization, :twitter, :nfkd)
139
- display.call('Normalization NFKD:', x + " (#{Properties.chars2codepoints(x)})")
140
+ display.call('Normalization NFKD:', x + " (#{Utils::String.chars2codepoints(x)})")
140
141
  x = data.dig(:normalization, :twitter, :nfkc)
141
- display.call('Normalization NFKC:', x + " (#{Properties.chars2codepoints(x)})")
142
+ display.call('Normalization NFKC:', x + " (#{Utils::String.chars2codepoints(x)})")
142
143
  x = data.dig(:normalization, :twitter, :nfd)
143
- display.call('Normalization NFD:', x + " (#{Properties.chars2codepoints(x)})")
144
+ display.call('Normalization NFD:', x + " (#{Utils::String.chars2codepoints(x)})")
144
145
  x = data.dig(:normalization, :twitter, :nfc)
145
- display.call('Normalization NFC:', x + " (#{Properties.chars2codepoints(x)})")
146
+ display.call('Normalization NFC:', x + " (#{Utils::String.chars2codepoints(x)})")
146
147
  if extended
147
148
  puts
148
149
  data[:other_properties].each do |k, v|
@@ -151,37 +152,5 @@ module Unisec
151
152
  end
152
153
  nil
153
154
  end
154
-
155
- # Display the code point in Unicode format for a given character (code point as string)
156
- # @param chr [String] Unicode code point (as character / string)
157
- # @return [String] code point in Unicode format
158
- # @example
159
- # Unisec::Properties.char2codepoint('💎') # => "U+1F48E"
160
- def self.char2codepoint(chr)
161
- Properties.deccp2stdhexcp(chr.codepoints.first)
162
- end
163
-
164
- # Display the code points in Unicode format for the given characters (code points as string)
165
- # @param chrs [String] Unicode code points (as characters / string)
166
- # @return [String] code points in Unicode format
167
- # @example
168
- # Unisec::Properties.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
169
- # Unisec::Properties.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"
170
- def self.chars2codepoints(chrs)
171
- out = []
172
- chrs.each_char do |chr|
173
- out << Properties.char2codepoint(chr)
174
- end
175
- out.join(' ')
176
- end
177
-
178
- # Convert from decimal code point to standardized format hexadecimal code point
179
- # @param int_cp [Integer] Code point in decimal format
180
- # @return [String] code point in Unicode format
181
- # @example
182
- # Unisec::Properties.intcp2stdhexcp(128640) # => "U+1F680"
183
- def self.deccp2stdhexcp(int_cp)
184
- "U+#{format('%.4x', int_cp).upcase}"
185
- end
186
155
  end
187
156
  end
data/lib/unisec/rugrep.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'twitter_cldr'
4
4
  require 'paint'
5
+ require 'unisec/utils'
5
6
 
6
7
  module Unisec
7
8
  # Ruby grep : Ruby regular expression search for Unicode code point names
@@ -64,7 +65,7 @@ module Unisec
64
65
  def self.regrep_display(regexp)
65
66
  codepoints = regrep(regexp)
66
67
  codepoints.each do |cp|
67
- puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
68
+ puts "#{Utils::Integer.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
68
69
  end
69
70
  nil
70
71
  end
@@ -118,7 +119,7 @@ module Unisec
118
119
  def self.regrep_display_slow(regexp)
119
120
  codepoints = regrep_slow(regexp)
120
121
  codepoints.each do |cp|
121
- puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
122
+ puts "#{Utils::Integer.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
122
123
  end
123
124
  nil
124
125
  end
data/lib/unisec/utils.rb CHANGED
@@ -20,6 +20,35 @@ class Integer
20
20
  end
21
21
  end
22
22
 
23
class String
  # Convert a string to a boolean.
  # The comparison is case-insensitive and ignores a trailing line break.
  # @return [TrueClass|FalseClass] true for `true`, `yes`, `y`, `1`;
  #   false for `false`, `no`, `n`, `0`
  # @raise [ArgumentError] if the string is none of the recognized values
  # @example
  #   "true".to_bool # => true
  #   "No\n".to_bool # => false
  def to_bool
    case to_s.chomp.downcase
    when 'true', 'yes', 'y', '1'
      true
    when 'false', 'no', 'n', '0'
      false
    else
      # Report the receiver itself: there is no local variable to interpolate here.
      raise ArgumentError, "invalid value for Boolean: #{inspect}"
    end
  end
end
39
+
40
class Range
  # Is a range included in another range? Are all values of range B included in range A?
  # An exclusive end (`...`) on the receiver is honored: `(1...10)` does not
  # include `(2..10)` because 10 itself is not part of the receiver.
  # @param range [Range] range B, the candidate sub-range
  # @return [TrueClass|FalseClass]
  # @example
  #   (1..10).include_range?(2..11) # => false
  #   (1..10).include_range?(2..4) # => true
  #   (1...10).include_range?(2..10) # => false
  def include_range?(range)
    return false unless self.begin <= range.begin

    if exclude_end? && !range.exclude_end?
      # The receiver stops just before its end while the argument reaches its own end.
      self.end > range.end
    else
      self.end >= range.end
    end
  end
end
51
+
23
52
  module Unisec
24
53
  # Generic stuff not Unicode-related that can be re-used.
25
54
  module Utils
@@ -108,6 +137,71 @@ module Unisec
108
137
# Reverse a string grapheme cluster by grapheme cluster, so that the code
# points making up one cluster keep their relative order.
# @param str [String] string to reverse
# @return [String] the clusters of str joined in reverse order
def self.grapheme_reverse(str)
  clusters = str.each_grapheme_cluster.to_a
  clusters.reverse!
  clusters.join
end
140
+
141
# Give the code point, in Unicode format, of a character.
# Only the first code point of the string is considered.
# @param chr [String] Unicode code point (as character / string)
# @return [String] code point in Unicode format
# @example
#   Unisec::Utils::String.char2codepoint('💎') # => "U+1F48E"
def self.char2codepoint(chr)
  first_cp = chr.codepoints.first
  Integer.deccp2stdhexcp(first_cp)
end
149
+
150
# Give the code points, in Unicode format, of every character of a string.
# @param chrs [String] Unicode code points (as characters / string)
# @return [String] space-separated code points in Unicode format
# @example
#   Unisec::Utils::String.chars2codepoints("y\u0300\u0301") # => "U+0079 U+0300 U+0301"
#   Unisec::Utils::String.chars2codepoints("\u{1F9D1}\u{200D}\u{1F33E}") # => "U+1F9D1 U+200D U+1F33E"
def self.chars2codepoints(chrs)
  chrs.each_char.map { |chr| char2codepoint(chr) }.join(' ')
end
163
+
164
# Convert a string holding a range of hex encoded Unicode code points
# into an actual Ruby range of integers.
# @param range_str [String] Unicode code points range as in data/Blocks.txt
#   (two hexadecimal numbers separated by `..`)
# @return [Range] inclusive range of the decoded bounds
# @example
#   Unisec::Utils::String.to_range('0080..00FF') # => 128..255
def self.to_range(range_str)
  first, *rest = range_str.split('..').map(&:hex)
  ::Range.new(first, *rest)
end
173
+
174
# Convert from standardized format hexadecimal code point to decimal code point.
# The first two characters (expected to be the `U+` prefix) are dropped and
# replaced with `0x` before the conversion is delegated to convert_to_integer
# (defined elsewhere in this module).
# @param std_hex_cp [String] Code point in standardized hexadecimal format
# @return [Integer] Code point in decimal format
# @example
#   Unisec::Utils::String.stdhexcp2deccp('U+2026') # => 8230
def self.stdhexcp2deccp(std_hex_cp)
  digits = std_hex_cp[2..] # everything after the 2-character prefix
  convert_to_integer("0x#{digits}")
end
183
+ end
184
+
185
+ module Integer
186
# Convert from decimal code point to standardized format hexadecimal code point:
# `U+` followed by the uppercase hexadecimal value, zero-padded to at least 4 digits.
# @param int_cp [Integer] Code point in decimal format
# @return [String] code point in Unicode format
# @example
#   Unisec::Utils::Integer.deccp2stdhexcp(128640) # => "U+1F680"
def self.deccp2stdhexcp(int_cp)
  format('U+%.4X', int_cp)
end
194
+ end
195
+
196
+ module Range
197
# Convert an (integer) range to a range of Unicode code points.
# Both bounds of the range are rendered in Unicode format.
# @param range [::Range] range of decimal code points
# @return [String] `"<begin> - <end>"`, each bound in Unicode format
# @example
#   Unisec::Utils::Range.range2codepoint_range(1048576..1114111) # => "U+100000 - U+10FFFF"
def self.range2codepoint_range(range)
  first, last = [range.begin, range.end].map { |cp| Integer.deccp2stdhexcp(cp) }
  "#{first} - #{last}"
end
111
205
  end
112
206
  end
113
207
  end
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Unisec
4
4
  # Version of unisec library and app
5
- VERSION = '0.0.6'
5
+ VERSION = '0.0.7'
6
6
  end
@@ -55,6 +55,10 @@ module Unisec
55
55
  ucd_derivedname: {
56
56
  version: Unisec::Rugrep.ucd_derivedname_version,
57
57
  label: 'UCD (data/DerivedName.txt)'
58
+ },
59
+ ucd_blocks: {
60
+ version: Unisec::Blocks.ucd_blocks_version,
61
+ label: 'UCD (data/Blocks.txt)'
58
62
  }
59
63
  }
60
64
  end
@@ -81,6 +85,7 @@ module Unisec
81
85
  colorize.call(:twittercldr_cldr) +
82
86
  colorize.call(:ruby_unicode_emoji) +
83
87
  colorize.call(:ucd_derivedname) +
88
+ colorize.call(:ucd_blocks) +
84
89
  Paint["\nGems:\n", :underline] +
85
90
  colorize.call(:unisec) +
86
91
  colorize.call(:twittercldr) +
data/lib/unisec.rb CHANGED
@@ -3,9 +3,12 @@
3
3
  require 'unisec/version'
4
4
 
5
5
  require 'unisec/bidi'
6
+ require 'unisec/blocks'
6
7
  require 'unisec/confusables'
8
+ require 'unisec/decdump'
7
9
  require 'unisec/hexdump'
8
10
  require 'unisec/normalization'
11
+ require 'unisec/planes'
9
12
  require 'unisec/properties'
10
13
  require 'unisec/rugrep'
11
14
  require 'unisec/size'