unisec 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/unisec/utils.rb CHANGED
@@ -20,33 +20,66 @@ class Integer
20
20
  end
21
21
  end
22
22
 
23
class String
  # Convert a string to a boolean.
  # Matching is case-insensitive and ignores a trailing line terminator.
  # @return [TrueClass|FalseClass]
  # @raise [ArgumentError] if the string is not one of the recognised boolean literals
  # @example
  #   "true".to_bool # => true
  def to_bool
    case chomp.downcase
    when 'true', 'yes', 'y', '1' then true
    when 'false', 'no', 'n', '0' then false
    else
      # Interpolate the receiver itself: there is no local named `str` in this scope,
      # so referencing one would raise NameError instead of the intended ArgumentError.
      raise ArgumentError, "invalid value for Boolean: #{inspect}"
    end
  end
end
39
+
40
class Range
  # Is a range included in another range? Are all values of range B included in range A?
  # An exclusive upper bound (`...`) on the receiver is honoured: the receiver's end value
  # is only considered covered when the other range excludes it as well.
  # @param range [Range]
  # @return [TrueClass|FalseClass]
  # @example
  #   (1..10).include_range?(2..11) # => false
  #   (1..10).include_range?(2..4) # => true
  #   (1...10).include_range?(2..10) # => false
  def include_range?(range)
    return false unless self.begin <= range.begin

    if exclude_end? && !range.exclude_end?
      # The receiver stops just before its end, the other range reaches its end:
      # the other end must be strictly lower.
      self.end > range.end
    else
      self.end >= range.end
    end
  end
end
51
+
23
52
  module Unisec
24
53
  # Generic stuff not Unicode-related that can be re-used.
25
54
  module Utils
26
55
  # About string conversion and manipulation.
27
56
  module String
28
57
  # Convert a string input into the chosen type.
29
- # @param input [String] If the target type is `:integer`, the string must represent a number encoded in
30
- # hexadecimal, decimal, binary. If it's a Unicode string, only the first code point will be taken into account.
31
- # @param target_type [Symbol] Convert to the chosen type. Currently only supports `:integer`.
58
+ # @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
59
+ # The input must represent a character encoded in hexadecimal, decimal, binary or standard code point format.
60
+ # See {convert_to_integer} and {convert_to_char} for detailed examples.
61
+ # @param target_type [Symbol] Convert to the chosen type. Currently only supports `:integer` and `:char`.
32
62
  # @return [Variable] The type of the output depends on the chosen `target_type`.
33
63
  # @example
34
64
  # Unisec::Utils::String.convert('0x1f4a9', :integer) # => 128169
65
+ # Unisec::Utils::String.convert('0x1f4a9', :char) # => "💩"
35
66
  def self.convert(input, target_type)
36
67
  case target_type
37
68
  when :integer
38
69
  convert_to_integer(input)
70
+ when :char
71
+ convert_to_char(input)
39
72
  else
40
73
  raise TypeError, "Target type \"#{target_type}\" not avaible"
41
74
  end
42
75
  end
43
76
 
44
- # Internal method used for {.convert}.
77
+ # Internal method used for {convert}.
45
78
  #
46
79
  # Convert a string input into integer.
47
- # @param input [String] The string must represent a number encoded in hexadecimal, decimal, binary. If it's a
48
- # Unicode string, only the first code point will be taken into account. The input type is determined
49
- # automatically based on the prefix.
80
+ # @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
81
+ # The input must represent a character encoded in hexadecimal, decimal, binary, standard code point format.
82
+ # The input type is determined automatically based on the prefix.
50
83
  # @return [Integer]
51
84
  # @example
52
85
  # # Hexadecimal
@@ -57,10 +90,14 @@ module Unisec
57
90
  # Unisec::Utils::String.convert_to_integer('0b11111010010101001') # => 128169
58
91
  # # Unicode string
59
92
  # Unisec::Utils::String.convert_to_integer('💩') # => 128169
93
+ # # Standardized format of hexadecimal code point
94
+ # Unisec::Utils::String.convert_to_integer('U+1F4A9') # => 128169
60
95
  def self.convert_to_integer(input)
61
96
  case autodetect(input)
62
97
  when :hexadecimal
63
98
  input.hex2dec(prefix: '0x').to_i
99
+ when :stdcp
100
+ input.hex2dec(prefix: 'U+').to_i
64
101
  when :decimal
65
102
  input.to_i
66
103
  when :binary
@@ -72,11 +109,38 @@ module Unisec
72
109
  end
73
110
  end
74
111
 
112
+ # Internal method used for {convert}.
113
+ #
114
+ # Convert a string input into a character.
115
+ # @param input [String] If the input is a Unicode string, only the first code point will be taken into account.
116
+ # The input must represent a character encoded in hexadecimal, decimal, binary, standard code point format.
117
+ # The input type is determined automatically based on the prefix.
118
+ # @return [String]
119
+ # @example
120
+ # # Hexadecimal
121
+ # Unisec::Utils::String.convert_to_char('0x1f4a9') # => "💩"
122
+ # # Decimal
123
+ # Unisec::Utils::String.convert_to_char('0d128169') # => "💩"
124
+ # # Binary
125
+ # Unisec::Utils::String.convert_to_char('0b11111010010101001') # => "💩"
126
+ # # Unicode string
127
+ # Unisec::Utils::String.convert_to_char('💩') # => "💩"
128
+ # # Standardized format of hexadecimal code point
129
+ # Unisec::Utils::String.convert_to_char('U+1F4A9') # => "💩"
130
def self.convert_to_char(input)
  # Every representation that autodetect can report is accepted; anything else is a type error.
  supported = %i[hexadecimal stdcp decimal binary string]
  unless supported.include?(autodetect(input))
    raise TypeError, "Input \"#{input}\" is not of the expected type"
  end

  # Resolve the input to its integer code point first, then encode it as a UTF-8 character.
  code_point = convert(input, :integer)
  [code_point].pack('U')
end
138
+
75
139
  # Internal method used for {.convert}.
76
140
  #
77
141
  # Autodetect the representation type of the string input.
78
142
  # @param str [String] Input.
79
- # @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string`.
143
+ # @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string`, `:stdcp`.
80
144
  # @example
81
145
  # # Hexadecimal
82
146
  # Unisec::Utils::String.autodetect('0x1f4a9') # => :hexadecimal
@@ -86,10 +150,14 @@ module Unisec
86
150
  # Unisec::Utils::String.autodetect('0b11111010010101001') # => :binary
87
151
  # # Unicode string
88
152
  # Unisec::Utils::String.autodetect('💩') # => :string
153
+ # # Standardized format of hexadecimal code point
154
+ # Unisec::Utils::String.autodetect('U+1F4A9') # => :stdcp
89
155
  def self.autodetect(str)
90
156
  case str
91
- when /0x[0-9a-fA-F]/
157
+ when /0x[0-9a-fA-F]+/
92
158
  :hexadecimal
159
+ when /U\+[0-9A-F]+/
160
+ :stdcp
93
161
  when /0d[0-9]+/
94
162
  :decimal
95
163
  when /0b[0-1]+/
@@ -108,6 +176,72 @@ module Unisec
108
176
  def self.grapheme_reverse(str)
109
177
  str.grapheme_clusters.reverse.join
110
178
  end
179
+
180
+ # Display the code point in Unicode format for a given character (code point as string)
181
+ # @param chr [String] Unicode code point (as character / string)
182
+ # @return [String] code point in Unicode format
183
+ # @todo Replace this method by target type :stdcp in String.convert()
184
+ # @example
185
+ # Unisec::Utils::String.char2codepoint('💎') # => "U+1F48E"
186
def self.char2codepoint(chr)
  # Only the first code point of the input is considered.
  first_code_point = chr.codepoints.first
  Integer.deccp2stdhexcp(first_code_point)
end
189
+
190
+ # Display the code points in Unicode format for the given characters (code points as string)
191
+ # @param chrs [String] Unicode code points (as characters / string)
192
+ # @return [String] code points in Unicode format
193
+ # @example
194
+ # Unisec::Utils::String.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
195
+ # Unisec::Utils::String.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"
196
def self.chars2codepoints(chrs)
  # Map each character to its "U+XXXX" form and separate the results with single spaces.
  chrs.each_char.map { |chr| char2codepoint(chr) }.join(' ')
end
203
+
204
+ # Display the code points in integer format for the given characters (code points as string)
205
+ # @param chrs [String] Unicode code points (as characters / string)
206
+ # @return [String] code points in integer format
207
+ # @example
208
+ # Unisec::Utils::String.chars2intcodepoints('I 💕 Ruby 💎') # => "73 32 128149 32 82 117 98 121 32 128142"
209
def self.chars2intcodepoints(chrs)
  # Array#join stringifies each integer itself, so no intermediate array of strings is needed.
  chrs.codepoints.join(' ')
end
212
+
213
+ # Convert a string of hex encoded Unicode code points range to actual
214
+ # integer Ruby range.
215
+ # @param range_str [String] Unicode code points range as in data/Blocks.txt
216
+ # @return [Range]
217
+ # @example
218
+ # Unisec::Utils::String::to_range('0080..00FF') # => 128..255
219
def self.to_range(range_str)
  # Both bounds are bare hexadecimal code points (no prefix); String#hex from the standard
  # library parses them directly, without a hex-to-decimal-string round trip.
  ::Range.new(*range_str.split('..').map(&:hex))
end
222
+ end
223
+
224
+ module Integer
225
+ # Convert from decimal code point to standardized format hexadecimal code point
226
+ # @param int_cp [Integer] Code point in decimal format
227
+ # @return [String] code point in Unicode format
228
+ # @todo Replace this method by the Integer.convert()
229
+ # @example
230
+ # Unisec::Utils::Integer.deccp2stdhexcp(128640) # => "U+1F680"
231
def self.deccp2stdhexcp(int_cp)
  # %.4X: uppercase hexadecimal, zero-padded to at least four digits, in a single format call.
  format('U+%.4X', int_cp)
end
234
+ end
235
+
236
+ module Range
237
+ # Convert a (integer) range to a range of Unicode code points
238
+ # @param range [::Range]
239
+ # @return [String]
240
+ # @example
241
+ # Unisec::Utils::Range.range2codepoint_range(1048576..1114111) # => "U+100000 - U+10FFFF"
242
def self.range2codepoint_range(range)
  # Format both bounds as "U+XXXX" and join them with " - ".
  first_cp = Integer.deccp2stdhexcp(range.begin)
  last_cp = Integer.deccp2stdhexcp(range.end)
  "#{first_cp} - #{last_cp}"
end
111
245
  end
112
246
  end
113
247
  end
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Unisec
4
4
  # Version of unisec library and app
5
- VERSION = '0.0.6'
5
+ VERSION = '0.0.8'
6
6
  end
@@ -55,6 +55,10 @@ module Unisec
55
55
  ucd_derivedname: {
56
56
  version: Unisec::Rugrep.ucd_derivedname_version,
57
57
  label: 'UCD (data/DerivedName.txt)'
58
+ },
59
+ ucd_blocks: {
60
+ version: Unisec::Blocks.ucd_blocks_version,
61
+ label: 'UCD (data/Blocks.txt)'
58
62
  }
59
63
  }
60
64
  end
@@ -81,6 +85,7 @@ module Unisec
81
85
  colorize.call(:twittercldr_cldr) +
82
86
  colorize.call(:ruby_unicode_emoji) +
83
87
  colorize.call(:ucd_derivedname) +
88
+ colorize.call(:ucd_blocks) +
84
89
  Paint["\nGems:\n", :underline] +
85
90
  colorize.call(:unisec) +
86
91
  colorize.call(:twittercldr) +
data/lib/unisec.rb CHANGED
@@ -3,9 +3,12 @@
3
3
  require 'unisec/version'
4
4
 
5
5
  require 'unisec/bidi'
6
+ require 'unisec/blocks'
6
7
  require 'unisec/confusables'
8
+ require 'unisec/decdump'
7
9
  require 'unisec/hexdump'
8
10
  require 'unisec/normalization'
11
+ require 'unisec/planes'
9
12
  require 'unisec/properties'
10
13
  require 'unisec/rugrep'
11
14
  require 'unisec/size'
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unisec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexandre ZANNI
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-05-17 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: ctf-party
@@ -16,14 +15,14 @@ dependencies:
16
15
  requirements:
17
16
  - - "~>"
18
17
  - !ruby/object:Gem::Version
19
- version: '3.0'
18
+ version: '5.0'
20
19
  type: :runtime
21
20
  prerelease: false
22
21
  version_requirements: !ruby/object:Gem::Requirement
23
22
  requirements:
24
23
  - - "~>"
25
24
  - !ruby/object:Gem::Version
26
- version: '3.0'
25
+ version: '5.0'
27
26
  - !ruby/object:Gem::Dependency
28
27
  name: dry-cli
29
28
  requirement: !ruby/object:Gem::Requirement
@@ -58,34 +57,28 @@ dependencies:
58
57
  requirements:
59
58
  - - "~>"
60
59
  - !ruby/object:Gem::Version
61
- version: '6.11'
62
- - - ">="
63
- - !ruby/object:Gem::Version
64
- version: 6.11.5
60
+ version: '6.13'
65
61
  type: :runtime
66
62
  prerelease: false
67
63
  version_requirements: !ruby/object:Gem::Requirement
68
64
  requirements:
69
65
  - - "~>"
70
66
  - !ruby/object:Gem::Version
71
- version: '6.11'
72
- - - ">="
73
- - !ruby/object:Gem::Version
74
- version: 6.11.5
67
+ version: '6.13'
75
68
  - !ruby/object:Gem::Dependency
76
69
  name: unicode-confusable
77
70
  requirement: !ruby/object:Gem::Requirement
78
71
  requirements:
79
72
  - - "~>"
80
73
  - !ruby/object:Gem::Version
81
- version: '1.9'
74
+ version: '1.12'
82
75
  type: :runtime
83
76
  prerelease: false
84
77
  version_requirements: !ruby/object:Gem::Requirement
85
78
  requirements:
86
79
  - - "~>"
87
80
  - !ruby/object:Gem::Version
88
- version: '1.9'
81
+ version: '1.12'
89
82
  description: 'Toolkit for security research manipulating Unicode: confusables, homoglyphs,
90
83
  hexdump, code point, UTF-8, UTF-16, UTF-32, properties, regexp search, size, grapheme,
91
84
  surrogates, version, ICU, CLDR, UCD, BiDi, normalization'
@@ -97,22 +90,28 @@ extra_rdoc_files: []
97
90
  files:
98
91
  - LICENSE
99
92
  - bin/unisec
93
+ - data/Blocks.txt
100
94
  - data/DerivedName.txt
101
95
  - lib/unisec.rb
102
96
  - lib/unisec/bidi.rb
97
+ - lib/unisec/blocks.rb
103
98
  - lib/unisec/cli/bidi.rb
99
+ - lib/unisec/cli/blocks.rb
104
100
  - lib/unisec/cli/cli.rb
105
101
  - lib/unisec/cli/confusables.rb
106
- - lib/unisec/cli/hexdump.rb
102
+ - lib/unisec/cli/dump.rb
107
103
  - lib/unisec/cli/normalization.rb
104
+ - lib/unisec/cli/planes.rb
108
105
  - lib/unisec/cli/properties.rb
109
106
  - lib/unisec/cli/rugrep.rb
110
107
  - lib/unisec/cli/size.rb
111
108
  - lib/unisec/cli/surrogates.rb
112
109
  - lib/unisec/cli/versions.rb
113
110
  - lib/unisec/confusables.rb
111
+ - lib/unisec/decdump.rb
114
112
  - lib/unisec/hexdump.rb
115
113
  - lib/unisec/normalization.rb
114
+ - lib/unisec/planes.rb
116
115
  - lib/unisec/properties.rb
117
116
  - lib/unisec/rugrep.rb
118
117
  - lib/unisec/size.rb
@@ -120,18 +119,17 @@ files:
120
119
  - lib/unisec/utils.rb
121
120
  - lib/unisec/version.rb
122
121
  - lib/unisec/versions.rb
123
- homepage: https://github.com/Acceis/unisec
122
+ homepage: https://github.com/noraj/unisec
124
123
  licenses:
125
124
  - MIT
126
125
  metadata:
127
126
  yard.run: yard
128
- bug_tracker_uri: https://github.com/Acceis/unisec/issues
129
- changelog_uri: https://github.com/Acceis/unisec/releases
130
- documentation_uri: https://acceis.github.io/unisec/
131
- homepage_uri: https://github.com/Acceis/unisec
132
- source_code_uri: https://github.com/Acceis/unisec/
127
+ bug_tracker_uri: https://github.com/noraj/unisec/issues
128
+ changelog_uri: https://github.com/noraj/unisec/releases
129
+ documentation_uri: https://noraj.github.io/unisec/
130
+ homepage_uri: https://github.com/noraj/unisec
131
+ source_code_uri: https://github.com/noraj/unisec/
133
132
  rubygems_mfa_required: 'true'
134
- post_install_message:
135
133
  rdoc_options: []
136
134
  require_paths:
137
135
  - lib
@@ -139,18 +137,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
139
137
  requirements:
140
138
  - - ">="
141
139
  - !ruby/object:Gem::Version
142
- version: 3.0.0
140
+ version: 3.2.0
143
141
  - - "<"
144
142
  - !ruby/object:Gem::Version
145
- version: '4.0'
143
+ version: '5.0'
146
144
  required_rubygems_version: !ruby/object:Gem::Requirement
147
145
  requirements:
148
146
  - - ">="
149
147
  - !ruby/object:Gem::Version
150
148
  version: '0'
151
149
  requirements: []
152
- rubygems_version: 3.5.3
153
- signing_key:
150
+ rubygems_version: 4.0.3
154
151
  specification_version: 4
155
152
  summary: Unicode Security Toolkit
156
153
  test_files: []
@@ -1,47 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'dry/cli'
4
- require 'unisec'
5
-
6
- module Unisec
7
- module CLI
8
- module Commands
9
- # CLI command `unisec hexdumps` for the class {Unisec::Hexdump} from the lib.
10
- #
11
- # Example:
12
- #
13
- # ```plaintext
14
- # $ unisec hexdump "ACCEIS"
15
- # UTF-8: 41 43 43 45 49 53
16
- # UTF-16BE: 0041 0043 0043 0045 0049 0053
17
- # UTF-16LE: 4100 4300 4300 4500 4900 5300
18
- # UTF-32BE: 00000041 00000043 00000043 00000045 00000049 00000053
19
- # UTF-32LE: 41000000 43000000 43000000 45000000 49000000 53000000
20
- #
21
- # $unisec hexdump "ACCEIS" --enc utf16le
22
- # 4100 4300 4300 4500 4900 5300
23
- # ```
24
- class Hexdump < Dry::CLI::Command
25
- desc 'Hexdump in all Unicode encodings'
26
-
27
- argument :input, required: true,
28
- desc: 'String input. Read from STDIN if equal to -.'
29
-
30
- option :enc, default: nil, values: %w[utf8 utf16be utf16le utf32be utf32le],
31
- desc: 'Output only in the specified encoding.'
32
-
33
- # Hexdump of all Unicode encodings.
34
- # @param input [String] Input string to encode
35
- def call(input: nil, **options)
36
- input = $stdin.read.chomp if input == '-'
37
- if options[:enc].nil?
38
- puts Unisec::Hexdump.new(input).display
39
- else
40
- # using send() is safe here thanks to the value whitelist
41
- puts Unisec::Hexdump.send(options[:enc], input)
42
- end
43
- end
44
- end
45
- end
46
- end
47
- end