unisec 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1496d192c32a345de077d1643d041f15f960095f4931165a19b1250a52c5897e
4
+ data.tar.gz: 553ce1f9fa9d21895d31c144dc15fc73483a7301ececa1046138eec9b51a8707
5
+ SHA512:
6
+ metadata.gz: ae33d34f6bdf6ae0c5a3dd97ffdb3beaf9b2b7a3d3d48697502b272a8db4fc17a6f91bec5596b12c83d7cc609f0b3f7c9a80b7da09d5584537388868364810a5
7
+ data.tar.gz: c440d1868a5a97a8d6a126c66541d5627ea039086f85d90347dcb52910c32ebc9d1c97f28ce3e0fef62b209f0c982e24414edce98a32406bbc1f802f42bd8ef4
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Alexandre ZANNI at ACCEIS
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/bin/unisec ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'unisec'
5
+ require 'unisec/cli/cli'
6
+
7
+ Dry::CLI.new(Unisec::CLI::Commands).call
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'unisec/cli/surrogates'
4
+ require 'unisec/cli/hexdump'
5
+ require 'unisec/cli/properties'
6
+ require 'unisec/cli/confusables'
7
+
8
+ module Unisec
9
+ # Module used to create the CLI for the executable
10
+ module CLI
11
+ # Registered commands for the CLI
12
+ module Commands
13
+ extend Dry::CLI::Registry
14
+
15
+ # Mapping between the (sub-)commands as seen by the user
16
+ # on the command-line interface and the CLI modules in the lib
17
+ register 'surrogates to', Surrogates::To
18
+ register 'surrogates from', Surrogates::From
19
+ register 'hexdump', Hexdump
20
+ register 'properties list', Properties::List
21
+ register 'properties codepoints', Properties::Codepoints
22
+ register 'properties char', Properties::Char
23
+ register 'confusables list', Confusables::List
24
+ register 'confusables randomize', Confusables::Randomize
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+ require 'unisec/utils'
6
+
7
+ module Unisec
8
+ module CLI
9
+ module Commands
10
+ # CLI sub-commands `unisec confusables xxx` for the class {Unisec::Confusables} from the lib.
11
+ module Confusables
12
+ # Command `unisec confusables list`
13
+ #
14
+ # Example:
15
+ #
16
+ # ```plaintext
17
+ # $ unisec confusables list '!'
18
+ # U+FF01 ! FULLWIDTH EXCLAMATION MARK
19
+ # U+01C3 ǃ LATIN LETTER RETROFLEX CLICK
20
+ # …
21
+ # ```
22
+ class List < Dry::CLI::Command
23
+ desc 'List confusables characters for a given character'
24
+
25
+ argument :character, required: true, desc: 'Unicode code point (as string)'
26
+ option :map, default: true, values: %w[true false],
27
+ desc: 'Allows partial mapping, includes confusable where the given chart is a part of'
28
+
29
+ # List confusables characters for a given character
30
+ # @param character [String] the character to search confusables for
31
+ # @option options [Boolean] :map allows partial mapping, includes confusables where the given char is a
32
+ # part of
33
+ def call(character: nil, **options)
34
+ to_bool = ->(str) { ['true', true].include?(str) }
35
+ Unisec::Confusables.list_display(character, map: to_bool.call(options.fetch(:map)))
36
+ end
37
+ end
38
+
39
+ # Command `unisec confusables randomize`
40
+ #
41
+ # Example:
42
+ #
43
+ # ```plaintext
44
+ # $ unisec confusables randomize noraj
45
+ # Original: noraj
46
+ # Transformed: ռ໐𝘳𝜶𝙟
47
+ # …
48
+ # ```
49
+ class Randomize < Dry::CLI::Command
50
+ desc 'Replace all characters from a string with random confusables when possible'
51
+
52
+ argument :str, required: true, desc: 'Unicode string'
53
+
54
+ # Replace all characters from a string with random confusables when possible
55
+ # @param str [String] Unicode string
56
+ def call(str: nil, **)
57
+ Unisec::Confusables.randomize_display(str)
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+
6
+ module Unisec
7
+ module CLI
8
+ module Commands
9
+ # CLI command `unisec hexdump` for the class {Unisec::Hexdump} from the lib.
10
+ #
11
+ # Example:
12
+ #
13
+ # ```plaintext
14
+ # $ unisec hexdump "ACCEIS"
15
+ # UTF-8: 41 43 43 45 49 53
16
+ # UTF-16BE: 0041 0043 0043 0045 0049 0053
17
+ # UTF-16LE: 4100 4300 4300 4500 4900 5300
18
+ # UTF-32BE: 00000041 00000043 00000043 00000045 00000049 00000053
19
+ # UTF-32LE: 41000000 43000000 43000000 45000000 49000000 53000000
20
+ # ```
21
+ class Hexdump < Dry::CLI::Command
22
+ desc 'Hexdump in all Unicode encodings'
23
+
24
+ argument :input, required: true,
25
+ desc: 'String input'
26
+
27
+ # Hexdump of all Unicode encodings.
28
+ # @param input [String] Input string to encode
29
+ def call(input: nil, **)
30
+ puts Unisec::Hexdump.new(input).display
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+ require 'unisec/utils'
6
+
7
+ module Unisec
8
+ module CLI
9
+ module Commands
10
+ # CLI sub-commands `unisec properties xxx` for the class {Unisec::Properties} from the lib.
11
+ module Properties
12
+ # Command `unisec properties list`
13
+ #
14
+ # Example:
15
+ #
16
+ # ```plaintext
17
+ # $ unisec properties list
18
+ # ASCII_Hex_Digit
19
+ # Age
20
+ # Alphabetic
21
+ # …
22
+ # ```
23
+ class List < Dry::CLI::Command
24
+ desc 'List all Unicode properties'
25
+
26
+ # List Unicode properties name
27
+ def call(**)
28
+ Unisec::Properties.list.each do |p|
29
+ puts p
30
+ end
31
+ end
32
+ end
33
+
34
+ # Command `unisec properties codepoints`
35
+ #
36
+ # Example:
37
+ #
38
+ # ```plaintext
39
+ # $ unisec properties codepoints Bidi_Control
40
+ # U+61C ؜ ARABIC LETTER MARK
41
+ # …
42
+ # ```
43
+ class Codepoints < Dry::CLI::Command
44
+ desc 'List all code points for a given property'
45
+
46
+ argument :property, required: true, desc: 'Unicode property name'
47
+
48
+ # List code points matching a Unicode property
49
+ # @param property [String] property name
50
+ def call(property: nil, **)
51
+ Unisec::Properties.codepoints_display(property)
52
+ end
53
+ end
54
+
55
+ # Command `unisec properties char`
56
+ #
57
+ # Example:
58
+ #
59
+ # ```plaintext
60
+ # $ unisec properties char é
61
+ # Name: LATIN SMALL LETTER E WITH ACUTE
62
+ # Code Point: U+00E9
63
+ #
64
+ # Block: Latin-1 Supplement
65
+ # …
66
+ # ```
67
+ class Char < Dry::CLI::Command
68
+ desc 'Returns all properties of a given Unicode character (code point as string)'
69
+
70
+ argument :character, required: true, desc: 'Unicode character'
71
+ option :extended, default: false, values: %w[true false], desc: 'Show all properties'
72
+
73
+ # Returns all properties of a given Unicode character (code point as string)
74
+ # @param character [String] Unicode code point (as character / string)
75
+ # @option options [Boolean] :extended Show all properties
76
+ def call(character: nil, **options)
77
+ to_bool = ->(str) { str == 'true' }
78
+ Unisec::Properties.char_display(character, extended: to_bool.call(options.fetch(:extended)))
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+ require 'unisec/utils'
6
+
7
+ module Unisec
8
+ module CLI
9
+ module Commands
10
+ # CLI sub-commands `unisec surrogates xxx` for the class {Unisec::Surrogates} from the lib.
11
+ module Surrogates
12
+ # Command `unisec surrogates from`
13
+ #
14
+ # Example:
15
+ #
16
+ # ```plaintext
17
+ # $ unisec surrogates from 0xD801 0xDC37
18
+ # Char: 𐐷
19
+ # Code Point: 0x10437, 0d66615, 0b10000010000110111
20
+ # High Surrogate: 0xD801, 0d55297, 0b1101100000000001
21
+ # Low Surrogate: 0xDC37, 0d56375, 0b1101110000110111
22
+ # ```
23
+ class From < Dry::CLI::Command
24
+ desc 'Code point ⬅️ Surrogates'
25
+
26
+ argument :high, required: true,
27
+ desc: 'High surrogate (in hexadecimal (0xXXXX), decimal (0dXXXX), binary (0bXXXX) or as text)'
28
+ argument :low, required: true,
29
+ desc: 'Low surrogate (in hexadecimal (0xXXXX), decimal (0dXXXX), binary (0bXXXX) or as text)'
30
+
31
+ # Calculate the Unicode code point based on the surrogates.
32
+ # @param high [String] high surrogate (hexadecimal, decimal, binary or as text)
33
+ # @param low [String] low surrogate (hexadecimal, decimal, binary or as text)
34
+ def call(high: nil, low: nil, **)
35
+ puts Unisec::Surrogates.new(Unisec::Utils::String.convert(high, :integer),
36
+ Unisec::Utils::String.convert(low, :integer)).display
37
+ end
38
+ end
39
+
40
+ # Command `unisec surrogates to`
41
+ #
42
+ # Example:
43
+ #
44
+ # ```plaintext
45
+ # $ unisec surrogates to 0x1F4A9
46
+ # Char: 💩
47
+ # Code Point: 0x1F4A9, 0d128169, 0b11111010010101001
48
+ # High Surrogate: 0xD83D, 0d55357, 0b1101100000111101
49
+ # Low Surrogate: 0xDCA9, 0d56489, 0b1101110010101001
50
+ # ```
51
+ class To < Dry::CLI::Command
52
+ desc 'Code point ➡️ Surrogates'
53
+
54
+ argument :codepoint, required: true,
55
+ desc: 'One code point (character) (in hexadecimal (0xXXXX), decimal (0dXXXX), binary ' \
56
+ '(0bXXXX) or as text)'
57
+
58
+ # Calculate the surrogates based on the Unicode code point.
59
+ # @param codepoint [String] code point (hexadecimal, decimal, binary or as text)
60
+ def call(codepoint: nil, **)
61
+ puts Unisec::Surrogates.new(Unisec::Utils::String.convert(codepoint, :integer)).display
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'unicode/confusable'
4
+ require 'twitter_cldr'
5
+
6
+ module Unisec
7
+ # Operations about Unicode confusable characters (homoglyphs).
8
+ class Confusables
9
+ # List confusables characters for a given character
10
+ # @param chr [String] the character to search confusables for
11
+ # @param map [Boolean] allows partial mapping, includes confusables where the given char is a part of
12
+ # @return [Array<String>] list of confusables
13
+ # @example
14
+ # Unisec::Confusables.list('!') # => ["!", "ǃ", "ⵑ", "‼", "⁉", "⁈"]
15
+ # Unisec::Confusables.list('!', map: false) # => ["!", "ǃ", "ⵑ"]
16
+ def self.list(chr, map: true)
17
+ Unicode::Confusable.list(chr, map)
18
+ end
19
+
20
+ # Display a CLI-friendly output listing all confusables corresponding to a character (code point)
21
+ # @param chr [String] the character to search confusables for
22
+ # @param map [Boolean] allows partial mapping, includes confusables where the given char is a part of
23
+ def self.list_display(chr, map: true)
24
+ Confusables.list(chr, map: map).each do |confu|
25
+ puts "#{Properties.char2codepoint(confu).ljust(9)} #{confu.ljust(4)} " \
26
+ "#{TwitterCldr::Shared::CodePoint.get(confu.codepoints.first).name}"
27
+ end
28
+ nil
29
+ end
30
+
31
+ # Replace all characters with random confusables when possible.
32
+ # @param str [String] Unicode string
33
+ # @return [String] input randomized with confusables
34
+ # @example
35
+ # Unisec::Confusables.randomize('noraj') # => "𝓃ⲟ𝓇𝒶j"
36
+ # Unisec::Confusables.randomize('noraj') # => "𝗻૦𝚛⍺𝐣"
37
+ # Unisec::Confusables.randomize('noraj') # => "𝔫𞺄𝕣⍺j"
38
+ def self.randomize(str)
39
+ out = ''
40
+ str.each_char do |chr|
41
+ confu = Confusables.list(chr, map: false).sample
42
+ out += confu.nil? ? chr : confu
43
+ end
44
+ out
45
+ end
46
+
47
+ # Display a CLI-friendly output of a string where characters are replaced with random confusables
48
+ # @param str [String] Unicode string
49
+ def self.randomize_display(str)
50
+ display = ->(key, value) { puts Paint[key, :red, :bold].ljust(23) + " #{value}" }
51
+ display.call('Original:', str)
52
+ display.call('Transformed:', Unisec::Confusables.randomize(str))
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ctf_party'
4
+
5
+ module Unisec
6
+ # Hexdump of all Unicode encodings.
7
+ class Hexdump
8
+ # UTF-8 hexdump
9
+ # @return [String] UTF-8 hexdump
10
+ attr_reader :utf8
11
+
12
+ # UTF-16BE hexdump
13
+ # @return [String] UTF-16BE hexdump
14
+ attr_reader :utf16be
15
+
16
+ # UTF-16LE hexdump
17
+ # @return [String] UTF-16LE hexdump
18
+ attr_reader :utf16le
19
+
20
+ # UTF-32BE hexdump
21
+ # @return [String] UTF-32BE hexdump
22
+ attr_reader :utf32be
23
+
24
+ # UTF-32LE hexdump
25
+ # @return [String] UTF-32LE hexdump
26
+ attr_reader :utf32le
27
+
28
+ # Init the hexdump.
29
+ # @param str [String] Input string to encode
30
+ # @example
31
+ # hxd = Unisec::Hexdump.new('I 💕 Ruby 💎')
32
+ # hxd.utf8 # => "49 20 f0 9f 92 95 20 52 75 62 79 20 f0 9f 92 8e"
33
+ # hxd.utf16be # => "0049 0020 d83d dc95 0020 0052 0075 0062 0079 0020 d83d dc8e"
34
+ # hxd.utf32be # => "00000049 00000020 0001f495 00000020 00000052 00000075 00000062 00000079 00000020 0001f48e"
35
+ def initialize(str)
36
+ @utf8 = Hexdump.utf8(str)
37
+ @utf16be = Hexdump.utf16be(str)
38
+ @utf16le = Hexdump.utf16le(str)
39
+ @utf32be = Hexdump.utf32be(str)
40
+ @utf32le = Hexdump.utf32le(str)
41
+ end
42
+
43
+ # Encode to UTF-8 in hexdump format (spaced at every code unit = every byte)
44
+ # @param str [String] Input string to encode
45
+ # @return [String] hexdump (UTF-8 encoded)
46
+ # @example
47
+ # Unisec::Hexdump.utf8('🐋') # => "f0 9f 90 8b"
48
+ def self.utf8(str)
49
+ str.encode('UTF-8').to_hex.scan(/.{2}/).join(' ')
50
+ end
51
+
52
+ # Encode to UTF-16BE in hexdump format (spaced at every code unit = every 2 bytes)
53
+ # @param str [String] Input string to encode
54
+ # @return [String] hexdump (UTF-16BE encoded)
55
+ # @example
56
+ # Unisec::Hexdump.utf16be('🐋') # => "d83d dc0b"
57
+ def self.utf16be(str)
58
+ str.encode('UTF-16BE').to_hex.scan(/.{4}/).join(' ')
59
+ end
60
+
61
+ # Encode to UTF-16LE in hexdump format (spaced at every code unit = every 2 bytes)
62
+ # @param str [String] Input string to encode
63
+ # @return [String] hexdump (UTF-16LE encoded)
64
+ # @example
65
+ # Unisec::Hexdump.utf16le('🐋') # => "3dd8 0bdc"
66
+ def self.utf16le(str)
67
+ str.encode('UTF-16LE').to_hex.scan(/.{4}/).join(' ')
68
+ end
69
+
70
+ # Encode to UTF-32BE in hexdump format (spaced at every code unit = every 4 bytes)
71
+ # @param str [String] Input string to encode
72
+ # @return [String] hexdump (UTF-32BE encoded)
73
+ # @example
74
+ # Unisec::Hexdump.utf32be('🐋') # => "0001f40b"
75
+ def self.utf32be(str)
76
+ str.encode('UTF-32BE').to_hex.scan(/.{8}/).join(' ')
77
+ end
78
+
79
+ # Encode to UTF-32LE in hexdump format (spaced at every code unit = every 4 bytes)
80
+ # @param str [String] Input string to encode
81
+ # @return [String] hexdump (UTF-32LE encoded)
82
+ # @example
83
+ # Unisec::Hexdump.utf32le('🐋') # => "0bf40100"
84
+ def self.utf32le(str)
85
+ str.encode('UTF-32LE').to_hex.scan(/.{8}/).join(' ')
86
+ end
87
+
88
+ # Display a CLI-friendly output summarizing the hexdump in all Unicode encodings
89
+ # @example
90
+ # puts Unisec::Hexdump.new('K').display # =>
91
+ # # UTF-8: e2 84 aa
92
+ # # UTF-16BE: 212a
93
+ # # UTF-16LE: 2a21
94
+ # # UTF-32BE: 0000212a
95
+ # # UTF-32LE: 2a210000
96
+ def display
97
+ "UTF-8: #{@utf8}\n" \
98
+ "UTF-16BE: #{@utf16be}\n" \
99
+ "UTF-16LE: #{@utf16le}\n" \
100
+ "UTF-32BE: #{@utf32be}\n" \
101
+ "UTF-32LE: #{@utf32le}"
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,178 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'twitter_cldr'
4
+ require 'paint'
5
+
6
+ module Unisec
7
+ # Manipulate Unicode properties
8
+ class Properties
9
+ # List Unicode properties name
10
+ # @return [Array<String>] properties name
11
+ # @example
12
+ # Unisec::Properties.list # => ["ASCII_Hex_Digit", "Age", "Alphabetic", … ]
13
+ def self.list
14
+ TwitterCldr::Shared::CodePoint.properties.property_names
15
+ end
16
+
17
+ # List all code points for a given property
18
+ # @param prop [String] the property name
19
+ # @return [Array<Hash>] Array of code points (`{char: String, codepoint: Integer, name: String}`)
20
+ # @example
21
+ # Unisec::Properties.codepoints('Quotation_Mark')
22
+ # # =>
23
+ # # [{:char=>"\"", :codepoint=>34, :name=>"QUOTATION MARK"},
24
+ # # {:char=>"'", :codepoint=>39, :name=>"APOSTROPHE"},
25
+ # # … ]
26
+ def self.codepoints(prop)
27
+ cp = TwitterCldr::Shared::CodePoint
28
+ out = []
29
+ ranges = cp.properties.code_points_for_property(prop).ranges
30
+ ranges.each do |range|
31
+ range.each do |i|
32
+ codepoint = cp.get(i)
33
+ out << {
34
+ char: TwitterCldr::Utils::CodePoints.to_string([codepoint.code_point]),
35
+ codepoint: codepoint.code_point,
36
+ name: codepoint.name
37
+ }
38
+ end
39
+ end
40
+ out
41
+ end
42
+
43
+ # Display a CLI-friendly output listing all code points corresponding to a property.
44
+ # @example
45
+ # Unisec::Properties.codepoints_display('Quotation_Mark')
46
+ # # =>
47
+ # # U+0022 " QUOTATION MARK
48
+ # # U+0027 ' APOSTROPHE
49
+ # # …
50
+ def self.codepoints_display(prop)
51
+ codepoints = Properties.codepoints(prop)
52
+ codepoints.each do |cp|
53
+ puts "#{Properties.char2codepoint(cp[:char]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
54
+ end
55
+ nil
56
+ end
57
+
58
+ # Returns all properties of a given unicode character (code point)
59
+ # @param chr [String] Unicode code point (as character / string)
60
+ # @return [Hash] All properties of the given code point
61
+ # @example
62
+ # Unisec::Properties.char('é')
63
+ # # =>
64
+ # # {:age=>"1.1",
65
+ # # … }
66
+ def self.char(chr)
67
+ cp_num = TwitterCldr::Utils::CodePoints.from_string(chr)
68
+ cp = TwitterCldr::Shared::CodePoint.get(cp_num.first)
69
+ props = cp.properties
70
+ props_hash = props.properties_hash.dup
71
+ %w[Age Block General_Category Script].each { |p| props_hash.delete(p) } # Remaining properties
72
+ categories = props.general_category.map do |cat|
73
+ TwitterCldr::Shared::PropertyValueAliases.long_alias_for('gc', cat)
74
+ end
75
+ {
76
+ age: props.age.join,
77
+ block: props.block.join,
78
+ category: categories[1],
79
+ subcategory: categories[0],
80
+ codepoint: Properties.char2codepoint(chr),
81
+ name: cp.name,
82
+ script: props.script.join,
83
+ case: {
84
+ ruby: {
85
+ lowercase: chr.downcase,
86
+ uppercase: chr.upcase
87
+ },
88
+ twitter: {
89
+ lowercase: chr.localize.downcase.to_s,
90
+ uppercase: chr.localize.upcase.to_s,
91
+ titlecase: chr.localize.titlecase.to_s,
92
+ casefold: chr.localize.casefold.to_s
93
+ }
94
+ },
95
+ normalization: {
96
+ ruby: {
97
+ nfkd: chr.unicode_normalize(:nfkd),
98
+ nfkc: chr.unicode_normalize(:nfkc),
99
+ nfd: chr.unicode_normalize(:nfd),
100
+ nfc: chr.unicode_normalize(:nfc)
101
+ },
102
+ twitter: {
103
+ nfkd: chr.localize.normalize(using: :NFKD).to_s,
104
+ nfkc: chr.localize.normalize(using: :NFKC).to_s,
105
+ nfd: chr.localize.normalize(using: :NFD).to_s,
106
+ nfc: chr.localize.normalize(using: :NFC).to_s
107
+ }
108
+ },
109
+ other_properties: props_hash
110
+ }
111
+ end
112
+
113
+ # Display a CLI-friendly output listing all properties corresponding to character (code point)
114
+ # @param chr [String] Unicode code point (as character / string)
115
+ # @param extended [Boolean] By default, it will only show common properties, with extended set to `true` it will
116
+ # show all of them.
117
+ def self.char_display(chr, extended: false)
118
+ data = Properties.char(chr)
119
+ display = ->(key, value) { puts Paint[key, :red, :bold].ljust(30) + " #{value}" }
120
+ display.call('Name:', data[:name])
121
+ display.call('Code Point:', data[:codepoint])
122
+ puts
123
+ display.call('Block:', data[:block])
124
+ display.call('Category:', data[:category])
125
+ display.call('Sub-Category:', data[:subcategory])
126
+ display.call('Script:', data[:script])
127
+ display.call('Since (age):', "Version #{data[:age]}")
128
+ puts
129
+ x = data.dig(:case, :twitter, :uppercase)
130
+ display.call('Uppercase:', x + " (#{Properties.char2codepoint(x)})")
131
+ x = data.dig(:case, :twitter, :lowercase)
132
+ display.call('Lowercase:', x + " (#{Properties.char2codepoint(x)})")
133
+ x = data.dig(:case, :twitter, :titlecase)
134
+ display.call('Titlecase:', x + " (#{Properties.char2codepoint(x)})")
135
+ x = data.dig(:case, :twitter, :casefold)
136
+ display.call('Casefold:', x + " (#{Properties.char2codepoint(x)})")
137
+ puts
138
+ x = data.dig(:normalization, :twitter, :nfkd)
139
+ display.call('Normalization NFKD:', x + " (#{Properties.chars2codepoints(x)})")
140
+ x = data.dig(:normalization, :twitter, :nfkc)
141
+ display.call('Normalization NFKC:', x + " (#{Properties.chars2codepoints(x)})")
142
+ x = data.dig(:normalization, :twitter, :nfd)
143
+ display.call('Normalization NFD:', x + " (#{Properties.chars2codepoints(x)})")
144
+ x = data.dig(:normalization, :twitter, :nfc)
145
+ display.call('Normalization NFC:', x + " (#{Properties.chars2codepoints(x)})")
146
+ if extended
147
+ puts
148
+ data[:other_properties].each do |k, v|
149
+ display.call(k, v&.join)
150
+ end
151
+ end
152
+ nil
153
+ end
154
+
155
+ # Display the code point in Unicode format for a given character (code point as string)
156
+ # @param chr [String] Unicode code point (as character / string)
157
+ # @return [String] code point in Unicode format
158
+ # @example
159
+ # Unisec::Properties.char2codepoint('💎') # => "U+1F48E"
160
+ def self.char2codepoint(chr)
161
+ "U+#{format('%.4x', chr.codepoints.first).upcase}"
162
+ end
163
+
164
+ # Display the code points in Unicode format for the given characters (code points as string)
165
+ # @param chrs [String] Unicode code points (as characters / string)
166
+ # @return [String] code points in Unicode format
167
+ # @example
168
+ # Unisec::Properties.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
169
+ # Unisec::Properties.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"
170
+ def self.chars2codepoints(chrs)
171
+ out = []
172
+ chrs.each_char do |chr|
173
+ out << Properties.char2codepoint(chr)
174
+ end
175
+ out.join(' ')
176
+ end
177
+ end
178
+ end
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'unisec/utils'
4
+ require 'ctf_party'
5
+
6
+ module Unisec
7
+ # UTF-16 surrogates conversion.
8
+ class Surrogates
9
+ # Unicode code point
10
+ # @return [Integer] decimal codepoint
11
+ attr_reader :cp
12
+
13
+ # High surrogate (1st code unit of a surrogate pair). Also called lead surrogate.
14
+ # @return [Integer] decimal high surrogate
15
+ attr_reader :hs
16
+
17
+ # Low surrogate (2nd code unit of a surrogate pair). Also called trail surrogate.
18
+ # @return [Integer] decimal low surrogate
19
+ attr_reader :ls
20
+
21
+ # Init the surrogate pair.
22
+ # @param args [Integer] If one argument is provided, it's evaluated as the
23
+ # code point and the two surrogates will be calculated automatically.
24
+ # If two arguments are provided, they are evaluated as a surrogate pair (high
25
+ # then low) and the code point will be calculated.
26
+ # @example
27
+ # surr = Unisec::Surrogates.new(128169)
28
+ # # => #<Unisec::Surrogates:0x00007f96920a7ca8 @cp=128169, @hs=55357, @ls=56489>
29
+ # surr.cp # => 128169
30
+ # surr.hs # => 55357
31
+ # surr.ls # => 56489
32
+ # Unisec::Surrogates.new(55357, 56489)
33
+ # # => #<Unisec::Surrogates:0x00007f96920689b8 @cp=128169, @hs=55357, @ls=56489>
34
+ def initialize(*args)
35
+ if args.size == 1
36
+ @cp = args[0]
37
+ @hs = high_surrogate
38
+ @ls = low_surrogate
39
+ elsif args.size == 2
40
+ @hs = args[0]
41
+ @ls = args[1]
42
+ @cp = code_point
43
+ else
44
+ raise ArgumentError
45
+ end
46
+ end
47
+
48
+ # Calculate the high surrogate based on the Unicode code point.
49
+ # @param codepoint [Integer] decimal codepoint
50
+ # @return [Integer] decimal high surrogate
51
+ # @example
52
+ # Unisec::Surrogates.high_surrogate(128169) # => 55357
53
+ def self.high_surrogate(codepoint)
54
+ (((codepoint - 0x10000) / 0x400).floor + 0xd800)
55
+ end
56
+
57
+ # Calculate the low surrogate based on the Unicode code point.
58
+ # @param codepoint [Integer] decimal codepoint
59
+ # @return [Integer] decimal low surrogate
60
+ # @example
61
+ # Unisec::Surrogates.low_surrogate(128169) # => 56489
62
+ def self.low_surrogate(codepoint)
63
+ (((codepoint - 0x10000) % 0x400) + 0xdc00)
64
+ end
65
+
66
+ # Calculate the Unicode code point based on the surrogates.
67
+ # @param hs [Integer] decimal high surrogate
68
+ # @param ls [Integer] decimal low surrogate
69
+ # @return [Integer] decimal code point
70
+ # @example
71
+ # Unisec::Surrogates.code_point(55357, 56489) # => 128169
72
+ def self.code_point(hs, ls)
73
+ (((hs - 0xd800) * 0x400) + ls - 0xdc00 + 0x10000)
74
+ end
75
+
76
+ # Same as accessing {.hs}. Calculate the {.high_surrogate}.
77
+ # @return [Integer] decimal high surrogate
78
+ # @example
79
+ # surr = Unisec::Surrogates.new(128169)
80
+ # surr.high_surrogate # => 55357
81
+ def high_surrogate
82
+ @hs = Surrogates.high_surrogate(@cp)
83
+ end
84
+
85
+ # Same as accessing {.ls}. Calculate the {.low_surrogate}.
86
+ # @return [Integer] decimal low surrogate
87
+ # @example
88
+ # surr = Unisec::Surrogates.new(128169)
89
+ # surr.low_surrogate # => 56489
90
+ def low_surrogate
91
+ @ls = Surrogates.low_surrogate(@cp)
92
+ end
93
+
94
+ # Same as accessing {.cp}. Calculate the {.code_point}.
95
+ # @return [Integer] decimal code point
96
+ # surr = Unisec::Surrogates.new(55357, 56489)
97
+ # surr.code_point # => 128169
98
+ def code_point
99
+ @cp = Surrogates.code_point(@hs, @ls)
100
+ end
101
+
102
+ # Display a CLI-friendly output summarizing everything about the surrogates:
103
+ # the corresponding character, code point, high and low surrogates
104
+ # (each displayed as hexadecimal, decimal and binary).
105
+ # @example
106
+ # surr = Unisec::Surrogates.new(128169)
107
+ # puts surr.display # =>
108
+ # # Char: 💩
109
+ # # Code Point: 0x1F4A9, 0d128169, 0b11111010010101001
110
+ # # High Surrogate: 0xD83D, 0d55357, 0b1101100000111101
111
+ # # Low Surrogate: 0xDCA9, 0d56489, 0b1101110010101001
112
+ def display
113
+ "Char: #{[@cp].pack('U*')}\n" \
114
+ "Code Point: 0x#{@cp.to_hex}, 0d#{@cp}, 0b#{@cp.to_bin}\n" \
115
+ "High Surrogate: 0x#{@hs.to_hex}, 0d#{@hs}, 0b#{@hs.to_bin}\n" \
116
+ "Low Surrogate: 0x#{@ls.to_hex}, 0d#{@ls}, 0b#{@ls.to_bin}"
117
+ end
118
+ end
119
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ctf_party'
4
+
5
+ class Integer
6
+ # Convert an integer to a hexadecimal string
7
+ # @return [String] The integer converted to hexadecimal and cast to an upper case string
8
+ # @example
9
+ # 42.to_hex # => "2A"
10
+ def to_hex
11
+ to_s(16).upcase
12
+ end
13
+
14
+ # Convert an integer to a binary string
15
+ # @return [String] The integer converted to binary and cast to a string
16
+ # @example
17
+ # 42.to_bin # => "101010"
18
+ def to_bin
19
+ to_s(2)
20
+ end
21
+ end
22
+
23
+ module Unisec
24
+ # Generic stuff not Unicode-related that can be re-used.
25
+ module Utils
26
+ # About string conversion and manipulation.
27
+ module String
28
+ # Convert a string input into the chosen type.
29
+ # @param input [String] If the target type is `:integer`, the string must represent a number encoded in
30
+ # hexadecimal, decimal, binary. If it's a Unicode string, only the first code point will be taken into account.
31
+ # @param target_type [Symbol] Convert to the chosen type. Currently only supports `:integer`.
32
+ # @return [Variable] The type of the output depends on the chosen `target_type`.
33
+ # @example
34
+ # Unisec::Utils::String.convert('0x1f4a9', :integer) # => 128169
35
+ def self.convert(input, target_type)
36
+ case target_type
37
+ when :integer
38
+ convert_to_integer(input)
39
+ else
40
+ raise TypeError, "Target type \"#{target_type}\" not avaible"
41
+ end
42
+ end
43
+
44
+ # Internal method used for {.convert}.
45
+ #
46
+ # Convert a string input into integer.
47
+ # @param input [String] The string must represent a number encoded in hexadecimal, decimal, binary. If it's a
48
+ # Unicode string, only the first code point will be taken into account. The input type is determined
49
+ # automatically based on the prefix.
50
+ # @return [Integer]
51
+ # @example
52
+ # # Hexadecimal
53
+ # Unisec::Utils::String.convert_to_integer('0x1f4a9') # => 128169
54
+ # # Decimal
55
+ # Unisec::Utils::String.convert_to_integer('0d128169') # => 128169
56
+ # # Binary
57
+ # Unisec::Utils::String.convert_to_integer('0b11111010010101001') # => 128169
58
+ # # Unicode string
59
+ # Unisec::Utils::String.convert_to_integer('💩') # => 128169
60
+ def self.convert_to_integer(input)
61
+ case autodetect(input)
62
+ when :hexadecimal
63
+ input.hex2dec(prefix: '0x').to_i
64
+ when :decimal
65
+ input.to_i
66
+ when :binary
67
+ input.bin2hex.hex2dec.to_i
68
+ when :string
69
+ input.codepoints.first
70
+ else
71
+ raise TypeError, "Input \"#{input}\" is not of the expected type"
72
+ end
73
+ end
74
+
75
+ # Internal method used for {.convert}.
76
+ #
77
+ # Autodetect the representation type of the string input.
78
+ # @param str [String] Input.
79
+ # @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string`.
80
+ # @example
81
+ # # Hexadecimal
82
+ # Unisec::Utils::String.autodetect('0x1f4a9') # => :hexadecimal
83
+ # # Decimal
84
+ # Unisec::Utils::String.autodetect('0d128169') # => :decimal
85
+ # # Binary
86
+ # Unisec::Utils::String.autodetect('0b11111010010101001') # => :binary
87
+ # # Unicode string
88
+ # Unisec::Utils::String.autodetect('💩') # => :string
89
+ def self.autodetect(str)
90
+ case str
91
+ when /0x[0-9a-fA-F]/
92
+ :hexadecimal
93
+ when /0d[0-9]+/
94
+ :decimal
95
+ when /0b[0-1]+/
96
+ :binary
97
+ else
98
+ :string
99
+ end
100
+ end
101
+ end
102
+ end
103
+ end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Unisec
4
+ # Version of unisec library and app
5
+ VERSION = '0.0.1'
6
+ end
data/lib/unisec.rb ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'unisec/version'
4
+
5
+ require 'unisec/surrogates'
6
+ require 'unisec/hexdump'
7
+ require 'unisec/properties'
8
+ require 'unisec/confusables'
metadata ADDED
@@ -0,0 +1,143 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: unisec
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Alexandre ZANNI
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-07-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ctf-party
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: dry-cli
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: paint
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.3'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: twitter_cldr
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '6.11'
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: 6.11.5
65
+ type: :runtime
66
+ prerelease: false
67
+ version_requirements: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - "~>"
70
+ - !ruby/object:Gem::Version
71
+ version: '6.11'
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 6.11.5
75
+ - !ruby/object:Gem::Dependency
76
+ name: unicode-confusable
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '1.9'
82
+ type: :runtime
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '1.9'
89
+ description: Toolkit for security research manipulating Unicode
90
+ email: alexandre.zanni@europe.com
91
+ executables:
92
+ - unisec
93
+ extensions: []
94
+ extra_rdoc_files: []
95
+ files:
96
+ - LICENSE
97
+ - bin/unisec
98
+ - lib/unisec.rb
99
+ - lib/unisec/cli/cli.rb
100
+ - lib/unisec/cli/confusables.rb
101
+ - lib/unisec/cli/hexdump.rb
102
+ - lib/unisec/cli/properties.rb
103
+ - lib/unisec/cli/surrogates.rb
104
+ - lib/unisec/confusables.rb
105
+ - lib/unisec/hexdump.rb
106
+ - lib/unisec/properties.rb
107
+ - lib/unisec/surrogates.rb
108
+ - lib/unisec/utils.rb
109
+ - lib/unisec/version.rb
110
+ homepage: https://github.com/Acceis/unisec
111
+ licenses:
112
+ - MIT
113
+ metadata:
114
+ yard.run: yard
115
+ bug_tracker_uri: https://github.com/Acceis/unisec/issues
116
+ changelog_uri: https://github.com/Acceis/unisec/releases
117
+ documentation_uri: https://acceis.github.io/unisec/
118
+ homepage_uri: https://github.com/Acceis/unisec
119
+ source_code_uri: https://github.com/Acceis/unisec/
120
+ rubygems_mfa_required: 'true'
121
+ post_install_message:
122
+ rdoc_options: []
123
+ require_paths:
124
+ - lib
125
+ required_ruby_version: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - ">="
128
+ - !ruby/object:Gem::Version
129
+ version: 3.0.0
130
+ - - "<"
131
+ - !ruby/object:Gem::Version
132
+ version: '4.0'
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
138
+ requirements: []
139
+ rubygems_version: 3.4.1
140
+ signing_key:
141
+ specification_version: 4
142
+ summary: Unicode Security Toolkit
143
+ test_files: []