unisec 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1496d192c32a345de077d1643d041f15f960095f4931165a19b1250a52c5897e
4
+ data.tar.gz: 553ce1f9fa9d21895d31c144dc15fc73483a7301ececa1046138eec9b51a8707
5
+ SHA512:
6
+ metadata.gz: ae33d34f6bdf6ae0c5a3dd97ffdb3beaf9b2b7a3d3d48697502b272a8db4fc17a6f91bec5596b12c83d7cc609f0b3f7c9a80b7da09d5584537388868364810a5
7
+ data.tar.gz: c440d1868a5a97a8d6a126c66541d5627ea039086f85d90347dcb52910c32ebc9d1c97f28ce3e0fef62b209f0c982e24414edce98a32406bbc1f802f42bd8ef4
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Alexandre ZANNI at ACCEIS
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/bin/unisec ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'unisec'
5
+ require 'unisec/cli/cli'
6
+
7
# Build the command-line application from the registered commands, then run it.
unisec_cli = Dry::CLI.new(Unisec::CLI::Commands)
unisec_cli.call
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'unisec/cli/surrogates'
4
+ require 'unisec/cli/hexdump'
5
+ require 'unisec/cli/properties'
6
+ require 'unisec/cli/confusables'
7
+
8
module Unisec
  # Namespace holding what is needed to build the CLI of the executable
  module CLI
    # Registry of the commands exposed by the CLI
    module Commands
      extend Dry::CLI::Registry

      # (Sub-)command names as typed by the user on the command line, mapped to
      # the CLI classes implementing them. Commands are registered in the
      # insertion order of this table.
      {
        'surrogates to' => Surrogates::To,
        'surrogates from' => Surrogates::From,
        'hexdump' => Hexdump,
        'properties list' => Properties::List,
        'properties codepoints' => Properties::Codepoints,
        'properties char' => Properties::Char,
        'confusables list' => Confusables::List,
        'confusables randomize' => Confusables::Randomize
      }.each do |command_name, command_class|
        register command_name, command_class
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+ require 'unisec/utils'
6
+
7
module Unisec
  module CLI
    module Commands
      # CLI sub-commands `unisec confusables xxx` for the class {Unisec::Confusables} from the lib.
      module Confusables
        # Command `unisec confusables list`
        #
        # Example:
        #
        # ```plaintext
        # $ unisec confusables list '!'
        # U+FF01 ! FULLWIDTH EXCLAMATION MARK
        # U+01C3 ǃ LATIN LETTER RETROFLEX CLICK
        # …
        # ```
        class List < Dry::CLI::Command
          desc 'List confusable characters for a given character'

          argument :character, required: true, desc: 'Unicode code point (as string)'
          option :map, default: true, values: %w[true false],
                       desc: 'Allows partial mapping, includes confusables where the given character is a part of'

          # List confusable characters for a given character
          # @param character [String] the character to search confusables for
          # @option options [Boolean, String] :map allows partial mapping, includes confusables where the given
          #   character is a part of. Both the boolean `true` (declared default) and the string `'true'` (value
          #   listed for the command line) enable it; anything else disables it.
          def call(character: nil, **options)
            partial_mapping = ['true', true].include?(options.fetch(:map))
            Unisec::Confusables.list_display(character, map: partial_mapping)
          end
        end

        # Command `unisec confusables randomize`
        #
        # Example:
        #
        # ```plaintext
        # $ unisec confusables randomize noraj
        # Original: noraj
        # Transformed: ռ໐𝘳𝜶𝙟
        # …
        # ```
        class Randomize < Dry::CLI::Command
          desc 'Replace all characters from a string with random confusables when possible'

          argument :str, required: true, desc: 'Unicode string'

          # Replace all characters from a string with random confusables when possible
          # @param str [String] Unicode string
          def call(str: nil, **)
            Unisec::Confusables.randomize_display(str)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+
6
module Unisec
  module CLI
    module Commands
      # CLI command `unisec hexdump` for the class {Unisec::Hexdump} from the lib.
      #
      # Example:
      #
      # ```plaintext
      # $ unisec hexdump "ACCEIS"
      # UTF-8: 41 43 43 45 49 53
      # UTF-16BE: 0041 0043 0043 0045 0049 0053
      # UTF-16LE: 4100 4300 4300 4500 4900 5300
      # UTF-32BE: 00000041 00000043 00000043 00000045 00000049 00000053
      # UTF-32LE: 41000000 43000000 43000000 45000000 49000000 53000000
      # ```
      class Hexdump < Dry::CLI::Command
        desc 'Hexdump in all Unicode encodings'

        argument :input, required: true, desc: 'String input'

        # Print the hexdump of the input in all Unicode encodings.
        # @param input [String] Input string to encode
        def call(input: nil, **)
          report = Unisec::Hexdump.new(input)
          puts report.display
        end
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+ require 'unisec/utils'
6
+
7
module Unisec
  module CLI
    module Commands
      # CLI sub-commands `unisec properties xxx` for the class {Unisec::Properties} from the lib.
      module Properties
        # Command `unisec properties list`
        #
        # Example:
        #
        # ```plaintext
        # $ unisec properties list
        # ASCII_Hex_Digit
        # Age
        # Alphabetic
        # …
        # ```
        class List < Dry::CLI::Command
          desc 'List all Unicode properties'

          # Print the name of every Unicode property, one per line
          def call(**)
            Unisec::Properties.list.each { |property_name| puts property_name }
          end
        end

        # Command `unisec properties codepoints`
        #
        # Example:
        #
        # ```plaintext
        # $ unisec properties codepoints Bidi_Control
        # U+061C ؜ ARABIC LETTER MARK
        # …
        # ```
        class Codepoints < Dry::CLI::Command
          desc 'List all code points for a given property'

          argument :property, required: true, desc: 'Unicode property name'

          # List code points matching a Unicode property
          # @param property [String] property name
          def call(property: nil, **)
            Unisec::Properties.codepoints_display(property)
          end
        end

        # Command `unisec properties char`
        #
        # Example:
        #
        # ```plaintext
        # $ unisec properties char é
        # Name: LATIN SMALL LETTER E WITH ACUTE
        # Code Point: U+00E9
        #
        # Block: Latin-1 Supplement
        # …
        # ```
        class Char < Dry::CLI::Command
          desc 'Returns all properties of a given Unicode character (code point as string)'

          argument :character, required: true, desc: 'Unicode character'
          option :extended, default: false, values: %w[true false], desc: 'Show all properties'

          # Returns all properties of a given Unicode character (code point as string)
          # @param character [String] Unicode code point (as character / string)
          # @option options [Boolean, String] :extended Show all properties. Enabled by the string `'true'`
          #   (command-line value) as well as by the boolean `true`, the same way the `confusables list`
          #   command parses its `map` option.
          def call(character: nil, **options)
            show_everything = ['true', true].include?(options.fetch(:extended))
            Unisec::Properties.char_display(character, extended: show_everything)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+ require 'unisec/utils'
6
+
7
module Unisec
  module CLI
    module Commands
      # CLI sub-commands `unisec surrogates xxx` for the class {Unisec::Surrogates} from the lib.
      module Surrogates
        # Command `unisec surrogates from`
        #
        # Example:
        #
        # ```plaintext
        # $ unisec surrogates from 0xD801 0xDC37
        # Char: 𐐷
        # Code Point: 0x10437, 0d66615, 0b10000010000110111
        # High Surrogate: 0xD801, 0d55297, 0b1101100000000001
        # Low Surrogate: 0xDC37, 0d56375, 0b1101110000110111
        # ```
        class From < Dry::CLI::Command
          desc 'Code point ⬅️ Surrogates'

          argument :high, required: true,
                          desc: 'High surrogate (in hexadecimal (0xXXXX), decimal (0dXXXX), binary (0bXXXX) or as text)'
          argument :low, required: true,
                         desc: 'Low surrogate (in hexadecimal (0xXXXX), decimal (0dXXXX), binary (0bXXXX) or as text)'

          # Print the Unicode code point computed from a surrogate pair.
          # @param high [String] high surrogate, in any notation accepted by {Unisec::Utils::String.convert}
          # @param low [String] low surrogate, in any notation accepted by {Unisec::Utils::String.convert}
          def call(high: nil, low: nil, **)
            pair = [high, low].map { |raw| Unisec::Utils::String.convert(raw, :integer) }
            puts Unisec::Surrogates.new(*pair).display
          end
        end

        # Command `unisec surrogates to`
        #
        # Example:
        #
        # ```plaintext
        # $ unisec surrogates to 0x1F4A9
        # Char: 💩
        # Code Point: 0x1F4A9, 0d128169, 0b11111010010101001
        # High Surrogate: 0xD83D, 0d55357, 0b1101100000111101
        # Low Surrogate: 0xDCA9, 0d56489, 0b1101110010101001
        # ```
        class To < Dry::CLI::Command
          desc 'Code point ➡️ Surrogates'

          argument :codepoint, required: true,
                               desc: 'One code point (character) (in hexadecimal (0xXXXX), decimal (0dXXXX), binary ' \
                                     '(0bXXXX) or as text)'

          # Print the surrogate pair computed from a Unicode code point.
          # @param codepoint [String] code point, in any notation accepted by {Unisec::Utils::String.convert}
          def call(codepoint: nil, **)
            number = Unisec::Utils::String.convert(codepoint, :integer)
            puts Unisec::Surrogates.new(number).display
          end
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'unicode/confusable'
4
+ require 'twitter_cldr'
5
+
6
module Unisec
  # Operations about Unicode confusable characters (homoglyphs).
  class Confusables
    # List confusable characters for a given character
    # @param chr [String] the character to search confusables for
    # @param map [Boolean] allows partial mapping, includes confusables where the given character is a part of
    # @return [Array<String>] list of confusables
    # @example
    #   Unisec::Confusables.list('!') # => ["!", "ǃ", "ⵑ", "‼", "⁉", "⁈"]
    #   Unisec::Confusables.list('!', map: false) # => ["!", "ǃ", "ⵑ"]
    def self.list(chr, map: true)
      Unicode::Confusable.list(chr, map)
    end

    # Display a CLI-friendly output listing all confusables corresponding to a character (code point)
    # @param chr [String] the character to search confusables for
    # @param map [Boolean] allows partial mapping, includes confusables where the given character is a part of
    # @return [nil]
    def self.list_display(chr, map: true)
      Confusables.list(chr, map: map).each do |confu|
        # Only the first code point of each confusable is used for the code point and name columns
        puts "#{Properties.char2codepoint(confu).ljust(9)} #{confu.ljust(4)} " \
             "#{TwitterCldr::Shared::CodePoint.get(confu.codepoints.first).name}"
      end
      nil
    end

    # Replace all characters with random confusables when possible.
    # @param str [String] Unicode string
    # @return [String] input randomized with confusables
    # @example
    #   Unisec::Confusables.randomize('noraj') # => "𝓃ⲟ𝓇𝒶j"
    #   Unisec::Confusables.randomize('noraj') # => "𝗻૦𝚛⍺𝐣"
    #   Unisec::Confusables.randomize('noraj') # => "𝔫𞺄𝕣⍺j"
    def self.randomize(str)
      # Append into one mutable buffer instead of allocating a new string per character
      str.each_char.with_object(+'') do |chr, out|
        # `sample` returns nil on an empty list: keep the original character then
        out << (Confusables.list(chr, map: false).sample || chr)
      end
    end

    # Display a CLI-friendly output of a string where characters are replaced with random confusables
    # @param str [String] Unicode string
    def self.randomize_display(str)
      show = ->(label, value) { puts "#{Paint[label, :red, :bold].ljust(23)} #{value}" }
      show.call('Original:', str)
      show.call('Transformed:', Unisec::Confusables.randomize(str))
    end
  end
end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ctf_party'
4
+
5
module Unisec
  # Hexdump of all Unicode encodings.
  class Hexdump
    # UTF-8 hexdump
    # @return [String] UTF-8 hexdump
    attr_reader :utf8

    # UTF-16BE hexdump
    # @return [String] UTF-16BE hexdump
    attr_reader :utf16be

    # UTF-16LE hexdump
    # @return [String] UTF-16LE hexdump
    attr_reader :utf16le

    # UTF-32BE hexdump
    # @return [String] UTF-32BE hexdump
    attr_reader :utf32be

    # UTF-32LE hexdump
    # @return [String] UTF-32LE hexdump
    attr_reader :utf32le

    # Init the hexdump.
    # @param str [String] Input string to encode
    # @example
    #   hxd = Unisec::Hexdump.new('I 💕 Ruby 💎')
    #   hxd.utf8 # => "49 20 f0 9f 92 95 20 52 75 62 79 20 f0 9f 92 8e"
    #   hxd.utf16be # => "0049 0020 d83d dc95 0020 0052 0075 0062 0079 0020 d83d dc8e"
    #   hxd.utf32be # => "00000049 00000020 0001f495 00000020 00000052 00000075 00000062 00000079 00000020 0001f48e"
    def initialize(str)
      @utf8 = Hexdump.utf8(str)
      @utf16be = Hexdump.utf16be(str)
      @utf16le = Hexdump.utf16le(str)
      @utf32be = Hexdump.utf32be(str)
      @utf32le = Hexdump.utf32le(str)
    end

    # Encode to UTF-8 in hexdump format (spaced at every code unit = every byte)
    # @param str [String] Input string to encode
    # @return [String] hexdump (UTF-8 encoded)
    # @example
    #   Unisec::Hexdump.utf8('🐋') # => "f0 9f 90 8b"
    def self.utf8(str)
      hexdump(str, 'UTF-8', 1)
    end

    # Encode to UTF-16BE in hexdump format (spaced at every code unit = every 2 bytes)
    # @param str [String] Input string to encode
    # @return [String] hexdump (UTF-16BE encoded)
    # @example
    #   Unisec::Hexdump.utf16be('🐋') # => "d83d dc0b"
    def self.utf16be(str)
      hexdump(str, 'UTF-16BE', 2)
    end

    # Encode to UTF-16LE in hexdump format (spaced at every code unit = every 2 bytes)
    # @param str [String] Input string to encode
    # @return [String] hexdump (UTF-16LE encoded)
    # @example
    #   Unisec::Hexdump.utf16le('🐋') # => "3dd8 0bdc"
    def self.utf16le(str)
      hexdump(str, 'UTF-16LE', 2)
    end

    # Encode to UTF-32BE in hexdump format (spaced at every code unit = every 4 bytes)
    # @param str [String] Input string to encode
    # @return [String] hexdump (UTF-32BE encoded)
    # @example
    #   Unisec::Hexdump.utf32be('🐋') # => "0001f40b"
    def self.utf32be(str)
      hexdump(str, 'UTF-32BE', 4)
    end

    # Encode to UTF-32LE in hexdump format (spaced at every code unit = every 4 bytes)
    # @param str [String] Input string to encode
    # @return [String] hexdump (UTF-32LE encoded)
    # @example
    #   Unisec::Hexdump.utf32le('🐋') # => "0bf40100"
    def self.utf32le(str)
      hexdump(str, 'UTF-32LE', 4)
    end

    # Shared implementation of the per-encoding class methods: transcode, dump the bytes
    # as lower-case hexadecimal, then insert a space between code units.
    # @param str [String] Input string to encode
    # @param encoding [String] name of the target encoding
    # @param unit_size [Integer] size of one code unit, in bytes
    # @return [String] hexdump
    def self.hexdump(str, encoding, unit_size)
      # Each byte is two hexadecimal digits, hence the doubling
      str.encode(encoding).unpack1('H*').scan(/.{#{unit_size * 2}}/).join(' ')
    end
    private_class_method :hexdump

    # Display a CLI-friendly output summarizing the hexdump in all Unicode encodings
    # @return [String] one line per encoding
    # @example
    #   puts Unisec::Hexdump.new('K').display # =>
    #   # UTF-8: e2 84 aa
    #   # UTF-16BE: 212a
    #   # UTF-16LE: 2a21
    #   # UTF-32BE: 0000212a
    #   # UTF-32LE: 2a210000
    def display
      "UTF-8: #{@utf8}\n" \
        "UTF-16BE: #{@utf16be}\n" \
        "UTF-16LE: #{@utf16le}\n" \
        "UTF-32BE: #{@utf32be}\n" \
        "UTF-32LE: #{@utf32le}"
    end
  end
end
@@ -0,0 +1,178 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'twitter_cldr'
4
+ require 'paint'
5
+
6
module Unisec
  # Manipulate Unicode properties
  class Properties
    # Labels of the case mappings printed by {.char_display}, in display order
    CASE_LABELS = {
      'Uppercase:' => :uppercase,
      'Lowercase:' => :lowercase,
      'Titlecase:' => :titlecase,
      'Casefold:' => :casefold
    }.freeze

    # Labels of the normalization forms printed by {.char_display}, in display order
    NORMALIZATION_LABELS = {
      'Normalization NFKD:' => :nfkd,
      'Normalization NFKC:' => :nfkc,
      'Normalization NFD:' => :nfd,
      'Normalization NFC:' => :nfc
    }.freeze

    # List Unicode properties name
    # @return [Array<String>] properties name
    # @example
    #   Unisec::Properties.list # => ["ASCII_Hex_Digit", "Age", "Alphabetic", … ]
    def self.list
      TwitterCldr::Shared::CodePoint.properties.property_names
    end

    # List all code points for a given property
    # @param prop [String] the property name
    # @return [Array<Hash>] Array of code points (`{char: String, codepoint: Integer, name: String}`)
    # @example
    #   Unisec::Properties.codepoints('Quotation_Mark')
    #   # =>
    #   # [{:char=>"\"", :codepoint=>34, :name=>"QUOTATION MARK"},
    #   # {:char=>"'", :codepoint=>39, :name=>"APOSTROPHE"},
    #   # … ]
    def self.codepoints(prop)
      cp = TwitterCldr::Shared::CodePoint
      # The property is described as a list of ranges: flatten them into one entry per code point
      cp.properties.code_points_for_property(prop).ranges.flat_map do |range|
        range.map do |number|
          codepoint = cp.get(number)
          {
            char: TwitterCldr::Utils::CodePoints.to_string([codepoint.code_point]),
            codepoint: codepoint.code_point,
            name: codepoint.name
          }
        end
      end
    end

    # Display a CLI-friendly output listing all code points corresponding to a property.
    # @param prop [String] the property name
    # @return [nil]
    # @example
    #   Unisec::Properties.codepoints_display('Quotation_Mark')
    #   # =>
    #   # U+0022 " QUOTATION MARK
    #   # U+0027 ' APOSTROPHE
    #   # …
    def self.codepoints_display(prop)
      Properties.codepoints(prop).each do |entry|
        puts "#{Properties.char2codepoint(entry[:char]).ljust(7)} #{entry[:char].ljust(4)} #{entry[:name]}"
      end
      nil
    end

    # Returns all properties of a given unicode character (code point)
    # @param chr [String] Unicode code point (as character / string)
    # @return [Hash] All properties of the given code point
    # @example
    #   Unisec::Properties.char('é')
    #   # =>
    #   # {:age=>"1.1",
    #   # … }
    def self.char(chr)
      cp_num = TwitterCldr::Utils::CodePoints.from_string(chr)
      cp = TwitterCldr::Shared::CodePoint.get(cp_num.first)
      props = cp.properties
      # Work on a copy: the properties reported under a dedicated key are removed from the remaining ones
      props_hash = props.properties_hash.dup
      %w[Age Block General_Category Script].each { |name| props_hash.delete(name) }
      categories = props.general_category.map do |cat|
        TwitterCldr::Shared::PropertyValueAliases.long_alias_for('gc', cat)
      end
      # Localize once and reuse it for every TwitterCldr case / normalization operation
      localized = chr.localize
      {
        age: props.age.join,
        block: props.block.join,
        category: categories[1],
        subcategory: categories[0],
        codepoint: Properties.char2codepoint(chr),
        name: cp.name,
        script: props.script.join,
        case: {
          ruby: {
            lowercase: chr.downcase,
            uppercase: chr.upcase
          },
          twitter: {
            lowercase: localized.downcase.to_s,
            uppercase: localized.upcase.to_s,
            titlecase: localized.titlecase.to_s,
            casefold: localized.casefold.to_s
          }
        },
        normalization: {
          ruby: {
            nfkd: chr.unicode_normalize(:nfkd),
            nfkc: chr.unicode_normalize(:nfkc),
            nfd: chr.unicode_normalize(:nfd),
            nfc: chr.unicode_normalize(:nfc)
          },
          twitter: {
            nfkd: localized.normalize(using: :NFKD).to_s,
            nfkc: localized.normalize(using: :NFKC).to_s,
            nfd: localized.normalize(using: :NFD).to_s,
            nfc: localized.normalize(using: :NFC).to_s
          }
        },
        other_properties: props_hash
      }
    end

    # Display a CLI-friendly output listing all properties corresponding to character (code point)
    # @param chr [String] Unicode code point (as character / string)
    # @param extended [Boolean] By default, it will only show common properties, with extended set to `true` it will
    #   show all of them.
    # @return [nil]
    def self.char_display(chr, extended: false)
      data = Properties.char(chr)
      display = ->(key, value) { puts Paint[key, :red, :bold].ljust(30) + " #{value}" }
      display.call('Name:', data[:name])
      display.call('Code Point:', data[:codepoint])
      puts
      display.call('Block:', data[:block])
      display.call('Category:', data[:category])
      display.call('Sub-Category:', data[:subcategory])
      display.call('Script:', data[:script])
      display.call('Since (age):', "Version #{data[:age]}")
      puts
      CASE_LABELS.each do |label, key|
        cased = data.dig(:case, :twitter, key)
        display.call(label, "#{cased} (#{Properties.char2codepoint(cased)})")
      end
      puts
      NORMALIZATION_LABELS.each do |label, form|
        normalized = data.dig(:normalization, :twitter, form)
        display.call(label, "#{normalized} (#{Properties.chars2codepoints(normalized)})")
      end
      if extended
        puts
        data[:other_properties].each do |name, values|
          display.call(name, values&.join)
        end
      end
      nil
    end

    # Display the code point in Unicode format for a given character (code point as string)
    # @param chr [String] Unicode code point (as character / string); only the first code point is used
    # @return [String] code point in Unicode format (upper-case hexadecimal, at least 4 digits)
    # @example
    #   Unisec::Properties.char2codepoint('💎') # => "U+1F48E"
    def self.char2codepoint(chr)
      format('U+%04X', chr.codepoints.first)
    end

    # Display the code points in Unicode format for the given characters (code points as string)
    # @param chrs [String] Unicode code points (as characters / string)
    # @return [String] code points in Unicode format, separated by spaces
    # @example
    #   Unisec::Properties.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
    #   Unisec::Properties.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"
    def self.chars2codepoints(chrs)
      chrs.each_char.map { |chr| Properties.char2codepoint(chr) }.join(' ')
    end
  end
end
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'unisec/utils'
4
+ require 'ctf_party'
5
+
6
module Unisec
  # UTF-16 surrogates conversion.
  class Surrogates
    # Unicode code point
    # @return [Integer] decimal codepoint
    attr_reader :cp

    # High surrogate (1st code unit of a surrogate pair). Also called lead surrogate.
    # @return [Integer] decimal high surrogate
    attr_reader :hs

    # Low surrogate (2nd code unit of a surrogate pair). Also called trail surrogate.
    # @return [Integer] decimal low surrogate
    attr_reader :ls

    # Init the surrogate pair.
    # @param args [Integer] If one argument is provided, it's evaluated as the
    #   code point and the two surrogates will be calculated automatically.
    #   If two arguments are provided, they are evaluated as a surrogate pair (high
    #   then low) and the code point will be calculated.
    # @raise [ArgumentError] when called with neither one nor two arguments
    # @example
    #   surr = Unisec::Surrogates.new(128169)
    #   # => #<Unisec::Surrogates:0x00007f96920a7ca8 @cp=128169, @hs=55357, @ls=56489>
    #   surr.cp # => 128169
    #   surr.hs # => 55357
    #   surr.ls # => 56489
    #   Unisec::Surrogates.new(55357, 56489)
    #   # => #<Unisec::Surrogates:0x00007f96920689b8 @cp=128169, @hs=55357, @ls=56489>
    def initialize(*args)
      case args.size
      when 1
        @cp = args[0]
        @hs = high_surrogate
        @ls = low_surrogate
      when 2
        @hs, @ls = args
        @cp = code_point
      else
        raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 1..2)"
      end
    end

    # Calculate the high surrogate based on the Unicode code point.
    # @param codepoint [Integer] decimal codepoint
    # @return [Integer] decimal high surrogate
    # @example
    #   Unisec::Surrogates.high_surrogate(128169) # => 55357
    def self.high_surrogate(codepoint)
      (((codepoint - 0x10000) / 0x400).floor + 0xd800)
    end

    # Calculate the low surrogate based on the Unicode code point.
    # @param codepoint [Integer] decimal codepoint
    # @return [Integer] decimal low surrogate
    # @example
    #   Unisec::Surrogates.low_surrogate(128169) # => 56489
    def self.low_surrogate(codepoint)
      (((codepoint - 0x10000) % 0x400) + 0xdc00)
    end

    # Calculate the Unicode code point based on the surrogates.
    # @param hs [Integer] decimal high surrogate
    # @param ls [Integer] decimal low surrogate
    # @return [Integer] decimal code point
    # @example
    #   Unisec::Surrogates.code_point(55357, 56489) # => 128169
    def self.code_point(hs, ls)
      (((hs - 0xd800) * 0x400) + ls - 0xdc00 + 0x10000)
    end

    # Same as accessing {#hs}. Calculate the {.high_surrogate} from the stored code point.
    # @return [Integer] decimal high surrogate
    # @example
    #   surr = Unisec::Surrogates.new(128169)
    #   surr.high_surrogate # => 55357
    def high_surrogate
      @hs = Surrogates.high_surrogate(@cp)
    end

    # Same as accessing {#ls}. Calculate the {.low_surrogate} from the stored code point.
    # @return [Integer] decimal low surrogate
    # @example
    #   surr = Unisec::Surrogates.new(128169)
    #   surr.low_surrogate # => 56489
    def low_surrogate
      @ls = Surrogates.low_surrogate(@cp)
    end

    # Same as accessing {#cp}. Calculate the {.code_point} from the stored surrogates.
    # @return [Integer] decimal code point
    # @example
    #   surr = Unisec::Surrogates.new(55357, 56489)
    #   surr.code_point # => 128169
    def code_point
      @cp = Surrogates.code_point(@hs, @ls)
    end

    # Display a CLI-friendly output summarizing everything about the surrogates:
    # the corresponding character, code point, high and low surrogates
    # (each displayed as hexadecimal, decimal and binary).
    # @return [String] one line per item
    # @example
    #   surr = Unisec::Surrogates.new(128169)
    #   puts surr.display # =>
    #   # Char: 💩
    #   # Code Point: 0x1F4A9, 0d128169, 0b11111010010101001
    #   # High Surrogate: 0xD83D, 0d55357, 0b1101100000111101
    #   # Low Surrogate: 0xDCA9, 0d56489, 0b1101110010101001
    def display
      "Char: #{[@cp].pack('U*')}\n" \
        "Code Point: #{notations(@cp)}\n" \
        "High Surrogate: #{notations(@hs)}\n" \
        "Low Surrogate: #{notations(@ls)}"
    end

    private

    # Render a number in the three notations used by {#display}.
    # Uses Integer#to_s directly so the output does not depend on extensions of the Integer class.
    # @param number [Integer] value to render
    # @return [String] upper-case hexadecimal, decimal and binary forms, each with its prefix
    def notations(number)
      "0x#{number.to_s(16).upcase}, 0d#{number}, 0b#{number.to_s(2)}"
    end
  end
end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ctf_party'
4
+
5
class Integer
  # Render the integer in base 16
  # @return [String] The integer written with upper-case hexadecimal digits, without prefix
  # @example
  #   42.to_hex # => "2A"
  def to_hex
    hexadecimal = to_s(16)
    hexadecimal.upcase
  end

  # Render the integer in base 2
  # @return [String] The integer written with binary digits, without prefix
  # @example
  #   42.to_bin # => "101010"
  def to_bin
    binary_radix = 2
    to_s(binary_radix)
  end
end
22
+
23
module Unisec
  # Generic stuff not Unicode-related that can be re-used.
  module Utils
    # About string conversion and manipulation.
    module String
      # Convert a string input into the chosen type.
      # @param input [String] If the target type is `:integer`, the string must represent a number encoded in
      #   hexadecimal, decimal, binary. If it's a Unicode string, only the first code point will be taken into account.
      # @param target_type [Symbol] Convert to the chosen type. Currently only supports `:integer`.
      # @return [Variable] The type of the output depends on the chosen `target_type`.
      # @raise [TypeError] when `target_type` is not supported
      # @example
      #   Unisec::Utils::String.convert('0x1f4a9', :integer) # => 128169
      def self.convert(input, target_type)
        case target_type
        when :integer
          convert_to_integer(input)
        else
          raise TypeError, "Target type \"#{target_type}\" not available"
        end
      end

      # Internal method used for {.convert}.
      #
      # Convert a string input into integer.
      # @param input [String] The string must represent a number encoded in hexadecimal, decimal, binary. If it's a
      #   Unicode string, only the first code point will be taken into account. The input type is determined
      #   automatically based on the prefix.
      # @return [Integer]
      # @raise [TypeError] when {.autodetect} returns a type this method does not handle
      # @example
      #   # Hexadecimal
      #   Unisec::Utils::String.convert_to_integer('0x1f4a9') # => 128169
      #   # Decimal
      #   Unisec::Utils::String.convert_to_integer('0d128169') # => 128169
      #   # Binary
      #   Unisec::Utils::String.convert_to_integer('0b11111010010101001') # => 128169
      #   # Unicode string
      #   Unisec::Utils::String.convert_to_integer('💩') # => 128169
      def self.convert_to_integer(input)
        # String#to_i skips the radix prefix (0x / 0d / 0b) matching the requested base
        case autodetect(input)
        when :hexadecimal
          input.to_i(16)
        when :decimal
          input.to_i
        when :binary
          input.to_i(2)
        when :string
          input.codepoints.first
        else
          raise TypeError, "Input \"#{input}\" is not of the expected type"
        end
      end

      # Internal method used for {.convert}.
      #
      # Autodetect the representation type of the string input. The whole input must be a prefixed
      # number to be detected as such: text that merely contains something like `0x41` is a `:string`.
      # @param str [String] Input.
      # @return [Symbol] the detected type: `:hexadecimal`, `:decimal`, `:binary`, `:string`.
      # @example
      #   # Hexadecimal
      #   Unisec::Utils::String.autodetect('0x1f4a9') # => :hexadecimal
      #   # Decimal
      #   Unisec::Utils::String.autodetect('0d128169') # => :decimal
      #   # Binary
      #   Unisec::Utils::String.autodetect('0b11111010010101001') # => :binary
      #   # Unicode string
      #   Unisec::Utils::String.autodetect('💩') # => :string
      #   Unisec::Utils::String.autodetect('see 0x41') # => :string
      def self.autodetect(str)
        case str
        when /\A0x[0-9a-fA-F]+\z/
          :hexadecimal
        when /\A0d[0-9]+\z/
          :decimal
        when /\A0b[01]+\z/
          :binary
        else
          :string
        end
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Unisec
4
+ # Version number of unisec, shared by the library and the command-line application
5
+ VERSION = '0.0.1'
6
+ end
data/lib/unisec.rb ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'unisec/version'
4
+
5
+ require 'unisec/surrogates'
6
+ require 'unisec/hexdump'
7
+ require 'unisec/properties'
8
+ require 'unisec/confusables'
metadata ADDED
@@ -0,0 +1,143 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: unisec
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Alexandre ZANNI
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-07-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ctf-party
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: dry-cli
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: paint
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.3'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: twitter_cldr
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '6.11'
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: 6.11.5
65
+ type: :runtime
66
+ prerelease: false
67
+ version_requirements: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - "~>"
70
+ - !ruby/object:Gem::Version
71
+ version: '6.11'
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 6.11.5
75
+ - !ruby/object:Gem::Dependency
76
+ name: unicode-confusable
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '1.9'
82
+ type: :runtime
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '1.9'
89
+ description: Toolkit for security research manipulating Unicode
90
+ email: alexandre.zanni@europe.com
91
+ executables:
92
+ - unisec
93
+ extensions: []
94
+ extra_rdoc_files: []
95
+ files:
96
+ - LICENSE
97
+ - bin/unisec
98
+ - lib/unisec.rb
99
+ - lib/unisec/cli/cli.rb
100
+ - lib/unisec/cli/confusables.rb
101
+ - lib/unisec/cli/hexdump.rb
102
+ - lib/unisec/cli/properties.rb
103
+ - lib/unisec/cli/surrogates.rb
104
+ - lib/unisec/confusables.rb
105
+ - lib/unisec/hexdump.rb
106
+ - lib/unisec/properties.rb
107
+ - lib/unisec/surrogates.rb
108
+ - lib/unisec/utils.rb
109
+ - lib/unisec/version.rb
110
+ homepage: https://github.com/Acceis/unisec
111
+ licenses:
112
+ - MIT
113
+ metadata:
114
+ yard.run: yard
115
+ bug_tracker_uri: https://github.com/Acceis/unisec/issues
116
+ changelog_uri: https://github.com/Acceis/unisec/releases
117
+ documentation_uri: https://acceis.github.io/unisec/
118
+ homepage_uri: https://github.com/Acceis/unisec
119
+ source_code_uri: https://github.com/Acceis/unisec/
120
+ rubygems_mfa_required: 'true'
121
+ post_install_message:
122
+ rdoc_options: []
123
+ require_paths:
124
+ - lib
125
+ required_ruby_version: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - ">="
128
+ - !ruby/object:Gem::Version
129
+ version: 3.0.0
130
+ - - "<"
131
+ - !ruby/object:Gem::Version
132
+ version: '4.0'
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
138
+ requirements: []
139
+ rubygems_version: 3.4.1
140
+ signing_key:
141
+ specification_version: 4
142
+ summary: Unicode Security Toolkit
143
+ test_files: []