unisec 0.0.2 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,170 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'unisec/utils'
4
+ require 'ctf_party'
5
+
6
module Unisec
  # Manipulation of bidirectional related content
  class Bidi
    # Attack using BiDi code points like RtLO, for example, for spoofing a domain name or a file name
    class Spoof
      # The target string to spoof (eg. URL, domain or file name)
      # @return [String] the target string
      attr_reader :target_display

      # The string for the spoofing attack without the BiDi characters
      # @return [String] the spoof string (without BiDi)
      attr_reader :spoof_string

      # The string for the spoofing attack with the BiDi characters. (Spoof payload = spoof string + BiDi)
      # @return [String] the spoof string (with BiDi)
      attr_reader :spoof_payload

      # Set a new target string to spoof
      #
      # It will automatically set `@spoof_string` and `@spoof_payload` as well.
      # As in the constructor, `:infix_pos` is used as the start index of the reversal
      # unless an explicit `:index` is given, so that both stay aligned.
      # @param input [String] the target string
      # @param opts [Hash] optional parameters, see {Spoof.bidi_affix} and {Spoof.reverse}
      # @return [String] the target string
      def set_target_display(input, **opts)
        opts[:index] ||= opts[:infix_pos]

        @target_display = input
        @spoof_string = reverse(**opts)
        @spoof_payload = bidi_affix(**opts)
        @target_display
      end

      # @param input [String] the target string
      # @param opts [Hash] optional parameters, see {Spoof.bidi_affix} and {Spoof.reverse}
      # @example
      #   bd = Unisec::Bidi::Spoof.new('https://moc.example.org//:sptth')
      #   bd.target_display # => "https://moc.example.org//:sptth"
      #   bd.spoof_string # => "https://gro.elpmaxe.com//:sptth"
      #   bd.spoof_payload # => "\u{202E}https://gro.elpmaxe.com//:sptth\u{202C}"
      def initialize(input, **opts)
        # Single code path shared with the setter so both always derive the same state.
        set_target_display(input, **opts)
      end

      # Reverse the (sub)-string (grapheme cluster aware)
      # @param target [String] string to reverse
      # @param opts [Hash] optional parameters
      # @option opts [Integer] :index Index at which the reverse starts (before this position will be left
      #   untouched). Default: 0.
      # @return [String] the reversed string
      # @example
      #   Unisec::Bidi::Spoof.reverse('document_anntxt.exe', index: 12)
      #   # => "document_annexe.txt"
      #
      #   Unisec::Bidi::Spoof.reverse("🇫🇷🐓")
      #   # => "🐓🇫🇷"
      def self.reverse(target, **opts)
        start = opts[:index] || 0
        untouched = target[0...start]
        # Slicing past the end of the string yields nil: treat it as "nothing to reverse".
        tail = target[start..].to_s

        untouched + Unisec::Utils::String.grapheme_reverse(tail)
      end

      # Call {Spoof.reverse} with `@target_display` as default input (target).
      # @param opts [Hash] optional parameters, see {Spoof.reverse}
      # @return [String] the reversed string
      def reverse(**opts)
        Spoof.reverse(@target_display, **opts)
      end

      # Inject BiDi characters into the input string
      # @param input [String] input string
      # @param opts [Hash] optional parameters
      # @option opts [String] :prefix Prefix Bidi. Default: RLO (U+202E).
      # @option opts [String] :suffix Suffix Bidi. Default: PDF (U+202C).
      # @option opts [String] :infix_bidi Bidi injected at a chosen position. Default: none (empty string).
      # @option opts [Integer] :infix_pos Position (index) where to inject an extra BiDi. Default: 0.
      # @return [String] spoof payload (input string with injected BiDi)
      # @note The infix is inserted into the string that already carries the prefix, so a non-empty prefix
      #   shifts the position relative to the raw input.
      # @example
      #   # By default inject a RLO prefix, a PDF suffix and no infix.
      #   Unisec::Bidi::Spoof.bidi_affix('acceis')
      #   # => "\u{202E}acceis\u{202C}"
      #
      #   # RLI ... PDI
      #   Unisec::Bidi::Spoof.bidi_affix('acceis', prefix: "\u{2067}", suffix: "\u{2069}")
      #   # => "\u{2067}acceis\u{2069}"
      #
      #   # RLE ... PDF
      #   Unisec::Bidi::Spoof.bidi_affix('acceis', prefix: "\u{202B}", suffix: "\u{202C}")
      #   # => "\u{202B}acceis\u{202C}"
      #
      #   # RLO ... PDF
      #   Unisec::Bidi::Spoof.bidi_affix('https://moc.example.org//:sptth', prefix: "\u{202E}", suffix: "\u{202C}")
      #   # => "\u{202E}https://moc.example.org//:sptth\u{202C}"
      #
      #   # FSI RLO ... PDF PDI
      #   Unisec::Bidi::Spoof.bidi_affix('https://moc.example.org//:sptth', prefix: "\u{2068 202E}", suffix: "\u{202C 2069}")
      #   # => "\u{2068 202E}https://moc.example.org//:sptth\u{202C 2069}"
      #
      #   # RLM ...
      #   Unisec::Bidi::Spoof.bidi_affix('unicode', prefix: "\u{200F}", suffix: '')
      #   # => "\u{200F}unicode"
      #
      #   # For file name spoofing, it is useful to be able to inject just a RLO before the fake extension
      #   # so we can void the prefix and suffix and just set the position of an infix
      #   ex = Unisec::Bidi::Spoof.bidi_affix('document_anntxt.exe', prefix: '', suffix: '', infix_bidi: "\u{202E}", infix_pos: 12)
      #   # => "document_ann\u{202E}txt.exe"
      #   puts ex
      #   # a BiDi-aware terminal renders it as: document_annexe.txt
      def self.bidi_affix(input, **opts)
        prefix = opts[:prefix] || "\u{202E}" # RLO
        suffix = opts[:suffix] || "\u{202C}" # PDF
        infix = opts[:infix_bidi] || ''
        position = opts[:infix_pos] || 0

        payload = "#{prefix}#{input}#{suffix}"
        payload.insert(position, infix)
      end

      # Call {Spoof.bidi_affix} with `@spoof_string` as input.
      # @param opts [Hash] optional parameters, see {Spoof.bidi_affix}
      # @return [String] spoof payload
      def bidi_affix(**opts)
        Spoof.bidi_affix(@spoof_string, **opts)
      end

      # Display a CLI-friendly output summarizing the spoof payload
      #
      # The light version displays only the spoof payload for easy piping with other commands.
      # @param light [Boolean] `true` = light display (displays only the spoof payload for easy piping with
      #   other commands), `false` (default) = full display.
      # @return [String] the text to print
      # @example
      #   puts Unisec::Bidi::Spoof.new('noraj').display
      #   # Target string: noraj
      #   # Spoof payload (display) ⚠: <RLO>jaron<PDF>
      #   # Spoof string 🛈: jaron
      #   # Spoof payload (hex): e280ae6a61726f6ee280ac
      #   # Spoof payload (hex, escaped): \xe2\x80\xae\x6a\x61\x72\x6f\x6e\xe2\x80\xac
      #   # Spoof payload (base64): 4oCuamFyb27igKw=
      #   # Spoof payload (urlencode): %E2%80%AEjaron%E2%80%AC
      #   # Spoof payload (code points): U+202E U+006A U+0061 U+0072 U+006F U+006E U+202C
      #   #
      #   #
      #   #
      #   # ⚠: for the spoof payload to display correctly, be sure your VTE has RTL support, e.g. see https://wiki.archlinux.org/title/Bidirectional_text#Terminal.
      #   # 🛈: Does not contain the BiDi character (e.g. RtLO).
      #
      #   puts Unisec::Bidi::Spoof.new('noraj').display(light: true)
      #   # <RLO>jaron<PDF>
      #
      #   # (<RLO> and <PDF> stand for the raw U+202E and U+202C characters present in the real output)
      def display(light: false)
        return @spoof_payload if light # light display

        "Target string: #{@target_display}\n" \
          "Spoof payload (display) ⚠: #{@spoof_payload}\n" \
          "Spoof string 🛈: #{@spoof_string}\n" \
          "Spoof payload (hex): #{@spoof_payload.to_hex}\n" \
          "Spoof payload (hex, escaped): #{@spoof_payload.to_hex(prefixall: '\\x')}\n" \
          "Spoof payload (base64): #{@spoof_payload.to_b64}\n" \
          "Spoof payload (urlencode): #{@spoof_payload.urlencode}\n" \
          "Spoof payload (code points): #{Unisec::Properties.chars2codepoints(@spoof_payload)}\n" \
          "\n\n\n" \
          '⚠: for the spoof payload to display correctly, be sure your VTE has RTL support, ' \
          "e.g. see https://wiki.archlinux.org/title/Bidirectional_text#Terminal.\n" \
          '🛈: Does not contain the BiDi character (e.g. RtLO).'
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+ require 'unisec/utils'
6
+
7
module Unisec
  module CLI
    module Commands
      # CLI sub-commands `unisec bidi xxx` for the class {Unisec::Bidi} from the lib.
      module Bidi
        # Command `unisec bidi spoof`
        #
        # Example (`<RLO>` and `<PDF>` stand for the raw U+202E and U+202C characters of the real output):
        #
        # ```plaintext
        # $ unisec bidi spoof noraj
        # Target string: noraj
        # Spoof payload (display) ⚠: <RLO>jaron<PDF>
        # Spoof string 🛈: jaron
        # Spoof payload (hex): e280ae6a61726f6ee280ac
        # Spoof payload (hex, escaped): \xe2\x80\xae\x6a\x61\x72\x6f\x6e\xe2\x80\xac
        # Spoof payload (base64): 4oCuamFyb27igKw=
        # Spoof payload (urlencode): %E2%80%AEjaron%E2%80%AC
        # Spoof payload (code points): U+202E U+006A U+0061 U+0072 U+006F U+006E U+202C
        #
        #
        #
        # ⚠: for the spoof payload to display correctly, be sure your VTE has RTL support, e.g. see https://wiki.archlinux.org/title/Bidirectional_text#Terminal.
        # 🛈: Does not contain the BiDi character (e.g. RtLO).
        #
        # $ unisec bidi spoof 'document_annexe.txt' --prefix '' --suffix '' --infix-bidi $'\U202E' --infix-pos 12 --light=true
        # document_ann<RLO>txt.exe
        # ```
        class Spoof < Dry::CLI::Command
          desc 'Craft a payload for BiDi attacks (for example, for spoofing a domain name or a file name)'

          argument :input, required: true,
                           desc: 'String input'
          option :light, default: false, values: %w[true false],
                         desc: 'true = light display (displays only the spoof payload for easy piping with other ' \
                               'commands), false = full display'
          option :prefix, default: nil, desc: 'Prefix Bidi. Default: RLO (U+202E).'
          option :suffix, default: nil, desc: 'Suffix Bidi. Default: PDF (U+202C).'
          option :infix_bidi, default: nil, desc: 'Bidi injected at a chosen position. Default: none (empty string).'
          option :infix_pos, default: nil, desc: 'Position (index) where to inject an extra BiDi. Default: 0.'

          # Craft a payload for BiDi attacks and print it
          # @param input [String] Input string to spoof
          # @param options [Hash] optional parameters, see {Unisec::Bidi::Spoof.bidi_affix}
          def call(input: nil, **options)
            # The flag is either the Boolean default or the string typed on the command line.
            light = ['true', true].include?(options.fetch(:light))
            # Keep nil when the option is absent so the library default applies.
            infix_pos = options[:infix_pos]&.to_i
            spoof = Unisec::Bidi::Spoof.new(input, prefix: options[:prefix], suffix: options[:suffix],
                                                   infix_bidi: options[:infix_bidi],
                                                   infix_pos: infix_pos)
            puts spoof.display(light: light)
          end
        end
      end
    end
  end
end
@@ -1,11 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'unisec/cli/surrogates'
3
+ require 'unisec/cli/bidi'
4
+ require 'unisec/cli/confusables'
4
5
  require 'unisec/cli/hexdump'
5
6
  require 'unisec/cli/properties'
6
- require 'unisec/cli/confusables'
7
- require 'unisec/cli/versions'
7
+ require 'unisec/cli/rugrep'
8
8
  require 'unisec/cli/size'
9
+ require 'unisec/cli/surrogates'
10
+ require 'unisec/cli/versions'
9
11
 
10
12
  module Unisec
11
13
  # Module used to create the CLI for the executable
@@ -16,16 +18,18 @@ module Unisec
16
18
 
17
19
  # Mapping between the (sub-)commands as seen by the user
18
20
  # on the command-line interface and the CLI modules in the lib
19
- register 'surrogates to', Surrogates::To
20
- register 'surrogates from', Surrogates::From
21
- register 'hexdump', Hexdump
22
- register 'properties list', Properties::List
23
- register 'properties codepoints', Properties::Codepoints
24
- register 'properties char', Properties::Char
21
+ register 'bidi spoof', Bidi::Spoof
25
22
  register 'confusables list', Confusables::List
26
23
  register 'confusables randomize', Confusables::Randomize
27
- register 'versions', Versions
24
+ register 'grep', Grep
25
+ register 'hexdump', Hexdump
26
+ register 'properties char', Properties::Char
27
+ register 'properties codepoints', Properties::Codepoints
28
+ register 'properties list', Properties::List
28
29
  register 'size', Size
30
+ register 'surrogates from', Surrogates::From
31
+ register 'surrogates to', Surrogates::To
32
+ register 'versions', Versions
29
33
  end
30
34
  end
31
35
  end
@@ -22,12 +22,21 @@ module Unisec
22
22
  desc 'Hexdump in all Unicode encodings'
23
23
 
24
24
  argument :input, required: true,
25
- desc: 'String input'
25
+ desc: 'String input. Read from STDIN if equal to -.'
26
+
27
+ option :enc, default: nil, values: %w[utf8 utf16be utf16le utf32be utf32le],
28
+ desc: 'Output only in the specified encoding.'
26
29
 
27
30
  # Hexdump of all Unicode encodings.
28
31
  # @param input [String] Input string to encode
29
- def call(input: nil, **)
30
- puts Unisec::Hexdump.new(input).display
32
+ def call(input: nil, **options)
33
+ input = $stdin.read.chomp if input == '-'
34
+ if options[:enc].nil?
35
+ puts Unisec::Hexdump.new(input).display
36
+ else
37
+ # using send() is safe here thanks to the value whitelist
38
+ puts puts Unisec::Hexdump.send(options[:enc], input)
39
+ end
31
40
  end
32
41
  end
33
42
  end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/cli'
4
+ require 'unisec'
5
+
6
module Unisec
  module CLI
    module Commands
      # CLI command `unisec grep` for the class {Unisec::Rugrep} from the lib.
      #
      # Example:
      #
      # ```plaintext
      # $ unisec grep 'FRENCH \w+'
      # U+20A3  ₣    FRENCH FRANC SIGN
      # U+1F35F 🍟    FRENCH FRIES
      # ```
      class Grep < Dry::CLI::Command
        desc 'Search for Unicode code point names by regular expression'

        argument :regexp, required: true,
                          desc: 'regular expression'

        # Print every code point whose name matches the regular expression.
        # @param regexp [String] Regular expression without delimiters or modifiers.
        #   Supports everything Ruby Regexp supports
        def call(regexp: nil, **)
          # regrep_display prints the matches itself and returns nil, so wrapping it in
          # `puts` would only append an empty line to the output.
          Unisec::Rugrep.regrep_display(regexp)
        end
      end
    end
  end
end
@@ -50,7 +50,7 @@ module Unisec
50
50
  def self.codepoints_display(prop)
51
51
  codepoints = Properties.codepoints(prop)
52
52
  codepoints.each do |cp|
53
- puts "#{Properties.char2codepoint(cp[:char]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
53
+ puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
54
54
  end
55
55
  nil
56
56
  end
@@ -158,7 +158,7 @@ module Unisec
158
158
  # @example
159
159
  # Unisec::Properties.char2codepoint('💎') # => "U+1F48E"
160
160
  def self.char2codepoint(chr)
161
- "U+#{format('%.4x', chr.codepoints.first).upcase}"
161
+ Properties.deccp2stdhexcp(chr.codepoints.first)
162
162
  end
163
163
 
164
164
  # Display the code points in Unicode format for the given characters (code points as string)
@@ -174,5 +174,14 @@ module Unisec
174
174
  end
175
175
  out.join(' ')
176
176
  end
177
+
178
# Format an integer code point in the standardized Unicode notation: `U+` followed by
# at least four uppercase hexadecimal digits.
# @param int_cp [Integer] Code point as an integer (e.g. the decimal value `128640`)
# @return [String] code point in Unicode format
# @example
#   Unisec::Properties.deccp2stdhexcp(128640) # => "U+1F680"
def self.deccp2stdhexcp(int_cp)
  # The precision (.4) zero-pads short values; upcasing leaves the "U+" prefix unchanged.
  format('U+%.4x', int_cp).upcase
end
177
186
  end
178
187
  end
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'twitter_cldr'
4
+ require 'paint'
5
+
6
module Unisec
  # Ruby grep : Ruby regular expression search for Unicode code point names
  class Rugrep
    # UCD Derived names file location
    # @see https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedName.txt
    UCD_DERIVEDNAME = File.join(__dir__, '../../data/DerivedName.txt')

    # Search code points by (Ruby) regexp in the local UCD file
    # @param regexp [String] Regular expression without delimiters or modifiers.
    #   Supports everything Ruby Regexp supports. Matching is case-insensitive.
    # @return [Array<Hash>] Array of code points (`{char: String, codepoint: Integer, name: String}`)
    # @example
    #   Unisec::Rugrep.regrep('snowman|snowflake')
    #   # =>
    #   # [{:char=>"☃", :codepoint=>9731, :name=>"SNOWMAN"},
    #   #  {:char=>"⛄", :codepoint=>9924, :name=>"SNOWMAN WITHOUT SNOW"},
    #   #  {:char=>"⛇", :codepoint=>9927, :name=>"BLACK SNOWMAN"},
    #   #  {:char=>"❄", :codepoint=>10052, :name=>"SNOWFLAKE"},
    #   #  {:char=>"❅", :codepoint=>10053, :name=>"TIGHT TRIFOLIATE SNOWFLAKE"},
    #   #  {:char=>"❆", :codepoint=>10054, :name=>"HEAVY CHEVRON SNOWFLAKE"}]
    #   Unisec::Rugrep.regrep('greek small letter \w+')
    #   # =>
    #   # [{:char=>"ͱ", :codepoint=>881, :name=>"GREEK SMALL LETTER HETA"},
    #   #  {:char=>"ͳ", :codepoint=>883, :name=>"GREEK SMALL LETTER ARCHAIC SAMPI"},
    #   #  {:char=>"ͷ", :codepoint=>887, :name=>"GREEK SMALL LETTER PAMPHYLIAN DIGAMMA"},
    #   #  …]
    def self.regrep(regexp)
      pattern = compile(regexp)
      out = []
      # File.foreach closes the file once the iteration is over.
      File.foreach(UCD_DERIVEDNAME, chomp: true) do |line|
        # Skip if the line is empty or a comment
        next if line.empty? || line.start_with?('#')

        # parse the line to extract code point as integer and the name
        cp_hex, name = line.split(';')
        next if name.nil? # not a "code point ; name" record

        name = name.lstrip
        next unless pattern.match?(name)

        # to_i(16) reads the leading hexadecimal digits and ignores what follows them
        out << codepoint_entry(cp_hex.to_i(16), name)
      end
      out
    end

    # Display a CLI-friendly output listing all code points corresponding to a regular expression.
    # @param regexp [String] Regular expression, see {Rugrep.regrep}
    # @return [nil]
    # @example
    #   Unisec::Rugrep.regrep_display('snowman|snowflake')
    #   # =>
    #   # U+2603  ☃    SNOWMAN
    #   # U+26C4  ⛄    SNOWMAN WITHOUT SNOW
    #   # U+26C7  ⛇    BLACK SNOWMAN
    #   # U+2744  ❄    SNOWFLAKE
    #   # U+2745  ❅    TIGHT TRIFOLIATE SNOWFLAKE
    #   # U+2746  ❆    HEAVY CHEVRON SNOWFLAKE
    def self.regrep_display(regexp)
      print_codepoints(regrep(regexp))
    end

    # Returns the version of Unicode used in UCD local file (data/DerivedName.txt)
    #
    # The version is read from the file name written on the first line of the file.
    # @return [String] Unicode version
    # @example
    #   Unisec::Rugrep.ucd_derivedname_version # => "15.1.0"
    def self.ucd_derivedname_version
      first_line = File.open(UCD_DERIVEDNAME, &:readline)
      first_line.match(/-(\d+\.\d+\.\d+)\.txt/).captures.first
    end

    # Search code points by (Ruby) regexp using the TwitterCldr code point database
    # @param regexp [String] Regular expression without delimiters or modifiers.
    #   Matching is case-insensitive.
    # @return [Array<Hash>] Array of code points (`{char: String, codepoint: Integer, name: String}`)
    # @example
    #   Unisec::Rugrep.regrep_slow('snowman|snowflake')
    #   # =>
    #   # [{:char=>"☃", :codepoint=>9731, :name=>"SNOWMAN"},
    #   #  {:char=>"⛄", :codepoint=>9924, :name=>"SNOWMAN WITHOUT SNOW"},
    #   #  {:char=>"⛇", :codepoint=>9927, :name=>"BLACK SNOWMAN"},
    #   #  {:char=>"❄", :codepoint=>10052, :name=>"SNOWFLAKE"},
    #   #  {:char=>"❅", :codepoint=>10053, :name=>"TIGHT TRIFOLIATE SNOWFLAKE"},
    #   #  {:char=>"❆", :codepoint=>10054, :name=>"HEAVY CHEVRON SNOWFLAKE"}]
    # @note ⚠ This command is very time consuming (~ 1min) and unoptimized (execute one regexp per code point…)
    def self.regrep_slow(regexp)
      # Built once per call. An `o` flag on an interpolated literal would instead keep the
      # very first pattern for every later call.
      pattern = compile(regexp)
      out = []
      TwitterCldr::Shared::CodePoint.each do |cp|
        next unless pattern.match?(cp.name)

        out << codepoint_entry(cp.code_point, cp.name)
      end
      out
    end

    # Display a CLI-friendly output listing all code points corresponding to a regular expression.
    # @param regexp [String] Regular expression, see {Rugrep.regrep_slow}
    # @return [nil]
    # @example
    #   Unisec::Rugrep.regrep_display_slow('snowman|snowflake')
    #   # =>
    #   # U+2603  ☃    SNOWMAN
    #   # U+26C4  ⛄    SNOWMAN WITHOUT SNOW
    #   # U+26C7  ⛇    BLACK SNOWMAN
    #   # U+2744  ❄    SNOWFLAKE
    #   # U+2745  ❅    TIGHT TRIFOLIATE SNOWFLAKE
    #   # U+2746  ❆    HEAVY CHEVRON SNOWFLAKE
    def self.regrep_display_slow(regexp)
      print_codepoints(regrep_slow(regexp))
    end

    # Build the case-insensitive Regexp for a user supplied pattern.
    # `to_s` keeps the same source text that interpolating into a regexp literal would use.
    def self.compile(regexp)
      Regexp.new(regexp.to_s, Regexp::IGNORECASE)
    end

    # Build the result record shared by both search methods.
    def self.codepoint_entry(cp_int, name)
      {
        char: TwitterCldr::Utils::CodePoints.to_string([cp_int]),
        codepoint: cp_int,
        name: name
      }
    end

    # Print one line per code point (U+XXXX, character, name) and return nil.
    def self.print_codepoints(codepoints)
      codepoints.each do |cp|
        puts "#{Properties.deccp2stdhexcp(cp[:codepoint]).ljust(7)} #{cp[:char].ljust(4)} #{cp[:name]}"
      end
      nil
    end

    private_class_method :compile, :codepoint_entry, :print_codepoints
  end
end
data/lib/unisec/utils.rb CHANGED
@@ -98,6 +98,16 @@ module Unisec
98
98
  :string
99
99
  end
100
100
  end
101
+
102
# Reverse a string grapheme cluster by grapheme cluster, so that sequences made of several
# code points (e.g. flag emojis) stay intact.
# @param str [String] the string to reverse
# @return [String] the reversed string
# @example
#   b = "\u{1f1eb}\u{1f1f7}\u{1F413}" # => "🇫🇷🐓"
#   b.reverse # => "🐓🇷🇫"
#   Unisec::Utils::String.grapheme_reverse(b) # => "🐓🇫🇷"
def self.grapheme_reverse(str)
  clusters = str.each_grapheme_cluster.to_a
  clusters.reverse!
  clusters.join
end
101
111
  end
102
112
  end
103
113
  end
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Unisec
4
4
  # Version of unisec library and app
5
- VERSION = '0.0.2'
5
+ VERSION = '0.0.4'
6
6
  end
@@ -51,6 +51,10 @@ module Unisec
51
51
  unicodeconfusable_unicode: {
52
52
  version: Unicode::Confusable::UNICODE_VERSION,
53
53
  label: 'Unicode (unicode-confusable gem)'
54
+ },
55
+ ucd_derivedname: {
56
+ version: Unisec::Rugrep.ucd_derivedname_version,
57
+ label: 'UCD (data/DerivedName.txt)'
54
58
  }
55
59
  }
56
60
  end
@@ -76,6 +80,7 @@ module Unisec
76
80
  display.call(:twittercldr_icu)
77
81
  display.call(:twittercldr_cldr)
78
82
  display.call(:ruby_unicode_emoji)
83
+ display.call(:ucd_derivedname)
79
84
  puts Paint["\nGems:", :underline]
80
85
  display.call(:unisec)
81
86
  display.call(:twittercldr)
data/lib/unisec.rb CHANGED
@@ -2,9 +2,11 @@
2
2
 
3
3
  require 'unisec/version'
4
4
 
5
- require 'unisec/surrogates'
5
+ require 'unisec/bidi'
6
+ require 'unisec/confusables'
6
7
  require 'unisec/hexdump'
7
8
  require 'unisec/properties'
8
- require 'unisec/confusables'
9
- require 'unisec/versions'
9
+ require 'unisec/rugrep'
10
10
  require 'unisec/size'
11
+ require 'unisec/surrogates'
12
+ require 'unisec/versions'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unisec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexandre ZANNI
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-08-18 00:00:00.000000000 Z
11
+ date: 2024-01-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ctf-party
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.3'
19
+ version: '3.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.3'
26
+ version: '3.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: dry-cli
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -86,7 +86,9 @@ dependencies:
86
86
  - - "~>"
87
87
  - !ruby/object:Gem::Version
88
88
  version: '1.9'
89
- description: Toolkit for security research manipulating Unicode
89
+ description: 'Toolkit for security research manipulating Unicode: confusables, homoglyphs,
90
+ hexdump, code point, UTF-8, UTF-16, UTF-32, properties, regexp search, size, grapheme,
91
+ surrogates, version, ICU, CLDR, UCD'
90
92
  email: alexandre.zanni@europe.com
91
93
  executables:
92
94
  - unisec
@@ -95,17 +97,22 @@ extra_rdoc_files: []
95
97
  files:
96
98
  - LICENSE
97
99
  - bin/unisec
100
+ - data/DerivedName.txt
98
101
  - lib/unisec.rb
102
+ - lib/unisec/bidi.rb
103
+ - lib/unisec/cli/bidi.rb
99
104
  - lib/unisec/cli/cli.rb
100
105
  - lib/unisec/cli/confusables.rb
101
106
  - lib/unisec/cli/hexdump.rb
102
107
  - lib/unisec/cli/properties.rb
108
+ - lib/unisec/cli/rugrep.rb
103
109
  - lib/unisec/cli/size.rb
104
110
  - lib/unisec/cli/surrogates.rb
105
111
  - lib/unisec/cli/versions.rb
106
112
  - lib/unisec/confusables.rb
107
113
  - lib/unisec/hexdump.rb
108
114
  - lib/unisec/properties.rb
115
+ - lib/unisec/rugrep.rb
109
116
  - lib/unisec/size.rb
110
117
  - lib/unisec/surrogates.rb
111
118
  - lib/unisec/utils.rb
@@ -140,7 +147,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
147
  - !ruby/object:Gem::Version
141
148
  version: '0'
142
149
  requirements: []
143
- rubygems_version: 3.4.1
150
+ rubygems_version: 3.5.3
144
151
  signing_key:
145
152
  specification_version: 4
146
153
  summary: Unicode Security Toolkit