heitt 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
# Gem-level constants for HEITT.
module HEITT
  # Released version of the gem.
  VERSION = '0.4.1'.freeze
  # Location of the project's source repository.
  GITHUB = 'https://github.com/jobotow/heitt'.freeze
end
data/lib/heitt.rb ADDED
@@ -0,0 +1,357 @@
1
+ require 'json'
2
+ require 'set'
3
+ require 'strscan'
4
+ require 'colorize'
5
+ require_relative 'heitt/database'
6
+ require_relative 'heitt/version'
7
+
8
+ module HEITT
9
# Thin wrapper around the colorize gem's String#colorize that degrades to
# plain text when standard output is not a terminal.
module Color
  # Applies +color+ and then each of +styles+ to +text+.
  #
  # @param text [String] text to decorate
  # @param color [Symbol] value handed to String#colorize
  # @param styles [Array<Symbol>] String methods applied in order to the
  #   coloured text (e.g. :bold)
  # @return [String] +text+ itself when STDOUT is not a TTY, otherwise the
  #   decorated string
  # @raise [NoMethodError] if a style is not a public method of the string
  def self.colorize(text, color, *styles)
    return text unless STDOUT.isatty

    # public_send (not send) so a style symbol can never reach private
    # Kernel methods such as `exit` or `system`.
    styles.reduce(text.colorize(color)) { |colored, style| colored.public_send(style) }
  end
end
20
+ #private_constant :Color
21
+
22
+
23
# Clusters scan results that share the same top-ranked candidate.
module Grouper
  # Groups +results+ by the name of each result's first candidate.
  #
  # @param results [Array<Hash>] hashes with :hash and :candidates keys
  #   (the shape built by Scanner.scan)
  # @return [Array<Hash>] one hash per cluster with :cluster_id (1-based, in
  #   order of first appearance), :hashes, :candidates (taken from the first
  #   member of the cluster) and :count
  def self.group(results)
    clusters = results.group_by do |result|
      # Results without any candidate fall into a single nil-named cluster
      # instead of raising NoMethodError.
      top = result[:candidates]&.first
      top && top[:name]
    end

    clusters.each_with_index.map do |(_name, members), index|
      hashes = members.map { |result| result[:hash] }
      {
        cluster_id: index + 1,
        hashes: hashes,
        candidates: members.first[:candidates],
        count: hashes.size
      }
    end
  end
end
41
+
42
+
43
+
44
# Context analysis and candidate scoring for extracted hashes.
#
# All methods are module-level (`def self.`). The helpers in the lower half
# are meant for internal use, but a bare `private` does not affect `def self.`
# methods, so they have always been publicly callable and remain so here.
module Analyzer
  # Characters treated as separators between a label and the hash on a line
  # ("=", ":" and space).
  PREFIX_DELIMITERS = /[=: ]/

  # Score added to a mode whose declared prefix precedes the hash.
  PREFIX_BOOST = 20

  # Scores every mode in +database+ by how often its context keywords occur
  # in +text+ (case-insensitive, whole-word matches).
  #
  # @param text [String] the full input being scanned
  # @param database [Array<Hash>] signature entries
  # @return [Hash{String=>Integer}] mode name => keyword hit total; modes
  #   with no hits are omitted
  def self.analyze(text, database: HEITT::DATABASE)
    counts = keyword_counts(text.downcase, database: database)
    algorithm_scores(counts, database: database)
  end

  # Returns the part of the current line that precedes +offset+.
  #
  # @param text [String]
  # @param offset [Integer] character index where the match starts
  # @return [String] text from the preceding newline (inclusive, when there
  #   is one) up to but excluding +offset+
  def self.extract_prefix(text, offset)
    line_start = text.rindex("\n", offset) || 0
    text[line_start...offset]
  end

  # @return [Boolean] whether the Shannon entropy of +text+ is at least
  #   +min_ent+ bits per character
  def self.high_entropy?(text, min_ent)
    entropy(text) >= min_ent
  end

  # Builds the ranked candidate list for one matched hash.
  #
  # @param modes [Array<Hash>] modes of the database entry that matched
  # @param delim_prefix [String] text preceding the hash on its line
  # @param context_scores [Hash{String=>Integer}] output of {analyze}
  # @return [Array<Hash>] candidates sorted by descending :score, each with
  #   :name, :hashcat, :john, :description, :extended, :notes, :score and
  #   :confidence; empty when +modes+ is empty
  def self.score_candidates(modes, delim_prefix, context_scores)
    # Every mode whose prefix matched is remembered, not just the last one,
    # so all of them get the prefix confidence upgrade.
    prefix_matched = []

    matches = modes.map do |mode|
      score = context_scores[mode[:name]] || 0
      if prefix_match?(mode, delim_prefix)
        prefix_matched << mode[:name]
        score += PREFIX_BOOST
      end

      {
        name: mode[:name],
        hashcat: mode[:hashcat],
        john: mode[:john],
        description: mode[:description],
        extended: mode[:extended],
        # Carried through because Formatter.tree reads candidate[:notes].
        notes: mode[:notes],
        score: score
      }
    end
    return [] if matches.empty?

    scores_by_name = matches.to_h { |match| [match[:name], match[:score]] }
    confidences = assign_confidence(scores_by_name, prefix_matched)

    matches
      .map { |match| match.merge(confidence: confidences[match[:name]]) }
      .sort_by { |match| -match[:score] }
  end

  # --- helpers -----------------------------------------------------------

  # Returns the list of modes of a database entry, accepting several
  # alternative key names; nil when none is present.
  def self.get_modes(entry)
    entry[:modes] || entry[:algorithms] || entry[:hashes] ||
      entry[:candidates] || entry[:types] || entry[:hashtypes]
  end

  # Shannon entropy of +text+ in bits per character.
  # Based on https://github.com/chrisjchandler/entropy/blob/main/entropy.go
  #
  # @return [Float] -0.0 for an empty string
  def self.entropy(text)
    total = text.length.to_f
    weighted_sum = text.each_char.tally.each_value.reduce(0.0) do |acc, count|
      probability = count / total
      acc + probability * Math.log2(probability)
    end
    # Each term is <= 0, so negate to report a non-negative entropy.
    -weighted_sum
  end

  # Counts whole-word occurrences of every context keyword of +database+.
  #
  # @param content_lower [String] already-downcased text
  # @return [Hash{String=>Integer}] downcased keyword => count (> 0 only)
  def self.keyword_counts(content_lower, database: HEITT::DATABASE)
    keywords = database.flat_map do |entry|
      (get_modes(entry) || []).flat_map { |mode| mode[:context] || [] }
    end
    # Downcase before de-duplicating so "MD5" and "md5" are scanned once.
    keywords = keywords.map(&:downcase).uniq

    keywords.each_with_object({}) do |keyword, counts|
      hits = content_lower.scan(/\b#{Regexp.escape(keyword)}\b/).size
      counts[keyword] = hits if hits.positive?
    end
  end

  # Sums keyword hits per mode.
  #
  # @param keyword_counts [Hash{String=>Integer}, nil]
  # @return [Hash{String=>Integer}] mode name => total (> 0 only)
  def self.algorithm_scores(keyword_counts, database: HEITT::DATABASE)
    scores = {}
    return scores if keyword_counts.nil?

    database.each do |entry|
      (get_modes(entry) || []).each do |mode|
        contexts = mode[:context] || []
        next if contexts.empty?

        total = contexts.sum { |keyword| keyword_counts[keyword.downcase] || 0 }
        scores[mode[:name]] = total if total.positive?
      end
    end
    scores
  end

  # Whether the last delimiter-separated token of +delim_prefix+ equals one
  # of the mode's :prefixes (case-insensitive).
  def self.prefix_match?(mode, delim_prefix)
    prefixes = mode[:prefixes] || []
    return false if prefixes.empty?

    label = delim_prefix.to_s.strip.split(PREFIX_DELIMITERS).last&.strip&.downcase
    prefixes.map(&:downcase).include?(label)
  end

  # Maps each mode's score to a confidence label.
  #
  # @param scores_hash [Hash{String=>Numeric}] mode name => score
  # @param prefix_matched_mode [String, Array<String>, nil] name(s) of the
  #   mode(s) whose prefix matched
  # @return [Hash{String=>String}] mode name => "regex-match", "high",
  #   "medium-high" or "medium-low"
  def self.assign_confidence(scores_hash, prefix_matched_mode = nil)
    return {} if scores_hash.empty?

    prefix_modes = Array(prefix_matched_mode)
    avg_score = scores_hash.values.sum.to_f / scores_hash.size

    # Iterate over name/score pairs: looking the name up by score is wrong
    # whenever two modes share the same score.
    scores_hash.to_h do |mode_name, score|
      [mode_name, confidence_label(score, avg_score, prefix_modes.include?(mode_name))]
    end
  end

  # Confidence label for one score, based on its relative deviation from the
  # average score; a prefix match bumps the two lower tiers up by one.
  def self.confidence_label(score, avg_score, prefix_mode)
    # A zero score means only the regex matched: no context, no prefix.
    return "regex-match" if score.zero?

    deviation = (score - avg_score) / avg_score
    if deviation >= 2.0
      "high"
    elsif deviation >= 0.5
      prefix_mode ? "high" : "medium-high"
    else
      prefix_mode ? "medium-high" : "medium-low"
    end
  end
end
186
+ #private_constant :Analyzer
187
+
188
+
189
+
190
# Extracts hash-looking strings from text using the signature database.
module Scanner
  # Scans +input+ for every pattern in +database+ and ranks the possible
  # algorithms for each distinct match.
  #
  # @param input [String] a path to an existing file (its content is
  #   scanned) or the literal text to scan
  # @param database [Array<Hash>] signature entries; each needs a pattern
  #   (see {get_regex}) and a non-empty mode list (see {get_modes})
  # @param min_entropy [Float] minimum Shannon entropy for matches of
  #   8 characters or more; shorter matches are always kept
  # @return [Array<Hash>] one +{hash:, candidates:}+ per distinct matched
  #   string, keeping the candidate list with the highest top score
  def self.scan(input, database: HEITT::DATABASE, min_entropy: 3.5)
    text = load_text(input)
    context_scores = HEITT::Analyzer.analyze(text, database: database)
    found = {}
    best_scores = {}

    database.each do |entry|
      regex = get_regex(entry)
      modes = get_modes(entry)
      next unless regex && modes && !modes.empty?

      pattern = regex.is_a?(Regexp) ? regex : Regexp.new(regex)
      scanner = StringScanner.new(text)

      while scanner.scan_until(pattern)
        matched = scanner.matched

        # A pattern that can match the empty string never advances the
        # scanner; step over one character so the loop terminates.
        if matched.empty?
          break if scanner.eos?

          scanner.getch
          next
        end

        next unless matched.length < 8 || HEITT::Analyzer.high_entropy?(matched, min_entropy)

        # charpos, not pos: pos counts bytes, while the prefix lookup
        # indexes +text+ by character.
        offset = scanner.charpos - matched.length
        delim_prefix = HEITT::Analyzer.extract_prefix(text, offset)

        candidates = HEITT::Analyzer.score_candidates(modes, delim_prefix, context_scores)
        next if candidates.empty?

        score = candidates.first[:score]
        next unless score > (best_scores[matched] || -1)

        best_scores[matched] = score
        found[matched] = {hash: matched, candidates: candidates}
      end
    end
    found.values
  end

  # Resolves +input+ to the text to scan: the file's content when +input+
  # names an existing non-directory path, otherwise +input+ itself. Invalid
  # byte sequences are scrubbed before the text is downcased and scanned.
  def self.load_text(input)
    text = input.to_s
    # File.exist? raises ArgumentError on strings containing NUL bytes.
    readable_path = !text.include?("\0") && File.exist?(text) && !File.directory?(text)
    text = File.read(text) if readable_path
    text.valid_encoding? ? text : text.scrub
  end

  # Returns the entry's pattern, accepting several alternative key names.
  def self.get_regex(entry)
    entry[:extract_regex] || entry[:regex] || entry[:pattern] || entry[:regexp]
  end

  # Returns the entry's mode list, accepting several alternative key names.
  def self.get_modes(entry)
    entry[:modes] || entry[:algorithms] || entry[:hashes] ||
      entry[:candidates] || entry[:types] || entry[:hashtypes]
  end
end
234
+
235
+
236
# Renders grouped scan results as a coloured text tree or as JSON.
#
# A candidate is shown when it is not :extended (unless +extended+ is set)
# and its confidence is not "regex-match" (unless +show_regex_match+ is set).
# A group is shown when at least one of its candidates is shown; shown groups
# are renumbered from 1.
module Formatter
  # Renders +groups+ as a text tree: first an overview of every cluster and
  # its hashes, then one section per cluster listing its candidates.
  #
  # @param groups [Array<Hash>] clusters with :hashes and :candidates
  # @param verbose [Boolean] also print each candidate's description and notes
  # @param extended [Boolean] include candidates flagged :extended
  # @param show_regex_match [Boolean] include "regex-match" candidates
  # @return [String]
  def self.tree(groups, verbose: false, extended: false, show_regex_match: false)
    shown_groups = visible_groups(groups, extended: extended, show_regex_match: show_regex_match)
    output = +""

    root = {
      text: "#{bold_blue("\n\n[")}#{HEITT::Color.colorize("CLUSTERED HASHES", :green)}#{bold_blue("]")}",
      children: shown_groups.map do |group|
        {
          text: HEITT::Color.colorize("HASH CLUSTER #{group[:cluster_id]}", :magenta, :bold),
          children: group[:hashes].map { |hash| {text: hash, children: []} }
        }
      end
    }
    output << render_tree([root])

    shown_groups.each do |group|
      header = HEITT::Color.colorize("HASH CLUSTER #{group[:cluster_id]}", :white, :bold)
      output << "#{bold_blue("\n\n[")}#{header}#{bold_blue("]\n")}"

      # Number candidates after filtering so the list has no gaps.
      named = group[:candidates].reject { |candidate| candidate[:name].nil? }
      nodes = named.each_with_index.map { |candidate, idx| candidate_node(candidate, idx + 1, verbose) }
      output << render_tree(nodes, "", false, false) unless nodes.empty?
    end
    output
  end

  # Renders +groups+ as pretty-printed JSON.
  #
  # @param groups [Array<Hash>] clusters with :count, :hashes and :candidates
  # @param extended [Boolean] include candidates flagged :extended
  # @param show_regex_match [Boolean] include "regex-match" candidates
  # @return [String] JSON array of clusters with cluster_id, count, hashes
  #   and candidates (name, hashcat, john, confidence, description)
  def self.json(groups, extended: false, show_regex_match: false)
    shown_groups = visible_groups(groups, extended: extended, show_regex_match: show_regex_match)

    JSON.pretty_generate(
      shown_groups.map do |group|
        {
          cluster_id: group[:cluster_id],
          count: group[:count],
          hashes: group[:hashes],
          candidates: group[:candidates].map do |candidate|
            {
              name: candidate[:name],
              hashcat: candidate[:hashcat],
              john: candidate[:john],
              confidence: candidate[:confidence],
              description: candidate[:description]
            }
          end
        }
      end
    )
  end

  # --- helpers (module-level, so a bare `private` would not hide them) -----

  # Whether +candidate+ passes both display filters.
  def self.visible_candidate?(candidate, extended:, show_regex_match:)
    return false if candidate.nil?

    (!candidate[:extended] || extended) &&
      (candidate[:confidence] != "regex-match" || show_regex_match)
  end

  # Returns copies of the groups that have at least one visible candidate,
  # with :candidates reduced to the visible ones and :cluster_id renumbered.
  def self.visible_groups(groups, extended:, show_regex_match:)
    kept = groups.filter_map do |group|
      shown = (group[:candidates] || []).select do |candidate|
        visible_candidate?(candidate, extended: extended, show_regex_match: show_regex_match)
      end
      group.merge(candidates: shown) unless shown.empty?
    end
    kept.each_with_index.map { |group, index| group.merge(cluster_id: index + 1) }
  end

  # Builds the tree node for one candidate; +number+ is its 1-based position.
  def self.candidate_node(candidate, number, verbose)
    confidence = candidate[:confidence] ? " — CONFIDENCE: #{candidate[:confidence].upcase}" : ""

    children = [
      {text: "Hashcat Mode: #{candidate[:hashcat] || "--"}", children: []},
      {text: "John Format: #{candidate[:john] || "--"}", children: []}
    ]

    if verbose
      description = candidate[:description]
      children << {text: "Description: #{description}", children: []} if description && !description.empty?

      notes = candidate[:notes]
      if notes && !notes.empty?
        children << {text: "Notes:", children: notes.map { |note| {text: note, children: []} }}
      end
    end

    title = "#{HEITT::Color.colorize("CANDIDATE #{number}: ", :cyan, :bold)}" \
            "#{HEITT::Color.colorize("#{candidate[:name]}#{confidence}", :cyan, :bold)}"
    {text: "#{bold_blue("[")}#{title}#{bold_blue("]")}", children: children}
  end

  # Colours +text+ blue with the bold style. Color.colorize takes the colour
  # first and styles after it.
  def self.bold_blue(text)
    HEITT::Color.colorize(text, :blue, :bold)
  end

  # Recursively renders +items+ (hashes with :text and :children).
  #
  # @param items [Array<Hash>] sibling nodes
  # @param prefix [String] indentation inherited from the ancestors
  # @param parent_is_last [Boolean] unused; kept for call compatibility
  # @param is_root [Boolean] root nodes are printed without a connector
  # @return [String]
  def self.render_tree(items, prefix = "", parent_is_last = true, is_root = true)
    output = +""

    items.each_with_index do |node, i|
      last = (i == items.length - 1)
      children = node[:children] || []

      if is_root
        output << "#{node[:text]}\n"
        child_prefix = ""
      else
        connector = last ? '└── ' : '├── '
        output << "#{HEITT::Color.colorize(prefix, :blue)}#{HEITT::Color.colorize(connector, :blue)}#{node[:text]}\n"
        child_prefix = "#{bold_blue(prefix)}#{bold_blue(last ? " " : "│ ")}"
      end

      output << render_tree(children, child_prefix, last, false) if children.any?

      # Spacer line after the last leaf of a branch.
      output << "#{bold_blue(prefix)} \n" if last && !is_root && children.empty?
    end
    output
  end
end
356
+ end
357
+
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: heitt
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.1
5
+ platform: ruby
6
+ authors:
7
+ - Jonathan Botchway Owusu
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: colorize
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: 0.8.1
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: 0.8.1
26
+ description: Hash Extraction, Identification and Triage Tool.
27
+ email:
28
+ - jbotchwayowusu@gmail.com
29
+ executables:
30
+ - heitt
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - bin/heitt
35
+ - lib/heitt.rb
36
+ - lib/heitt/database.rb
37
+ - lib/heitt/version.rb
38
+ homepage: https://github.com/jobotow/heitt
39
+ licenses: []
40
+ metadata:
41
+ homepage_uri: https://github.com/jobotow/heitt
42
+ source_code_uri: https://github.com/jobotow/heitt
43
+ changelog_uri: https://github.com/jobotow/heitt/blob/main/CHANGELOG.md
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: 3.1.0
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubygems_version: 3.6.7
59
+ specification_version: 4
60
+ summary: Hash Extraction, Identification and Triage Tool.
61
+ test_files: []