explodethosebits 0.3.0__cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etb/__init__.py +351 -0
- etb/__init__.pyi +976 -0
- etb/_etb.cpython-39-x86_64-linux-gnu.so +0 -0
- etb/_version.py +34 -0
- etb/py.typed +2 -0
- explodethosebits-0.3.0.dist-info/METADATA +405 -0
- explodethosebits-0.3.0.dist-info/RECORD +88 -0
- explodethosebits-0.3.0.dist-info/WHEEL +6 -0
- explodethosebits-0.3.0.dist-info/licenses/LICENSE +21 -0
- explodethosebits-0.3.0.dist-info/sboms/auditwheel.cdx.json +1 -0
- explodethosebits.libs/libcudart-c3a75b33.so.12.8.90 +0 -0
- include/etb/bit_coordinate.hpp +45 -0
- include/etb/bit_extraction.hpp +79 -0
- include/etb/bit_pruning.hpp +122 -0
- include/etb/config.hpp +284 -0
- include/etb/cuda/arch_optimizations.cuh +358 -0
- include/etb/cuda/blackwell_optimizations.cuh +300 -0
- include/etb/cuda/cuda_common.cuh +265 -0
- include/etb/cuda/etb_cuda.cuh +200 -0
- include/etb/cuda/gpu_memory.cuh +406 -0
- include/etb/cuda/heuristics_kernel.cuh +315 -0
- include/etb/cuda/path_generator_kernel.cuh +272 -0
- include/etb/cuda/prefix_pruner_kernel.cuh +370 -0
- include/etb/cuda/signature_kernel.cuh +328 -0
- include/etb/early_stopping.hpp +246 -0
- include/etb/etb.hpp +20 -0
- include/etb/heuristics.hpp +165 -0
- include/etb/memoization.hpp +285 -0
- include/etb/path.hpp +86 -0
- include/etb/path_count.hpp +87 -0
- include/etb/path_generator.hpp +175 -0
- include/etb/prefix_trie.hpp +339 -0
- include/etb/reporting.hpp +437 -0
- include/etb/scoring.hpp +269 -0
- include/etb/signature.hpp +190 -0
- include/gmock/gmock-actions.h +2297 -0
- include/gmock/gmock-cardinalities.h +159 -0
- include/gmock/gmock-function-mocker.h +518 -0
- include/gmock/gmock-matchers.h +5623 -0
- include/gmock/gmock-more-actions.h +658 -0
- include/gmock/gmock-more-matchers.h +120 -0
- include/gmock/gmock-nice-strict.h +277 -0
- include/gmock/gmock-spec-builders.h +2148 -0
- include/gmock/gmock.h +96 -0
- include/gmock/internal/custom/README.md +18 -0
- include/gmock/internal/custom/gmock-generated-actions.h +7 -0
- include/gmock/internal/custom/gmock-matchers.h +37 -0
- include/gmock/internal/custom/gmock-port.h +40 -0
- include/gmock/internal/gmock-internal-utils.h +487 -0
- include/gmock/internal/gmock-port.h +139 -0
- include/gmock/internal/gmock-pp.h +279 -0
- include/gtest/gtest-assertion-result.h +237 -0
- include/gtest/gtest-death-test.h +345 -0
- include/gtest/gtest-matchers.h +923 -0
- include/gtest/gtest-message.h +252 -0
- include/gtest/gtest-param-test.h +546 -0
- include/gtest/gtest-printers.h +1161 -0
- include/gtest/gtest-spi.h +250 -0
- include/gtest/gtest-test-part.h +192 -0
- include/gtest/gtest-typed-test.h +331 -0
- include/gtest/gtest.h +2321 -0
- include/gtest/gtest_pred_impl.h +279 -0
- include/gtest/gtest_prod.h +60 -0
- include/gtest/internal/custom/README.md +44 -0
- include/gtest/internal/custom/gtest-port.h +37 -0
- include/gtest/internal/custom/gtest-printers.h +42 -0
- include/gtest/internal/custom/gtest.h +37 -0
- include/gtest/internal/gtest-death-test-internal.h +307 -0
- include/gtest/internal/gtest-filepath.h +227 -0
- include/gtest/internal/gtest-internal.h +1560 -0
- include/gtest/internal/gtest-param-util.h +1026 -0
- include/gtest/internal/gtest-port-arch.h +122 -0
- include/gtest/internal/gtest-port.h +2481 -0
- include/gtest/internal/gtest-string.h +178 -0
- include/gtest/internal/gtest-type-util.h +220 -0
- lib/libetb_core.a +0 -0
- lib64/cmake/GTest/GTestConfig.cmake +33 -0
- lib64/cmake/GTest/GTestConfigVersion.cmake +43 -0
- lib64/cmake/GTest/GTestTargets-release.cmake +49 -0
- lib64/cmake/GTest/GTestTargets.cmake +139 -0
- lib64/libgmock.a +0 -0
- lib64/libgmock_main.a +0 -0
- lib64/libgtest.a +0 -0
- lib64/libgtest_main.a +0 -0
- lib64/pkgconfig/gmock.pc +10 -0
- lib64/pkgconfig/gmock_main.pc +10 -0
- lib64/pkgconfig/gtest.pc +9 -0
- lib64/pkgconfig/gtest_main.pc +10 -0
|
@@ -0,0 +1,437 @@
|
|
|
1
|
+
#ifndef ETB_REPORTING_HPP
|
|
2
|
+
#define ETB_REPORTING_HPP
|
|
3
|
+
|
|
4
|
+
#include "scoring.hpp"
#include "heuristics.hpp"
#include "signature.hpp"
#include "path.hpp"
#include "bit_pruning.hpp"

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <string>
#include <utility>
#include <vector>
|
|
14
|
+
|
|
15
|
+
namespace etb {
|
|
16
|
+
|
|
17
|
+
/**
 * Validation report for a successful extraction.
 *
 * Holds three pass/fail flags, an aggregate validity score and free-form
 * notes. A default-constructed report has every flag cleared, a score of
 * 0.0 and empty notes.
 */
struct ValidationReport {
    bool signature_valid = false;    // Signature validation passed
    bool structure_valid = false;    // Structural validation passed
    bool heuristics_valid = false;   // Heuristics within expected ranges
    float overall_validity = 0.0f;   // Overall validity score [0.0, 1.0]
    std::string validation_notes;    // Human-readable validation notes

    // The in-class initializers above supply every default, so the
    // constructor itself has nothing left to do.
    ValidationReport() = default;
};
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Success result containing extracted data and metadata.
|
|
37
|
+
* Requirements: 12.1
|
|
38
|
+
*/
|
|
39
|
+
struct SuccessResult {
|
|
40
|
+
std::vector<uint8_t> extracted_bytes; // The extracted byte sequence
|
|
41
|
+
std::string detected_format; // Detected format name (e.g., "PNG", "JPEG")
|
|
42
|
+
std::string format_category; // Format category (e.g., "image", "archive")
|
|
43
|
+
float confidence; // Confidence score [0.0, 1.0]
|
|
44
|
+
Path reconstruction_path; // The path taken to reconstruct the data
|
|
45
|
+
ValidationReport validation; // Detailed validation report
|
|
46
|
+
HeuristicResult heuristics; // Heuristic analysis results
|
|
47
|
+
SignatureMatch signature_match; // Signature match details
|
|
48
|
+
|
|
49
|
+
SuccessResult() : confidence(0.0f) {}
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
/**
 * Partial match information for failed extractions.
 *
 * A default-constructed value has empty data and strings, a score of 0.0
 * and a depth of 0.
 */
struct PartialMatch {
    std::vector<uint8_t> partial_data;  // Partial reconstructed data
    std::string possible_format;        // Possible format (if any signature prefix matched)
    float partial_score = 0.0f;         // Score achieved before failure
    size_t depth_reached = 0;           // How deep the path went before stopping
    std::string failure_reason;         // Why this path was abandoned

    // Defaults come from the in-class initializers above.
    PartialMatch() = default;
};
|
|
64
|
+
|
|
65
|
+
/**
 * Suggestion for parameter adjustment when extraction fails.
 *
 * All four fields are plain strings; a default-constructed suggestion has
 * every field empty.
 */
struct ParameterSuggestion {
    std::string parameter_name;   // Name of the parameter to adjust
    std::string current_value;    // Current value as string
    std::string suggested_value;  // Suggested new value
    std::string rationale;        // Why this adjustment might help

    ParameterSuggestion() = default;

    /**
     * Construct a fully populated suggestion.
     *
     * The arguments are taken by value and moved into the members, so
     * temporaries and string literals are not copied a second time, while
     * lvalue arguments are still left untouched for the caller.
     *
     * @param name      Name of the parameter to adjust
     * @param current   Current value, as text
     * @param suggested Suggested new value, as text
     * @param reason    Why the adjustment might help
     */
    ParameterSuggestion(std::string name, std::string current,
                        std::string suggested, std::string reason)
        : parameter_name(std::move(name))
        , current_value(std::move(current))
        , suggested_value(std::move(suggested))
        , rationale(std::move(reason)) {}
};
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* Failure result containing diagnostic information.
|
|
83
|
+
* Requirements: 12.2
|
|
84
|
+
*/
|
|
85
|
+
struct FailureResult {
|
|
86
|
+
uint64_t paths_explored; // Total paths explored before giving up
|
|
87
|
+
size_t effective_depth_reached; // Maximum depth reached
|
|
88
|
+
std::vector<PartialMatch> best_partials;// Best partial matches found
|
|
89
|
+
std::vector<ParameterSuggestion> suggestions; // Suggestions for parameter adjustment
|
|
90
|
+
std::string failure_summary; // Human-readable failure summary
|
|
91
|
+
|
|
92
|
+
FailureResult() : paths_explored(0), effective_depth_reached(0) {}
|
|
93
|
+
};
|
|
94
|
+
|
|
95
|
+
/**
 * Extraction metrics for reporting.
 *
 * Every counter, rate and timing starts at zero except
 * effective_branching_factor, which starts at 8.0.
 * Requirements: 12.3, 12.4
 */
struct ExtractionMetrics {
    // Path statistics
    uint64_t total_paths_possible = 0;  // Theoretical total paths
    uint64_t paths_evaluated = 0;       // Actual paths evaluated
    uint64_t paths_pruned_level1 = 0;   // Paths pruned at Level 1 (2-4 bytes)
    uint64_t paths_pruned_level2 = 0;   // Paths pruned at Level 2 (8 bytes)
    uint64_t paths_pruned_level3 = 0;   // Paths pruned at Level 3 (16 bytes)
    uint64_t paths_pruned_prefix = 0;   // Paths pruned by prefix trie

    // Efficiency metrics
    float effective_branching_factor = 8.0f;  // Actual branching factor achieved
    float effective_depth = 0.0f;             // Average depth of evaluated paths
    float cache_hit_rate = 0.0f;              // Memoization cache hit rate [0.0, 1.0]

    // Prune rates
    // NOTE(review): these were described as "percentage"; whether they are
    // stored as a fraction or on a 0-100 scale is decided by the code that
    // fills them in - confirm before displaying.
    float level1_prune_rate = 0.0f;  // Share of paths pruned at Level 1
    float level2_prune_rate = 0.0f;  // Share of paths pruned at Level 2
    float level3_prune_rate = 0.0f;  // Share of paths pruned at Level 3
    float prefix_prune_rate = 0.0f;  // Share of paths pruned by prefix

    // Format detection
    std::vector<std::pair<std::string, uint32_t>> format_distribution;  // Format -> count

    // Timing
    double wall_clock_seconds = 0.0;        // Total wall clock time
    double average_time_per_path_us = 0.0;  // Average microseconds per path
    float gpu_utilization = 0.0f;           // GPU utilization as a fraction [0.0, 1.0]

    // Complexity reduction
    std::string complexity_reduction;  // Human-readable complexity reduction

    // Defaults come from the in-class initializers above.
    ExtractionMetrics() = default;
};
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Complete extraction result combining success/failure with metrics.
|
|
151
|
+
*/
|
|
152
|
+
struct ExtractionResult {
|
|
153
|
+
bool success; // Whether extraction succeeded
|
|
154
|
+
std::vector<SuccessResult> candidates; // Successful candidates (if any)
|
|
155
|
+
std::optional<FailureResult> failure; // Failure details (if failed)
|
|
156
|
+
ExtractionMetrics metrics; // Extraction metrics
|
|
157
|
+
|
|
158
|
+
ExtractionResult() : success(false) {}
|
|
159
|
+
};
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
/**
|
|
163
|
+
* Success Result Builder
|
|
164
|
+
* Formats extracted bytes, format, confidence, path, and validation report.
|
|
165
|
+
* Requirements: 12.1
|
|
166
|
+
*/
|
|
167
|
+
class SuccessResultBuilder {
|
|
168
|
+
public:
|
|
169
|
+
SuccessResultBuilder() = default;
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* Set the extracted byte data.
|
|
173
|
+
*/
|
|
174
|
+
SuccessResultBuilder& set_data(const std::vector<uint8_t>& data);
|
|
175
|
+
SuccessResultBuilder& set_data(std::vector<uint8_t>&& data);
|
|
176
|
+
|
|
177
|
+
/**
|
|
178
|
+
* Set the detected format information.
|
|
179
|
+
*/
|
|
180
|
+
SuccessResultBuilder& set_format(const std::string& format_name,
|
|
181
|
+
const std::string& category = "");
|
|
182
|
+
|
|
183
|
+
/**
|
|
184
|
+
* Set the confidence score.
|
|
185
|
+
*/
|
|
186
|
+
SuccessResultBuilder& set_confidence(float confidence);
|
|
187
|
+
|
|
188
|
+
/**
|
|
189
|
+
* Set the reconstruction path.
|
|
190
|
+
*/
|
|
191
|
+
SuccessResultBuilder& set_path(const Path& path);
|
|
192
|
+
SuccessResultBuilder& set_path(Path&& path);
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* Set the heuristic analysis results.
|
|
196
|
+
*/
|
|
197
|
+
SuccessResultBuilder& set_heuristics(const HeuristicResult& heuristics);
|
|
198
|
+
|
|
199
|
+
/**
|
|
200
|
+
* Set the signature match results.
|
|
201
|
+
*/
|
|
202
|
+
SuccessResultBuilder& set_signature_match(const SignatureMatch& match);
|
|
203
|
+
|
|
204
|
+
/**
|
|
205
|
+
* Set structural validation results.
|
|
206
|
+
*/
|
|
207
|
+
SuccessResultBuilder& set_structural_validation(const StructuralValidation& structure);
|
|
208
|
+
|
|
209
|
+
/**
|
|
210
|
+
* Build the validation report based on set values.
|
|
211
|
+
*/
|
|
212
|
+
SuccessResultBuilder& build_validation_report();
|
|
213
|
+
|
|
214
|
+
/**
|
|
215
|
+
* Build and return the success result.
|
|
216
|
+
*/
|
|
217
|
+
SuccessResult build() const;
|
|
218
|
+
|
|
219
|
+
/**
|
|
220
|
+
* Build a success result from a Candidate object.
|
|
221
|
+
*/
|
|
222
|
+
static SuccessResult from_candidate(const Candidate& candidate);
|
|
223
|
+
|
|
224
|
+
private:
|
|
225
|
+
SuccessResult result_;
|
|
226
|
+
StructuralValidation structure_;
|
|
227
|
+
bool has_structure_ = false;
|
|
228
|
+
|
|
229
|
+
void compute_validation();
|
|
230
|
+
};
|
|
231
|
+
|
|
232
|
+
/**
|
|
233
|
+
* Failure Result Builder
|
|
234
|
+
* Includes paths explored, best partials, and suggestions.
|
|
235
|
+
* Requirements: 12.2
|
|
236
|
+
*/
|
|
237
|
+
class FailureResultBuilder {
|
|
238
|
+
public:
|
|
239
|
+
FailureResultBuilder() = default;
|
|
240
|
+
|
|
241
|
+
/**
|
|
242
|
+
* Set the number of paths explored.
|
|
243
|
+
*/
|
|
244
|
+
FailureResultBuilder& set_paths_explored(uint64_t count);
|
|
245
|
+
|
|
246
|
+
/**
|
|
247
|
+
* Set the effective depth reached.
|
|
248
|
+
*/
|
|
249
|
+
FailureResultBuilder& set_effective_depth(size_t depth);
|
|
250
|
+
|
|
251
|
+
/**
|
|
252
|
+
* Add a partial match.
|
|
253
|
+
*/
|
|
254
|
+
FailureResultBuilder& add_partial_match(const PartialMatch& partial);
|
|
255
|
+
FailureResultBuilder& add_partial_match(PartialMatch&& partial);
|
|
256
|
+
|
|
257
|
+
/**
|
|
258
|
+
* Add a partial match from a Candidate.
|
|
259
|
+
*/
|
|
260
|
+
FailureResultBuilder& add_partial_from_candidate(const Candidate& candidate,
|
|
261
|
+
const std::string& failure_reason);
|
|
262
|
+
|
|
263
|
+
/**
|
|
264
|
+
* Add a parameter suggestion.
|
|
265
|
+
*/
|
|
266
|
+
FailureResultBuilder& add_suggestion(const ParameterSuggestion& suggestion);
|
|
267
|
+
FailureResultBuilder& add_suggestion(const std::string& param, const std::string& current,
|
|
268
|
+
const std::string& suggested, const std::string& rationale);
|
|
269
|
+
|
|
270
|
+
/**
|
|
271
|
+
* Generate suggestions based on metrics.
|
|
272
|
+
*/
|
|
273
|
+
FailureResultBuilder& generate_suggestions(const ExtractionMetrics& metrics);
|
|
274
|
+
|
|
275
|
+
/**
|
|
276
|
+
* Set the failure summary.
|
|
277
|
+
*/
|
|
278
|
+
FailureResultBuilder& set_summary(const std::string& summary);
|
|
279
|
+
|
|
280
|
+
/**
|
|
281
|
+
* Auto-generate failure summary based on set values.
|
|
282
|
+
*/
|
|
283
|
+
FailureResultBuilder& generate_summary();
|
|
284
|
+
|
|
285
|
+
/**
|
|
286
|
+
* Build and return the failure result.
|
|
287
|
+
*/
|
|
288
|
+
FailureResult build() const;
|
|
289
|
+
|
|
290
|
+
private:
|
|
291
|
+
FailureResult result_;
|
|
292
|
+
};
|
|
293
|
+
|
|
294
|
+
/**
|
|
295
|
+
* Metrics Reporter
|
|
296
|
+
* Calculates and reports all extraction metrics.
|
|
297
|
+
* Requirements: 12.3, 12.4
|
|
298
|
+
*/
|
|
299
|
+
class MetricsReporter {
|
|
300
|
+
public:
|
|
301
|
+
MetricsReporter() = default;
|
|
302
|
+
|
|
303
|
+
/**
|
|
304
|
+
* Set path statistics.
|
|
305
|
+
*/
|
|
306
|
+
MetricsReporter& set_total_paths_possible(uint64_t count);
|
|
307
|
+
MetricsReporter& set_paths_evaluated(uint64_t count);
|
|
308
|
+
MetricsReporter& set_paths_pruned_level1(uint64_t count);
|
|
309
|
+
MetricsReporter& set_paths_pruned_level2(uint64_t count);
|
|
310
|
+
MetricsReporter& set_paths_pruned_level3(uint64_t count);
|
|
311
|
+
MetricsReporter& set_paths_pruned_prefix(uint64_t count);
|
|
312
|
+
|
|
313
|
+
/**
|
|
314
|
+
* Set efficiency metrics.
|
|
315
|
+
*/
|
|
316
|
+
MetricsReporter& set_effective_branching_factor(float factor);
|
|
317
|
+
MetricsReporter& set_effective_depth(float depth);
|
|
318
|
+
MetricsReporter& set_cache_hit_rate(float rate);
|
|
319
|
+
|
|
320
|
+
/**
|
|
321
|
+
* Add format detection result.
|
|
322
|
+
*/
|
|
323
|
+
MetricsReporter& add_format_detection(const std::string& format, uint32_t count = 1);
|
|
324
|
+
|
|
325
|
+
/**
|
|
326
|
+
* Set timing information.
|
|
327
|
+
*/
|
|
328
|
+
MetricsReporter& set_wall_clock_time(double seconds);
|
|
329
|
+
MetricsReporter& set_gpu_utilization(float utilization);
|
|
330
|
+
|
|
331
|
+
/**
|
|
332
|
+
* Calculate derived metrics (prune rates, average time, complexity reduction).
|
|
333
|
+
*/
|
|
334
|
+
MetricsReporter& calculate_derived_metrics();
|
|
335
|
+
|
|
336
|
+
/**
|
|
337
|
+
* Generate complexity reduction string.
|
|
338
|
+
* Format: "Reduced from O(8^n) to O(k^d) where k=X.X, d=Y"
|
|
339
|
+
*/
|
|
340
|
+
MetricsReporter& generate_complexity_reduction(uint32_t input_length);
|
|
341
|
+
|
|
342
|
+
/**
|
|
343
|
+
* Build and return the metrics.
|
|
344
|
+
*/
|
|
345
|
+
ExtractionMetrics build() const;
|
|
346
|
+
|
|
347
|
+
/**
|
|
348
|
+
* Get a human-readable report string.
|
|
349
|
+
* @param verbosity "minimal", "standard", or "full"
|
|
350
|
+
*/
|
|
351
|
+
std::string to_string(const std::string& verbosity = "full") const;
|
|
352
|
+
|
|
353
|
+
private:
|
|
354
|
+
ExtractionMetrics metrics_;
|
|
355
|
+
};
|
|
356
|
+
|
|
357
|
+
/**
|
|
358
|
+
* Complete Extraction Result Builder
|
|
359
|
+
* Combines success/failure results with metrics.
|
|
360
|
+
*/
|
|
361
|
+
class ExtractionResultBuilder {
|
|
362
|
+
public:
|
|
363
|
+
ExtractionResultBuilder() = default;
|
|
364
|
+
|
|
365
|
+
/**
|
|
366
|
+
* Mark as successful extraction.
|
|
367
|
+
*/
|
|
368
|
+
ExtractionResultBuilder& set_success(bool success);
|
|
369
|
+
|
|
370
|
+
/**
|
|
371
|
+
* Add a successful candidate.
|
|
372
|
+
*/
|
|
373
|
+
ExtractionResultBuilder& add_candidate(const SuccessResult& result);
|
|
374
|
+
ExtractionResultBuilder& add_candidate(SuccessResult&& result);
|
|
375
|
+
|
|
376
|
+
/**
|
|
377
|
+
* Add candidates from a vector of Candidate objects.
|
|
378
|
+
*/
|
|
379
|
+
ExtractionResultBuilder& add_candidates(const std::vector<Candidate>& candidates);
|
|
380
|
+
|
|
381
|
+
/**
|
|
382
|
+
* Set failure information.
|
|
383
|
+
*/
|
|
384
|
+
ExtractionResultBuilder& set_failure(const FailureResult& failure);
|
|
385
|
+
ExtractionResultBuilder& set_failure(FailureResult&& failure);
|
|
386
|
+
|
|
387
|
+
/**
|
|
388
|
+
* Set extraction metrics.
|
|
389
|
+
*/
|
|
390
|
+
ExtractionResultBuilder& set_metrics(const ExtractionMetrics& metrics);
|
|
391
|
+
ExtractionResultBuilder& set_metrics(ExtractionMetrics&& metrics);
|
|
392
|
+
|
|
393
|
+
/**
|
|
394
|
+
* Build and return the complete result.
|
|
395
|
+
*/
|
|
396
|
+
ExtractionResult build() const;
|
|
397
|
+
|
|
398
|
+
private:
|
|
399
|
+
ExtractionResult result_;
|
|
400
|
+
};
|
|
401
|
+
|
|
402
|
+
// Utility functions
|
|
403
|
+
|
|
404
|
+
/**
|
|
405
|
+
* Format a path as a human-readable string.
|
|
406
|
+
* @param path The path to format
|
|
407
|
+
* @param max_coords Maximum coordinates to show (0 = all)
|
|
408
|
+
* @return Formatted string like "[(0,3), (1,5), (2,1), ...]"
|
|
409
|
+
*/
|
|
410
|
+
std::string format_path(const Path& path, size_t max_coords = 10);
|
|
411
|
+
|
|
412
|
+
/**
|
|
413
|
+
* Format bytes as a hex string.
|
|
414
|
+
* @param data The bytes to format
|
|
415
|
+
* @param max_bytes Maximum bytes to show (0 = all)
|
|
416
|
+
* @return Formatted hex string like "89 50 4E 47 ..."
|
|
417
|
+
*/
|
|
418
|
+
std::string format_bytes_hex(const std::vector<uint8_t>& data, size_t max_bytes = 32);
|
|
419
|
+
|
|
420
|
+
/**
|
|
421
|
+
* Format a confidence score as a percentage string.
|
|
422
|
+
*/
|
|
423
|
+
std::string format_confidence(float confidence);
|
|
424
|
+
|
|
425
|
+
/**
|
|
426
|
+
* Format a duration in human-readable form.
|
|
427
|
+
*/
|
|
428
|
+
std::string format_duration(double seconds);
|
|
429
|
+
|
|
430
|
+
/**
|
|
431
|
+
* Format a large number with appropriate suffix (K, M, B).
|
|
432
|
+
*/
|
|
433
|
+
std::string format_count(uint64_t count);
|
|
434
|
+
|
|
435
|
+
} // namespace etb
|
|
436
|
+
|
|
437
|
+
#endif // ETB_REPORTING_HPP
|
include/etb/scoring.hpp
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
#ifndef ETB_SCORING_HPP
|
|
2
|
+
#define ETB_SCORING_HPP
|
|
3
|
+
|
|
4
|
+
#include "heuristics.hpp"
|
|
5
|
+
#include "signature.hpp"
|
|
6
|
+
#include "path.hpp"
|
|
7
|
+
#include <cstdint>
|
|
8
|
+
#include <vector>
|
|
9
|
+
#include <queue>
|
|
10
|
+
#include <functional>
|
|
11
|
+
|
|
12
|
+
namespace etb {
|
|
13
|
+
|
|
14
|
+
/**
 * Configurable weights for composite scoring.
 * Default weights: signature (40%), heuristic (30%), length (15%), structure (15%)
 */
struct ScoringWeights {
    float signature_weight = 0.40f;  // Weight for signature match score
    float heuristic_weight = 0.30f;  // Weight for heuristic analysis score
    float length_weight = 0.15f;     // Weight for length/completeness score
    float structure_weight = 0.15f;  // Weight for structural validity score

    // Defaults come from the in-class initializers above.
    ScoringWeights() = default;

    /**
     * Validate that weights sum to approximately 1.0
     * @return true if weights are valid
     */
    bool is_valid() const;

    /**
     * Normalize weights to sum to 1.0
     */
    void normalize();
};
|
|
41
|
+
|
|
42
|
+
/**
 * Structural validation result for a candidate.
 *
 * A default-constructed value carries the neutral score 0.5 with all three
 * checks marked as passing.
 */
struct StructuralValidation {
    float validity_score = 0.5f;     // Overall structural validity [0.0, 1.0]; 0.5 is the neutral default
    bool has_valid_length = true;    // Length claims are coherent
    bool has_valid_checksum = true;  // Checksum validation passed (if applicable)
    bool has_valid_pointers = true;  // Internal pointers are coherent (if applicable)

    // Defaults come from the in-class initializers above.
    StructuralValidation() = default;
};
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* A candidate reconstruction with all associated metadata.
|
|
60
|
+
*/
|
|
61
|
+
struct Candidate {
|
|
62
|
+
Path path; // The path taken to reconstruct this candidate
|
|
63
|
+
std::vector<uint8_t> data; // Reconstructed byte sequence
|
|
64
|
+
uint16_t format_id; // Detected format ID (0 = unknown)
|
|
65
|
+
std::string format_name; // Detected format name
|
|
66
|
+
float confidence; // Overall confidence score [0.0, 1.0]
|
|
67
|
+
HeuristicResult heuristics; // Heuristic analysis results
|
|
68
|
+
SignatureMatch signature_match; // Signature match results
|
|
69
|
+
StructuralValidation structure; // Structural validation results
|
|
70
|
+
float composite_score; // Final weighted composite score
|
|
71
|
+
|
|
72
|
+
Candidate()
|
|
73
|
+
: format_id(0)
|
|
74
|
+
, confidence(0.0f)
|
|
75
|
+
, composite_score(0.0f) {}
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Compare candidates by composite score (for priority queue).
|
|
79
|
+
*/
|
|
80
|
+
bool operator<(const Candidate& other) const {
|
|
81
|
+
return composite_score < other.composite_score;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
bool operator>(const Candidate& other) const {
|
|
85
|
+
return composite_score > other.composite_score;
|
|
86
|
+
}
|
|
87
|
+
};
|
|
88
|
+
|
|
89
|
+
/**
|
|
90
|
+
* Composite score calculator.
|
|
91
|
+
* Calculates weighted scores from component scores.
|
|
92
|
+
*/
|
|
93
|
+
class ScoreCalculator {
|
|
94
|
+
public:
|
|
95
|
+
/**
|
|
96
|
+
* Construct with default weights.
|
|
97
|
+
*/
|
|
98
|
+
ScoreCalculator();
|
|
99
|
+
|
|
100
|
+
/**
|
|
101
|
+
* Construct with custom weights.
|
|
102
|
+
* @param weights Custom scoring weights
|
|
103
|
+
*/
|
|
104
|
+
explicit ScoreCalculator(const ScoringWeights& weights);
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Set the scoring weights.
|
|
108
|
+
* @param weights New weights to use
|
|
109
|
+
*/
|
|
110
|
+
void set_weights(const ScoringWeights& weights);
|
|
111
|
+
|
|
112
|
+
/**
|
|
113
|
+
* Get the current scoring weights.
|
|
114
|
+
*/
|
|
115
|
+
const ScoringWeights& get_weights() const { return weights_; }
|
|
116
|
+
|
|
117
|
+
/**
|
|
118
|
+
* Calculate composite score from component scores.
|
|
119
|
+
* @param signature_score Signature match score [0.0, 1.0]
|
|
120
|
+
* @param heuristic_score Heuristic analysis score [0.0, 1.0]
|
|
121
|
+
* @param length_score Length/completeness score [0.0, 1.0]
|
|
122
|
+
* @param structure_score Structural validity score [0.0, 1.0]
|
|
123
|
+
* @return Weighted composite score [0.0, 1.0]
|
|
124
|
+
*/
|
|
125
|
+
float calculate(float signature_score, float heuristic_score,
|
|
126
|
+
float length_score, float structure_score) const;
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
* Calculate composite score from a SignatureMatch and HeuristicResult.
|
|
130
|
+
* @param sig_match Signature match result
|
|
131
|
+
* @param heuristics Heuristic analysis result
|
|
132
|
+
* @param data_length Length of reconstructed data
|
|
133
|
+
* @param expected_length Expected length (0 = unknown)
|
|
134
|
+
* @param structure Structural validation result
|
|
135
|
+
* @return Weighted composite score [0.0, 1.0]
|
|
136
|
+
*/
|
|
137
|
+
float calculate(const SignatureMatch& sig_match,
|
|
138
|
+
const HeuristicResult& heuristics,
|
|
139
|
+
size_t data_length,
|
|
140
|
+
size_t expected_length,
|
|
141
|
+
const StructuralValidation& structure) const;
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* Calculate and populate a Candidate's composite score.
|
|
145
|
+
* @param candidate Candidate to score (modified in place)
|
|
146
|
+
* @param expected_length Expected length for length scoring (0 = unknown)
|
|
147
|
+
*/
|
|
148
|
+
void score_candidate(Candidate& candidate, size_t expected_length = 0) const;
|
|
149
|
+
|
|
150
|
+
private:
|
|
151
|
+
ScoringWeights weights_;
|
|
152
|
+
|
|
153
|
+
/**
|
|
154
|
+
* Calculate length score based on actual vs expected length.
|
|
155
|
+
*/
|
|
156
|
+
static float calculate_length_score(size_t actual_length, size_t expected_length);
|
|
157
|
+
};
|
|
158
|
+
|
|
159
|
+
/**
|
|
160
|
+
* Priority queue for tracking top-K candidates.
|
|
161
|
+
* Maintains a max-heap based on composite score.
|
|
162
|
+
*/
|
|
163
|
+
class CandidateQueue {
|
|
164
|
+
public:
|
|
165
|
+
/**
|
|
166
|
+
* Construct a candidate queue with specified capacity.
|
|
167
|
+
* @param capacity Maximum number of candidates to track (top-K)
|
|
168
|
+
*/
|
|
169
|
+
explicit CandidateQueue(size_t capacity = 10);
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* Try to add a candidate to the queue.
|
|
173
|
+
* If queue is full and candidate scores lower than minimum, it's rejected.
|
|
174
|
+
* @param candidate Candidate to add
|
|
175
|
+
* @return true if candidate was added, false if rejected
|
|
176
|
+
*/
|
|
177
|
+
bool push(const Candidate& candidate);
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* Try to add a candidate to the queue (move version).
|
|
181
|
+
* @param candidate Candidate to add (moved if accepted)
|
|
182
|
+
* @return true if candidate was added, false if rejected
|
|
183
|
+
*/
|
|
184
|
+
bool push(Candidate&& candidate);
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* Get the top candidate (highest score).
|
|
188
|
+
* @return Reference to top candidate
|
|
189
|
+
* @throws std::runtime_error if queue is empty
|
|
190
|
+
*/
|
|
191
|
+
const Candidate& top() const;
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Remove and return the top candidate.
|
|
195
|
+
* @return Top candidate
|
|
196
|
+
* @throws std::runtime_error if queue is empty
|
|
197
|
+
*/
|
|
198
|
+
Candidate pop();
|
|
199
|
+
|
|
200
|
+
/**
|
|
201
|
+
* Get all candidates sorted by score (descending).
|
|
202
|
+
* @return Vector of candidates sorted by composite score
|
|
203
|
+
*/
|
|
204
|
+
std::vector<Candidate> get_top_k() const;
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* Get the minimum score currently in the queue.
|
|
208
|
+
* Used for fast rejection of low-scoring candidates.
|
|
209
|
+
* @return Minimum score, or 0.0 if queue is not full
|
|
210
|
+
*/
|
|
211
|
+
float min_score() const { return min_score_; }
|
|
212
|
+
|
|
213
|
+
/**
|
|
214
|
+
* Check if a score would be accepted into the queue.
|
|
215
|
+
* @param score Score to check
|
|
216
|
+
* @return true if score would be accepted
|
|
217
|
+
*/
|
|
218
|
+
bool would_accept(float score) const;
|
|
219
|
+
|
|
220
|
+
/**
|
|
221
|
+
* Get the number of candidates in the queue.
|
|
222
|
+
*/
|
|
223
|
+
size_t size() const { return heap_.size(); }
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* Get the capacity of the queue.
|
|
227
|
+
*/
|
|
228
|
+
size_t capacity() const { return capacity_; }
|
|
229
|
+
|
|
230
|
+
/**
|
|
231
|
+
* Check if the queue is empty.
|
|
232
|
+
*/
|
|
233
|
+
bool empty() const { return heap_.empty(); }
|
|
234
|
+
|
|
235
|
+
/**
|
|
236
|
+
* Check if the queue is full.
|
|
237
|
+
*/
|
|
238
|
+
bool full() const { return heap_.size() >= capacity_; }
|
|
239
|
+
|
|
240
|
+
/**
|
|
241
|
+
* Clear all candidates from the queue.
|
|
242
|
+
*/
|
|
243
|
+
void clear();
|
|
244
|
+
|
|
245
|
+
/**
|
|
246
|
+
* Set a new capacity for the queue.
|
|
247
|
+
* If new capacity is smaller, lowest-scoring candidates are removed.
|
|
248
|
+
* @param new_capacity New capacity
|
|
249
|
+
*/
|
|
250
|
+
void set_capacity(size_t new_capacity);
|
|
251
|
+
|
|
252
|
+
private:
|
|
253
|
+
size_t capacity_;
|
|
254
|
+
float min_score_;
|
|
255
|
+
|
|
256
|
+
// Min-heap to efficiently track the minimum score for rejection
|
|
257
|
+
// We use a min-heap so we can efficiently remove the lowest-scoring candidate
|
|
258
|
+
// when the queue is full and a better candidate arrives
|
|
259
|
+
std::vector<Candidate> heap_;
|
|
260
|
+
|
|
261
|
+
void heapify_up(size_t index);
|
|
262
|
+
void heapify_down(size_t index);
|
|
263
|
+
void update_min_score();
|
|
264
|
+
void rebuild_heap();
|
|
265
|
+
};
|
|
266
|
+
|
|
267
|
+
} // namespace etb
|
|
268
|
+
|
|
269
|
+
#endif // ETB_SCORING_HPP
|