explodethosebits 0.3.0__cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. etb/__init__.py +351 -0
  2. etb/__init__.pyi +976 -0
  3. etb/_etb.cpython-39-x86_64-linux-gnu.so +0 -0
  4. etb/_version.py +34 -0
  5. etb/py.typed +2 -0
  6. explodethosebits-0.3.0.dist-info/METADATA +405 -0
  7. explodethosebits-0.3.0.dist-info/RECORD +88 -0
  8. explodethosebits-0.3.0.dist-info/WHEEL +6 -0
  9. explodethosebits-0.3.0.dist-info/licenses/LICENSE +21 -0
  10. explodethosebits-0.3.0.dist-info/sboms/auditwheel.cdx.json +1 -0
  11. explodethosebits.libs/libcudart-c3a75b33.so.12.8.90 +0 -0
  12. include/etb/bit_coordinate.hpp +45 -0
  13. include/etb/bit_extraction.hpp +79 -0
  14. include/etb/bit_pruning.hpp +122 -0
  15. include/etb/config.hpp +284 -0
  16. include/etb/cuda/arch_optimizations.cuh +358 -0
  17. include/etb/cuda/blackwell_optimizations.cuh +300 -0
  18. include/etb/cuda/cuda_common.cuh +265 -0
  19. include/etb/cuda/etb_cuda.cuh +200 -0
  20. include/etb/cuda/gpu_memory.cuh +406 -0
  21. include/etb/cuda/heuristics_kernel.cuh +315 -0
  22. include/etb/cuda/path_generator_kernel.cuh +272 -0
  23. include/etb/cuda/prefix_pruner_kernel.cuh +370 -0
  24. include/etb/cuda/signature_kernel.cuh +328 -0
  25. include/etb/early_stopping.hpp +246 -0
  26. include/etb/etb.hpp +20 -0
  27. include/etb/heuristics.hpp +165 -0
  28. include/etb/memoization.hpp +285 -0
  29. include/etb/path.hpp +86 -0
  30. include/etb/path_count.hpp +87 -0
  31. include/etb/path_generator.hpp +175 -0
  32. include/etb/prefix_trie.hpp +339 -0
  33. include/etb/reporting.hpp +437 -0
  34. include/etb/scoring.hpp +269 -0
  35. include/etb/signature.hpp +190 -0
  36. include/gmock/gmock-actions.h +2297 -0
  37. include/gmock/gmock-cardinalities.h +159 -0
  38. include/gmock/gmock-function-mocker.h +518 -0
  39. include/gmock/gmock-matchers.h +5623 -0
  40. include/gmock/gmock-more-actions.h +658 -0
  41. include/gmock/gmock-more-matchers.h +120 -0
  42. include/gmock/gmock-nice-strict.h +277 -0
  43. include/gmock/gmock-spec-builders.h +2148 -0
  44. include/gmock/gmock.h +96 -0
  45. include/gmock/internal/custom/README.md +18 -0
  46. include/gmock/internal/custom/gmock-generated-actions.h +7 -0
  47. include/gmock/internal/custom/gmock-matchers.h +37 -0
  48. include/gmock/internal/custom/gmock-port.h +40 -0
  49. include/gmock/internal/gmock-internal-utils.h +487 -0
  50. include/gmock/internal/gmock-port.h +139 -0
  51. include/gmock/internal/gmock-pp.h +279 -0
  52. include/gtest/gtest-assertion-result.h +237 -0
  53. include/gtest/gtest-death-test.h +345 -0
  54. include/gtest/gtest-matchers.h +923 -0
  55. include/gtest/gtest-message.h +252 -0
  56. include/gtest/gtest-param-test.h +546 -0
  57. include/gtest/gtest-printers.h +1161 -0
  58. include/gtest/gtest-spi.h +250 -0
  59. include/gtest/gtest-test-part.h +192 -0
  60. include/gtest/gtest-typed-test.h +331 -0
  61. include/gtest/gtest.h +2321 -0
  62. include/gtest/gtest_pred_impl.h +279 -0
  63. include/gtest/gtest_prod.h +60 -0
  64. include/gtest/internal/custom/README.md +44 -0
  65. include/gtest/internal/custom/gtest-port.h +37 -0
  66. include/gtest/internal/custom/gtest-printers.h +42 -0
  67. include/gtest/internal/custom/gtest.h +37 -0
  68. include/gtest/internal/gtest-death-test-internal.h +307 -0
  69. include/gtest/internal/gtest-filepath.h +227 -0
  70. include/gtest/internal/gtest-internal.h +1560 -0
  71. include/gtest/internal/gtest-param-util.h +1026 -0
  72. include/gtest/internal/gtest-port-arch.h +122 -0
  73. include/gtest/internal/gtest-port.h +2481 -0
  74. include/gtest/internal/gtest-string.h +178 -0
  75. include/gtest/internal/gtest-type-util.h +220 -0
  76. lib/libetb_core.a +0 -0
  77. lib64/cmake/GTest/GTestConfig.cmake +33 -0
  78. lib64/cmake/GTest/GTestConfigVersion.cmake +43 -0
  79. lib64/cmake/GTest/GTestTargets-release.cmake +49 -0
  80. lib64/cmake/GTest/GTestTargets.cmake +139 -0
  81. lib64/libgmock.a +0 -0
  82. lib64/libgmock_main.a +0 -0
  83. lib64/libgtest.a +0 -0
  84. lib64/libgtest_main.a +0 -0
  85. lib64/pkgconfig/gmock.pc +10 -0
  86. lib64/pkgconfig/gmock_main.pc +10 -0
  87. lib64/pkgconfig/gtest.pc +9 -0
  88. lib64/pkgconfig/gtest_main.pc +10 -0
@@ -0,0 +1,437 @@
1
+ #ifndef ETB_REPORTING_HPP
2
+ #define ETB_REPORTING_HPP
3
+
4
+ #include "scoring.hpp"
5
+ #include "heuristics.hpp"
6
+ #include "signature.hpp"
7
+ #include "path.hpp"
8
+ #include "bit_pruning.hpp"
9
+ #include <cstdint>
10
+ #include <string>
11
+ #include <vector>
12
+ #include <chrono>
13
+ #include <optional>
14
+
15
+ namespace etb {
16
+
17
/**
 * Outcome of validating a successfully extracted byte sequence.
 *
 * Carries one pass/fail flag per validation stage, an aggregate validity
 * score and free-form notes. A default-constructed report has every flag
 * cleared, a score of 0.0 and empty notes.
 */
struct ValidationReport {
    bool signature_valid = false;   // Signature validation passed
    bool structure_valid = false;   // Structural validation passed
    bool heuristics_valid = false;  // Heuristics within expected ranges
    float overall_validity = 0.0f;  // Overall validity score [0.0, 1.0]
    std::string validation_notes;   // Human-readable validation notes

    // Deliberately user-provided (not "= default") so the struct remains a
    // non-aggregate; the defaults come from the member initializers above.
    ValidationReport() {}
};
34
+
35
+ /**
36
+ * Success result containing extracted data and metadata.
37
+ * Requirements: 12.1
38
+ */
39
+ struct SuccessResult {
40
+ std::vector<uint8_t> extracted_bytes; // The extracted byte sequence
41
+ std::string detected_format; // Detected format name (e.g., "PNG", "JPEG")
42
+ std::string format_category; // Format category (e.g., "image", "archive")
43
+ float confidence; // Confidence score [0.0, 1.0]
44
+ Path reconstruction_path; // The path taken to reconstruct the data
45
+ ValidationReport validation; // Detailed validation report
46
+ HeuristicResult heuristics; // Heuristic analysis results
47
+ SignatureMatch signature_match; // Signature match details
48
+
49
+ SuccessResult() : confidence(0.0f) {}
50
+ };
51
+
52
/**
 * Record of a reconstruction attempt that did not reach a full match.
 *
 * A default-constructed instance has empty data/strings, a score of 0.0
 * and a depth of 0.
 */
struct PartialMatch {
    std::vector<uint8_t> partial_data;  // Partial reconstructed data
    std::string possible_format;        // Possible format (if any signature prefix matched)
    float partial_score = 0.0f;         // Score achieved before failure
    size_t depth_reached = 0;           // How deep the path went before stopping
    std::string failure_reason;         // Why this path was abandoned

    // User-provided so the struct stays a non-aggregate.
    PartialMatch() {}
};
64
+
65
/**
 * A single recommendation for changing one parameter after a failed
 * extraction.
 *
 * All four fields are plain strings; the four-argument constructor copies
 * each argument into the field of the matching role.
 */
struct ParameterSuggestion {
    std::string parameter_name;   // Name of the parameter to adjust
    std::string current_value;    // Current value as string
    std::string suggested_value;  // Suggested new value
    std::string rationale;        // Why this adjustment might help

    ParameterSuggestion() = default;

    /**
     * @param name      Parameter to adjust
     * @param current   Its current value, rendered as text
     * @param suggested The proposed replacement value, rendered as text
     * @param reason    Explanation stored in `rationale`
     */
    ParameterSuggestion(const std::string& name, const std::string& current,
                        const std::string& suggested, const std::string& reason)
        : parameter_name{name}
        , current_value{current}
        , suggested_value{suggested}
        , rationale{reason} {}
};
80
+
81
+ /**
82
+ * Failure result containing diagnostic information.
83
+ * Requirements: 12.2
84
+ */
85
+ struct FailureResult {
86
+ uint64_t paths_explored; // Total paths explored before giving up
87
+ size_t effective_depth_reached; // Maximum depth reached
88
+ std::vector<PartialMatch> best_partials;// Best partial matches found
89
+ std::vector<ParameterSuggestion> suggestions; // Suggestions for parameter adjustment
90
+ std::string failure_summary; // Human-readable failure summary
91
+
92
+ FailureResult() : paths_explored(0), effective_depth_reached(0) {}
93
+ };
94
+
95
/**
 * Numeric and textual statistics describing one extraction run.
 * Requirements: 12.3, 12.4
 *
 * Every counter, rate and timing value defaults to zero, with one
 * exception: effective_branching_factor starts at 8.0.
 */
struct ExtractionMetrics {
    // --- Path statistics ---
    uint64_t total_paths_possible = 0;  // Theoretical total paths
    uint64_t paths_evaluated = 0;       // Actual paths evaluated
    uint64_t paths_pruned_level1 = 0;   // Paths pruned at Level 1 (2-4 bytes)
    uint64_t paths_pruned_level2 = 0;   // Paths pruned at Level 2 (8 bytes)
    uint64_t paths_pruned_level3 = 0;   // Paths pruned at Level 3 (16 bytes)
    uint64_t paths_pruned_prefix = 0;   // Paths pruned by prefix trie

    // --- Efficiency metrics ---
    float effective_branching_factor = 8.0f;  // Actual branching factor achieved
    float effective_depth = 0.0f;             // Average depth of evaluated paths
    float cache_hit_rate = 0.0f;              // Memoization cache hit rate [0.0, 1.0]

    // --- Prune rates ---
    // NOTE(review): described as percentages; the scale (0-1 vs 0-100) is
    // not visible in this header - confirm against the implementation.
    float level1_prune_rate = 0.0f;  // Percentage pruned at Level 1
    float level2_prune_rate = 0.0f;  // Percentage pruned at Level 2
    float level3_prune_rate = 0.0f;  // Percentage pruned at Level 3
    float prefix_prune_rate = 0.0f;  // Percentage pruned by prefix

    // --- Format detection ---
    std::vector<std::pair<std::string, uint32_t>> format_distribution;  // Format -> count

    // --- Timing ---
    double wall_clock_seconds = 0.0;        // Total wall clock time
    double average_time_per_path_us = 0.0;  // Average microseconds per path
    float gpu_utilization = 0.0f;           // GPU utilization percentage [0.0, 1.0]

    // --- Complexity reduction ---
    std::string complexity_reduction;  // Human-readable complexity reduction

    // User-provided so the struct stays a non-aggregate; all defaults are
    // supplied by the member initializers above.
    ExtractionMetrics() {}
};
148
+
149
+ /**
150
+ * Complete extraction result combining success/failure with metrics.
151
+ */
152
+ struct ExtractionResult {
153
+ bool success; // Whether extraction succeeded
154
+ std::vector<SuccessResult> candidates; // Successful candidates (if any)
155
+ std::optional<FailureResult> failure; // Failure details (if failed)
156
+ ExtractionMetrics metrics; // Extraction metrics
157
+
158
+ ExtractionResult() : success(false) {}
159
+ };
160
+
161
+
162
+ /**
163
+ * Success Result Builder
164
+ * Formats extracted bytes, format, confidence, path, and validation report.
165
+ * Requirements: 12.1
166
+ */
167
+ class SuccessResultBuilder {
168
+ public:
169
+ SuccessResultBuilder() = default;
170
+
171
+ /**
172
+ * Set the extracted byte data.
173
+ */
174
+ SuccessResultBuilder& set_data(const std::vector<uint8_t>& data);
175
+ SuccessResultBuilder& set_data(std::vector<uint8_t>&& data);
176
+
177
+ /**
178
+ * Set the detected format information.
179
+ */
180
+ SuccessResultBuilder& set_format(const std::string& format_name,
181
+ const std::string& category = "");
182
+
183
+ /**
184
+ * Set the confidence score.
185
+ */
186
+ SuccessResultBuilder& set_confidence(float confidence);
187
+
188
+ /**
189
+ * Set the reconstruction path.
190
+ */
191
+ SuccessResultBuilder& set_path(const Path& path);
192
+ SuccessResultBuilder& set_path(Path&& path);
193
+
194
+ /**
195
+ * Set the heuristic analysis results.
196
+ */
197
+ SuccessResultBuilder& set_heuristics(const HeuristicResult& heuristics);
198
+
199
+ /**
200
+ * Set the signature match results.
201
+ */
202
+ SuccessResultBuilder& set_signature_match(const SignatureMatch& match);
203
+
204
+ /**
205
+ * Set structural validation results.
206
+ */
207
+ SuccessResultBuilder& set_structural_validation(const StructuralValidation& structure);
208
+
209
+ /**
210
+ * Build the validation report based on set values.
211
+ */
212
+ SuccessResultBuilder& build_validation_report();
213
+
214
+ /**
215
+ * Build and return the success result.
216
+ */
217
+ SuccessResult build() const;
218
+
219
+ /**
220
+ * Build a success result from a Candidate object.
221
+ */
222
+ static SuccessResult from_candidate(const Candidate& candidate);
223
+
224
+ private:
225
+ SuccessResult result_;
226
+ StructuralValidation structure_;
227
+ bool has_structure_ = false;
228
+
229
+ void compute_validation();
230
+ };
231
+
232
+ /**
233
+ * Failure Result Builder
234
+ * Includes paths explored, best partials, and suggestions.
235
+ * Requirements: 12.2
236
+ */
237
+ class FailureResultBuilder {
238
+ public:
239
+ FailureResultBuilder() = default;
240
+
241
+ /**
242
+ * Set the number of paths explored.
243
+ */
244
+ FailureResultBuilder& set_paths_explored(uint64_t count);
245
+
246
+ /**
247
+ * Set the effective depth reached.
248
+ */
249
+ FailureResultBuilder& set_effective_depth(size_t depth);
250
+
251
+ /**
252
+ * Add a partial match.
253
+ */
254
+ FailureResultBuilder& add_partial_match(const PartialMatch& partial);
255
+ FailureResultBuilder& add_partial_match(PartialMatch&& partial);
256
+
257
+ /**
258
+ * Add a partial match from a Candidate.
259
+ */
260
+ FailureResultBuilder& add_partial_from_candidate(const Candidate& candidate,
261
+ const std::string& failure_reason);
262
+
263
+ /**
264
+ * Add a parameter suggestion.
265
+ */
266
+ FailureResultBuilder& add_suggestion(const ParameterSuggestion& suggestion);
267
+ FailureResultBuilder& add_suggestion(const std::string& param, const std::string& current,
268
+ const std::string& suggested, const std::string& rationale);
269
+
270
+ /**
271
+ * Generate suggestions based on metrics.
272
+ */
273
+ FailureResultBuilder& generate_suggestions(const ExtractionMetrics& metrics);
274
+
275
+ /**
276
+ * Set the failure summary.
277
+ */
278
+ FailureResultBuilder& set_summary(const std::string& summary);
279
+
280
+ /**
281
+ * Auto-generate failure summary based on set values.
282
+ */
283
+ FailureResultBuilder& generate_summary();
284
+
285
+ /**
286
+ * Build and return the failure result.
287
+ */
288
+ FailureResult build() const;
289
+
290
+ private:
291
+ FailureResult result_;
292
+ };
293
+
294
+ /**
295
+ * Metrics Reporter
296
+ * Calculates and reports all extraction metrics.
297
+ * Requirements: 12.3, 12.4
298
+ */
299
+ class MetricsReporter {
300
+ public:
301
+ MetricsReporter() = default;
302
+
303
+ /**
304
+ * Set path statistics.
305
+ */
306
+ MetricsReporter& set_total_paths_possible(uint64_t count);
307
+ MetricsReporter& set_paths_evaluated(uint64_t count);
308
+ MetricsReporter& set_paths_pruned_level1(uint64_t count);
309
+ MetricsReporter& set_paths_pruned_level2(uint64_t count);
310
+ MetricsReporter& set_paths_pruned_level3(uint64_t count);
311
+ MetricsReporter& set_paths_pruned_prefix(uint64_t count);
312
+
313
+ /**
314
+ * Set efficiency metrics.
315
+ */
316
+ MetricsReporter& set_effective_branching_factor(float factor);
317
+ MetricsReporter& set_effective_depth(float depth);
318
+ MetricsReporter& set_cache_hit_rate(float rate);
319
+
320
+ /**
321
+ * Add format detection result.
322
+ */
323
+ MetricsReporter& add_format_detection(const std::string& format, uint32_t count = 1);
324
+
325
+ /**
326
+ * Set timing information.
327
+ */
328
+ MetricsReporter& set_wall_clock_time(double seconds);
329
+ MetricsReporter& set_gpu_utilization(float utilization);
330
+
331
+ /**
332
+ * Calculate derived metrics (prune rates, average time, complexity reduction).
333
+ */
334
+ MetricsReporter& calculate_derived_metrics();
335
+
336
+ /**
337
+ * Generate complexity reduction string.
338
+ * Format: "Reduced from O(8^n) to O(k^d) where k=X.X, d=Y"
339
+ */
340
+ MetricsReporter& generate_complexity_reduction(uint32_t input_length);
341
+
342
+ /**
343
+ * Build and return the metrics.
344
+ */
345
+ ExtractionMetrics build() const;
346
+
347
+ /**
348
+ * Get a human-readable report string.
349
+ * @param verbosity "minimal", "standard", or "full"
350
+ */
351
+ std::string to_string(const std::string& verbosity = "full") const;
352
+
353
+ private:
354
+ ExtractionMetrics metrics_;
355
+ };
356
+
357
+ /**
358
+ * Complete Extraction Result Builder
359
+ * Combines success/failure results with metrics.
360
+ */
361
+ class ExtractionResultBuilder {
362
+ public:
363
+ ExtractionResultBuilder() = default;
364
+
365
+ /**
366
+ * Mark as successful extraction.
367
+ */
368
+ ExtractionResultBuilder& set_success(bool success);
369
+
370
+ /**
371
+ * Add a successful candidate.
372
+ */
373
+ ExtractionResultBuilder& add_candidate(const SuccessResult& result);
374
+ ExtractionResultBuilder& add_candidate(SuccessResult&& result);
375
+
376
+ /**
377
+ * Add candidates from a vector of Candidate objects.
378
+ */
379
+ ExtractionResultBuilder& add_candidates(const std::vector<Candidate>& candidates);
380
+
381
+ /**
382
+ * Set failure information.
383
+ */
384
+ ExtractionResultBuilder& set_failure(const FailureResult& failure);
385
+ ExtractionResultBuilder& set_failure(FailureResult&& failure);
386
+
387
+ /**
388
+ * Set extraction metrics.
389
+ */
390
+ ExtractionResultBuilder& set_metrics(const ExtractionMetrics& metrics);
391
+ ExtractionResultBuilder& set_metrics(ExtractionMetrics&& metrics);
392
+
393
+ /**
394
+ * Build and return the complete result.
395
+ */
396
+ ExtractionResult build() const;
397
+
398
+ private:
399
+ ExtractionResult result_;
400
+ };
401
+
402
+ // Utility functions
403
+
404
+ /**
405
+ * Format a path as a human-readable string.
406
+ * @param path The path to format
407
+ * @param max_coords Maximum coordinates to show (0 = all)
408
+ * @return Formatted string like "[(0,3), (1,5), (2,1), ...]"
409
+ */
410
+ std::string format_path(const Path& path, size_t max_coords = 10);
411
+
412
+ /**
413
+ * Format bytes as a hex string.
414
+ * @param data The bytes to format
415
+ * @param max_bytes Maximum bytes to show (0 = all)
416
+ * @return Formatted hex string like "89 50 4E 47 ..."
417
+ */
418
+ std::string format_bytes_hex(const std::vector<uint8_t>& data, size_t max_bytes = 32);
419
+
420
+ /**
421
+ * Format a confidence score as a percentage string.
422
+ */
423
+ std::string format_confidence(float confidence);
424
+
425
+ /**
426
+ * Format a duration in human-readable form.
427
+ */
428
+ std::string format_duration(double seconds);
429
+
430
+ /**
431
+ * Format a large number with appropriate suffix (K, M, B).
432
+ */
433
+ std::string format_count(uint64_t count);
434
+
435
+ } // namespace etb
436
+
437
+ #endif // ETB_REPORTING_HPP
@@ -0,0 +1,269 @@
1
+ #ifndef ETB_SCORING_HPP
2
+ #define ETB_SCORING_HPP
3
+
4
+ #include "heuristics.hpp"
5
+ #include "signature.hpp"
6
+ #include "path.hpp"
7
+ #include <cstdint>
8
+ #include <vector>
9
+ #include <queue>
10
+ #include <functional>
11
+
12
+ namespace etb {
13
+
14
/**
 * Weights applied to each component when forming a composite score.
 * Defaults: signature 40%, heuristic 30%, length 15%, structure 15%.
 */
struct ScoringWeights {
    float signature_weight = 0.40f;  // Weight for signature match score
    float heuristic_weight = 0.30f;  // Weight for heuristic analysis score
    float length_weight = 0.15f;     // Weight for length/completeness score
    float structure_weight = 0.15f;  // Weight for structural validity score

    // User-provided so the struct stays a non-aggregate; the default
    // weights come from the member initializers above.
    ScoringWeights() {}

    /**
     * Check that the weights sum to approximately 1.0.
     * @return true if weights are valid
     */
    bool is_valid() const;

    /**
     * Rescale the weights so that they sum to 1.0.
     */
    void normalize();
};
41
+
42
/**
 * Structural validation outcome for a candidate.
 *
 * A default-constructed value is neutral: score 0.5 with every individual
 * check marked as passing.
 */
struct StructuralValidation {
    float validity_score = 0.5f;     // Overall structural validity [0.0, 1.0]; 0.5 = neutral default
    bool has_valid_length = true;    // Length claims are coherent
    bool has_valid_checksum = true;  // Checksum validation passed (if applicable)
    bool has_valid_pointers = true;  // Internal pointers are coherent (if applicable)

    // User-provided so the struct stays a non-aggregate.
    StructuralValidation() {}
};
57
+
58
+ /**
59
+ * A candidate reconstruction with all associated metadata.
60
+ */
61
+ struct Candidate {
62
+ Path path; // The path taken to reconstruct this candidate
63
+ std::vector<uint8_t> data; // Reconstructed byte sequence
64
+ uint16_t format_id; // Detected format ID (0 = unknown)
65
+ std::string format_name; // Detected format name
66
+ float confidence; // Overall confidence score [0.0, 1.0]
67
+ HeuristicResult heuristics; // Heuristic analysis results
68
+ SignatureMatch signature_match; // Signature match results
69
+ StructuralValidation structure; // Structural validation results
70
+ float composite_score; // Final weighted composite score
71
+
72
+ Candidate()
73
+ : format_id(0)
74
+ , confidence(0.0f)
75
+ , composite_score(0.0f) {}
76
+
77
+ /**
78
+ * Compare candidates by composite score (for priority queue).
79
+ */
80
+ bool operator<(const Candidate& other) const {
81
+ return composite_score < other.composite_score;
82
+ }
83
+
84
+ bool operator>(const Candidate& other) const {
85
+ return composite_score > other.composite_score;
86
+ }
87
+ };
88
+
89
+ /**
90
+ * Composite score calculator.
91
+ * Calculates weighted scores from component scores.
92
+ */
93
+ class ScoreCalculator {
94
+ public:
95
+ /**
96
+ * Construct with default weights.
97
+ */
98
+ ScoreCalculator();
99
+
100
+ /**
101
+ * Construct with custom weights.
102
+ * @param weights Custom scoring weights
103
+ */
104
+ explicit ScoreCalculator(const ScoringWeights& weights);
105
+
106
+ /**
107
+ * Set the scoring weights.
108
+ * @param weights New weights to use
109
+ */
110
+ void set_weights(const ScoringWeights& weights);
111
+
112
+ /**
113
+ * Get the current scoring weights.
114
+ */
115
+ const ScoringWeights& get_weights() const { return weights_; }
116
+
117
+ /**
118
+ * Calculate composite score from component scores.
119
+ * @param signature_score Signature match score [0.0, 1.0]
120
+ * @param heuristic_score Heuristic analysis score [0.0, 1.0]
121
+ * @param length_score Length/completeness score [0.0, 1.0]
122
+ * @param structure_score Structural validity score [0.0, 1.0]
123
+ * @return Weighted composite score [0.0, 1.0]
124
+ */
125
+ float calculate(float signature_score, float heuristic_score,
126
+ float length_score, float structure_score) const;
127
+
128
+ /**
129
+ * Calculate composite score from a SignatureMatch and HeuristicResult.
130
+ * @param sig_match Signature match result
131
+ * @param heuristics Heuristic analysis result
132
+ * @param data_length Length of reconstructed data
133
+ * @param expected_length Expected length (0 = unknown)
134
+ * @param structure Structural validation result
135
+ * @return Weighted composite score [0.0, 1.0]
136
+ */
137
+ float calculate(const SignatureMatch& sig_match,
138
+ const HeuristicResult& heuristics,
139
+ size_t data_length,
140
+ size_t expected_length,
141
+ const StructuralValidation& structure) const;
142
+
143
+ /**
144
+ * Calculate and populate a Candidate's composite score.
145
+ * @param candidate Candidate to score (modified in place)
146
+ * @param expected_length Expected length for length scoring (0 = unknown)
147
+ */
148
+ void score_candidate(Candidate& candidate, size_t expected_length = 0) const;
149
+
150
+ private:
151
+ ScoringWeights weights_;
152
+
153
+ /**
154
+ * Calculate length score based on actual vs expected length.
155
+ */
156
+ static float calculate_length_score(size_t actual_length, size_t expected_length);
157
+ };
158
+
159
+ /**
160
+ * Priority queue for tracking top-K candidates.
161
+ * Maintains a max-heap based on composite score.
162
+ */
163
+ class CandidateQueue {
164
+ public:
165
+ /**
166
+ * Construct a candidate queue with specified capacity.
167
+ * @param capacity Maximum number of candidates to track (top-K)
168
+ */
169
+ explicit CandidateQueue(size_t capacity = 10);
170
+
171
+ /**
172
+ * Try to add a candidate to the queue.
173
+ * If queue is full and candidate scores lower than minimum, it's rejected.
174
+ * @param candidate Candidate to add
175
+ * @return true if candidate was added, false if rejected
176
+ */
177
+ bool push(const Candidate& candidate);
178
+
179
+ /**
180
+ * Try to add a candidate to the queue (move version).
181
+ * @param candidate Candidate to add (moved if accepted)
182
+ * @return true if candidate was added, false if rejected
183
+ */
184
+ bool push(Candidate&& candidate);
185
+
186
+ /**
187
+ * Get the top candidate (highest score).
188
+ * @return Reference to top candidate
189
+ * @throws std::runtime_error if queue is empty
190
+ */
191
+ const Candidate& top() const;
192
+
193
+ /**
194
+ * Remove and return the top candidate.
195
+ * @return Top candidate
196
+ * @throws std::runtime_error if queue is empty
197
+ */
198
+ Candidate pop();
199
+
200
+ /**
201
+ * Get all candidates sorted by score (descending).
202
+ * @return Vector of candidates sorted by composite score
203
+ */
204
+ std::vector<Candidate> get_top_k() const;
205
+
206
+ /**
207
+ * Get the minimum score currently in the queue.
208
+ * Used for fast rejection of low-scoring candidates.
209
+ * @return Minimum score, or 0.0 if queue is not full
210
+ */
211
+ float min_score() const { return min_score_; }
212
+
213
+ /**
214
+ * Check if a score would be accepted into the queue.
215
+ * @param score Score to check
216
+ * @return true if score would be accepted
217
+ */
218
+ bool would_accept(float score) const;
219
+
220
+ /**
221
+ * Get the number of candidates in the queue.
222
+ */
223
+ size_t size() const { return heap_.size(); }
224
+
225
+ /**
226
+ * Get the capacity of the queue.
227
+ */
228
+ size_t capacity() const { return capacity_; }
229
+
230
+ /**
231
+ * Check if the queue is empty.
232
+ */
233
+ bool empty() const { return heap_.empty(); }
234
+
235
+ /**
236
+ * Check if the queue is full.
237
+ */
238
+ bool full() const { return heap_.size() >= capacity_; }
239
+
240
+ /**
241
+ * Clear all candidates from the queue.
242
+ */
243
+ void clear();
244
+
245
+ /**
246
+ * Set a new capacity for the queue.
247
+ * If new capacity is smaller, lowest-scoring candidates are removed.
248
+ * @param new_capacity New capacity
249
+ */
250
+ void set_capacity(size_t new_capacity);
251
+
252
+ private:
253
+ size_t capacity_;
254
+ float min_score_;
255
+
256
+ // Min-heap to efficiently track the minimum score for rejection
257
+ // We use a min-heap so we can efficiently remove the lowest-scoring candidate
258
+ // when the queue is full and a better candidate arrives
259
+ std::vector<Candidate> heap_;
260
+
261
+ void heapify_up(size_t index);
262
+ void heapify_down(size_t index);
263
+ void update_min_score();
264
+ void rebuild_heap();
265
+ };
266
+
267
+ } // namespace etb
268
+
269
+ #endif // ETB_SCORING_HPP