explodethosebits 0.3.0__cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etb/__init__.py +351 -0
- etb/__init__.pyi +976 -0
- etb/_etb.cpython-39-x86_64-linux-gnu.so +0 -0
- etb/_version.py +34 -0
- etb/py.typed +2 -0
- explodethosebits-0.3.0.dist-info/METADATA +405 -0
- explodethosebits-0.3.0.dist-info/RECORD +88 -0
- explodethosebits-0.3.0.dist-info/WHEEL +6 -0
- explodethosebits-0.3.0.dist-info/licenses/LICENSE +21 -0
- explodethosebits-0.3.0.dist-info/sboms/auditwheel.cdx.json +1 -0
- explodethosebits.libs/libcudart-c3a75b33.so.12.8.90 +0 -0
- include/etb/bit_coordinate.hpp +45 -0
- include/etb/bit_extraction.hpp +79 -0
- include/etb/bit_pruning.hpp +122 -0
- include/etb/config.hpp +284 -0
- include/etb/cuda/arch_optimizations.cuh +358 -0
- include/etb/cuda/blackwell_optimizations.cuh +300 -0
- include/etb/cuda/cuda_common.cuh +265 -0
- include/etb/cuda/etb_cuda.cuh +200 -0
- include/etb/cuda/gpu_memory.cuh +406 -0
- include/etb/cuda/heuristics_kernel.cuh +315 -0
- include/etb/cuda/path_generator_kernel.cuh +272 -0
- include/etb/cuda/prefix_pruner_kernel.cuh +370 -0
- include/etb/cuda/signature_kernel.cuh +328 -0
- include/etb/early_stopping.hpp +246 -0
- include/etb/etb.hpp +20 -0
- include/etb/heuristics.hpp +165 -0
- include/etb/memoization.hpp +285 -0
- include/etb/path.hpp +86 -0
- include/etb/path_count.hpp +87 -0
- include/etb/path_generator.hpp +175 -0
- include/etb/prefix_trie.hpp +339 -0
- include/etb/reporting.hpp +437 -0
- include/etb/scoring.hpp +269 -0
- include/etb/signature.hpp +190 -0
- include/gmock/gmock-actions.h +2297 -0
- include/gmock/gmock-cardinalities.h +159 -0
- include/gmock/gmock-function-mocker.h +518 -0
- include/gmock/gmock-matchers.h +5623 -0
- include/gmock/gmock-more-actions.h +658 -0
- include/gmock/gmock-more-matchers.h +120 -0
- include/gmock/gmock-nice-strict.h +277 -0
- include/gmock/gmock-spec-builders.h +2148 -0
- include/gmock/gmock.h +96 -0
- include/gmock/internal/custom/README.md +18 -0
- include/gmock/internal/custom/gmock-generated-actions.h +7 -0
- include/gmock/internal/custom/gmock-matchers.h +37 -0
- include/gmock/internal/custom/gmock-port.h +40 -0
- include/gmock/internal/gmock-internal-utils.h +487 -0
- include/gmock/internal/gmock-port.h +139 -0
- include/gmock/internal/gmock-pp.h +279 -0
- include/gtest/gtest-assertion-result.h +237 -0
- include/gtest/gtest-death-test.h +345 -0
- include/gtest/gtest-matchers.h +923 -0
- include/gtest/gtest-message.h +252 -0
- include/gtest/gtest-param-test.h +546 -0
- include/gtest/gtest-printers.h +1161 -0
- include/gtest/gtest-spi.h +250 -0
- include/gtest/gtest-test-part.h +192 -0
- include/gtest/gtest-typed-test.h +331 -0
- include/gtest/gtest.h +2321 -0
- include/gtest/gtest_pred_impl.h +279 -0
- include/gtest/gtest_prod.h +60 -0
- include/gtest/internal/custom/README.md +44 -0
- include/gtest/internal/custom/gtest-port.h +37 -0
- include/gtest/internal/custom/gtest-printers.h +42 -0
- include/gtest/internal/custom/gtest.h +37 -0
- include/gtest/internal/gtest-death-test-internal.h +307 -0
- include/gtest/internal/gtest-filepath.h +227 -0
- include/gtest/internal/gtest-internal.h +1560 -0
- include/gtest/internal/gtest-param-util.h +1026 -0
- include/gtest/internal/gtest-port-arch.h +122 -0
- include/gtest/internal/gtest-port.h +2481 -0
- include/gtest/internal/gtest-string.h +178 -0
- include/gtest/internal/gtest-type-util.h +220 -0
- lib/libetb_core.a +0 -0
- lib64/cmake/GTest/GTestConfig.cmake +33 -0
- lib64/cmake/GTest/GTestConfigVersion.cmake +43 -0
- lib64/cmake/GTest/GTestTargets-release.cmake +49 -0
- lib64/cmake/GTest/GTestTargets.cmake +139 -0
- lib64/libgmock.a +0 -0
- lib64/libgmock_main.a +0 -0
- lib64/libgtest.a +0 -0
- lib64/libgtest_main.a +0 -0
- lib64/pkgconfig/gmock.pc +10 -0
- lib64/pkgconfig/gmock_main.pc +10 -0
- lib64/pkgconfig/gtest.pc +9 -0
- lib64/pkgconfig/gtest_main.pc +10 -0
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
#ifndef ETB_SIGNATURE_KERNEL_CUH
|
|
2
|
+
#define ETB_SIGNATURE_KERNEL_CUH
|
|
3
|
+
|
|
4
|
+
#include "cuda_common.cuh"
|
|
5
|
+
#include "gpu_memory.cuh"
|
|
6
|
+
|
|
7
|
+
namespace etb {
|
|
8
|
+
namespace cuda {
|
|
9
|
+
|
|
10
|
+
// Constant memory declarations (defined in gpu_memory.cu)
|
|
11
|
+
extern __constant__ DeviceFileSignature d_signatures[MAX_SIGNATURES];
|
|
12
|
+
extern __constant__ DeviceFooterSignature d_footers[MAX_SIGNATURES];
|
|
13
|
+
extern __constant__ uint32_t d_signature_count;
|
|
14
|
+
extern __constant__ uint32_t d_footer_count;
|
|
15
|
+
|
|
16
|
+
/**
 * Shared memory structure for signature matching.
 *
 * Bundles the block's best match with two 256-entry scratch arrays
 * (scores and indices) used during reduction.
 */
struct SignatureSharedMem {
    // Best match found by this block.
    DeviceSignatureMatch best_match;

    // Reduction scratch space: 256 score slots and 256 index slots.
    float    match_scores[256];
    uint32_t match_indices[256];
};
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Signature matcher CUDA kernel.
|
|
30
|
+
*
|
|
31
|
+
* Performs parallel sliding window matching using constant memory
|
|
32
|
+
* for signature broadcast.
|
|
33
|
+
*
|
|
34
|
+
* Requirements: 9.4
|
|
35
|
+
*
|
|
36
|
+
* @param data Input byte data
|
|
37
|
+
* @param length Length of data
|
|
38
|
+
* @param max_offset Maximum offset to search for headers
|
|
39
|
+
* @param result Output signature match result
|
|
40
|
+
*/
|
|
41
|
+
__global__ void signature_matcher_kernel(
|
|
42
|
+
const uint8_t* data,
|
|
43
|
+
uint32_t length,
|
|
44
|
+
uint32_t max_offset,
|
|
45
|
+
DeviceSignatureMatch* result
|
|
46
|
+
);
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Batch signature matching kernel.
|
|
50
|
+
* Each block handles one byte sequence.
|
|
51
|
+
*
|
|
52
|
+
* @param data_ptrs Array of pointers to byte sequences
|
|
53
|
+
* @param lengths Array of sequence lengths
|
|
54
|
+
* @param num_sequences Number of sequences
|
|
55
|
+
* @param max_offset Maximum offset to search
|
|
56
|
+
* @param results Output array of match results
|
|
57
|
+
*/
|
|
58
|
+
__global__ void batch_signature_matcher_kernel(
|
|
59
|
+
const uint8_t** data_ptrs,
|
|
60
|
+
const uint32_t* lengths,
|
|
61
|
+
uint32_t num_sequences,
|
|
62
|
+
uint32_t max_offset,
|
|
63
|
+
DeviceSignatureMatch* results
|
|
64
|
+
);
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Quick signature prefix check kernel.
|
|
68
|
+
* Checks only the first few bytes for fast rejection.
|
|
69
|
+
*
|
|
70
|
+
* @param data Input byte data
|
|
71
|
+
* @param length Length of data
|
|
72
|
+
* @param has_potential_match Output flag indicating if any signature could match
|
|
73
|
+
*/
|
|
74
|
+
__global__ void signature_prefix_check_kernel(
|
|
75
|
+
const uint8_t* data,
|
|
76
|
+
uint32_t length,
|
|
77
|
+
bool* has_potential_match
|
|
78
|
+
);
|
|
79
|
+
|
|
80
|
+
/**
 * Host-side launcher for the signature matching kernels.
 *
 * Every pointer argument of match(), match_batch() and prefix_check() is a
 * device pointer. The optional stream argument defaults to nullptr.
 */
class SignatureMatcherKernel {
public:
    SignatureMatcherKernel();
    ~SignatureMatcherKernel();

    /**
     * Configure the launcher for a specific device.
     *
     * @param device_id CUDA device ID
     */
    void configure(int device_id);

    /**
     * Match signatures against a single byte sequence.
     *
     * @param data       Device pointer to byte data
     * @param length     Length of data
     * @param max_offset Maximum offset to search
     * @param result     Device pointer to result
     * @param stream     CUDA stream
     */
    void match(const uint8_t* data,
               uint32_t length,
               uint32_t max_offset,
               DeviceSignatureMatch* result,
               cudaStream_t stream = nullptr);

    /**
     * Match signatures against several byte sequences at once.
     *
     * @param data_ptrs     Device array of pointers
     * @param lengths       Device array of lengths
     * @param num_sequences Number of sequences
     * @param max_offset    Maximum offset to search
     * @param results       Device array of results
     * @param stream        CUDA stream
     */
    void match_batch(const uint8_t** data_ptrs,
                     const uint32_t* lengths,
                     uint32_t num_sequences,
                     uint32_t max_offset,
                     DeviceSignatureMatch* results,
                     cudaStream_t stream = nullptr);

    /**
     * Quick prefix check used for early rejection.
     *
     * @param data          Device pointer to byte data
     * @param length        Length of data
     * @param has_potential Device pointer to result flag
     * @param stream        CUDA stream
     */
    void prefix_check(const uint8_t* data,
                      uint32_t length,
                      bool* has_potential,
                      cudaStream_t stream = nullptr);

    /**
     * Read-only access to the stored kernel configuration.
     */
    const KernelConfig& get_config() const { return kernel_config_; }

private:
    KernelConfig kernel_config_;  // Configuration returned by get_config()
    bool configured_;             // NOTE(review): presumably set by configure() - confirm in the .cu file
};
|
|
137
|
+
|
|
138
|
+
// ============================================================================
|
|
139
|
+
// Device Functions
|
|
140
|
+
// ============================================================================
|
|
141
|
+
|
|
142
|
+
/**
 * Check whether signature `sig_idx` matches at `position` in `data`.
 *
 * The signature is read from constant memory (d_signatures). Its magic bytes
 * are expected to start at data[position + sig.offset], and every byte is
 * compared under the signature's per-byte mask.
 *
 * @param data        Byte buffer to test
 * @param data_length Number of valid bytes in `data`
 * @param position    Candidate start position within `data`
 * @param sig_idx     Index into d_signatures (not range-checked here)
 * @return true if the signature fits in the buffer and all masked bytes match
 */
__device__ inline bool check_signature_at_position(
    const uint8_t* data,
    uint32_t data_length,
    uint32_t position,
    uint32_t sig_idx
) {
    const DeviceFileSignature& sig = d_signatures[sig_idx];

    // Do the fit test in 64 bits. In 32-bit arithmetic the sum
    // position + offset + length can wrap around, which would let an
    // out-of-range window pass this check and be dereferenced below.
    const unsigned long long window_end =
        static_cast<unsigned long long>(position) + sig.offset + sig.length;
    if (window_end > data_length) {
        return false;
    }

    // Compare bytes with mask. A 32-bit counter cannot wrap before reaching
    // sig.length, whatever integer width that field has.
    const uint8_t* check_pos = data + position + sig.offset;
    for (uint32_t i = 0; i < sig.length; ++i) {
        const uint8_t mask = sig.mask[i];
        if ((check_pos[i] & mask) != (sig.magic_bytes[i] & mask)) {
            return false;
        }
    }

    return true;
}
|
|
171
|
+
|
|
172
|
+
/**
 * Check whether footer `footer_idx` matches the trailing bytes of `data`.
 *
 * The footer is read from constant memory (d_footers) and compared
 * byte-for-byte (no mask) against the last footer.length bytes of the buffer.
 *
 * @param data        Byte buffer to test
 * @param data_length Number of valid bytes in `data`
 * @param footer_idx  Index into d_footers (not range-checked here)
 * @return true if the footer is non-empty, fits in the buffer and matches
 */
__device__ inline bool check_footer_at_end(
    const uint8_t* data,
    uint32_t data_length,
    uint32_t footer_idx
) {
    const DeviceFooterSignature& tail_sig = d_footers[footer_idx];

    // An empty footer never matches; neither does one longer than the data.
    const bool usable = !(tail_sig.length == 0) && !(tail_sig.length > data_length);
    if (!usable) {
        return false;
    }

    const uint8_t* tail = data + data_length - tail_sig.length;
    uint8_t k = 0;
    while (k < tail_sig.length) {
        if (tail[k] != tail_sig.magic_bytes[k]) {
            return false;
        }
        ++k;
    }

    return true;
}
|
|
195
|
+
|
|
196
|
+
/**
 * Derive a confidence value for a match from its header/footer outcome.
 *
 * - No header match: 0.
 * - Header and footer matched: base confidence + 0.1, capped at 1.0.
 * - Footer required but missing: base confidence * 0.7.
 * - Otherwise: base confidence unchanged.
 *
 * @param sig             Signature whose base_confidence is the starting point
 * @param header_matched  Whether the header matched
 * @param footer_matched  Whether the footer matched
 * @param footer_required Whether the footer is marked as required
 * @return The resulting confidence
 */
__device__ inline float calculate_match_confidence(
    const DeviceFileSignature& sig,
    bool header_matched,
    bool footer_matched,
    bool footer_required
) {
    if (!header_matched) {
        return 0.0f;
    }

    const float base = sig.base_confidence;

    if (footer_matched) {
        // Header + footer: boost, but never above 1.0.
        return fminf(base + 0.1f, 1.0f);
    }

    // Footer absent: penalise only when it was required.
    return footer_required ? base * 0.7f : base;
}
|
|
221
|
+
|
|
222
|
+
/**
 * Warp-level reduction that keeps the candidate with the highest score.
 *
 * Uses __shfl_down_sync with a full 0xFFFFFFFF mask, so all 32 lanes of the
 * warp must call this together. Shuffle distances are 16, 8, 4, 2, 1. A lane
 * adopts the peer's candidate only when the peer's score is strictly greater,
 * so ties keep the lane's own value. After the loop lane 0 holds the
 * warp-wide winner; the other lanes hold partial results.
 *
 * @param best_score  In/out: candidate score
 * @param best_idx    In/out: candidate index
 * @param best_offset In/out: candidate offset
 * @param best_header In/out: candidate header-matched flag
 * @param best_footer In/out: candidate footer-matched flag
 */
__device__ inline void warp_reduce_best_match(
    float& best_score,
    uint32_t& best_idx,
    uint32_t& best_offset,
    bool& best_header,
    bool& best_footer
) {
    const unsigned int full_warp = 0xFFFFFFFF;

    int delta = 16;
    while (delta > 0) {
        // Fetch the whole candidate from the lane `delta` positions above.
        // Flags travel as 0/1 integers.
        const float    peer_score  = __shfl_down_sync(full_warp, best_score, delta);
        const uint32_t peer_idx    = __shfl_down_sync(full_warp, best_idx, delta);
        const uint32_t peer_offset = __shfl_down_sync(full_warp, best_offset, delta);
        const bool     peer_header = __shfl_down_sync(full_warp, best_header ? 1 : 0, delta);
        const bool     peer_footer = __shfl_down_sync(full_warp, best_footer ? 1 : 0, delta);

        if (peer_score > best_score) {
            best_score  = peer_score;
            best_idx    = peer_idx;
            best_offset = peer_offset;
            best_header = peer_header;
            best_footer = peer_footer;
        }

        delta /= 2;
    }
}
|
|
248
|
+
|
|
249
|
+
/**
 * Inline signature check for use within other kernels.
 *
 * For each signature in constant memory, compares at most the first 4 magic
 * bytes (under the signature's mask) against the data, with the candidate
 * anchored at position 0. The magic bytes are looked up at data[sig.offset],
 * the same place check_signature_at_position() reads them, so signatures
 * with a non-zero offset are not compared against unrelated leading bytes.
 *
 * @param data   Byte buffer to test
 * @param length Number of valid bytes in `data`
 * @return true if at least one signature's leading bytes match; false if
 *         none do or if `length` is below 4
 */
__device__ inline bool quick_signature_check(const uint8_t* data, uint32_t length) {
    if (length < 4) return false;

    const uint32_t sig_count = d_signature_count;

    for (uint32_t i = 0; i < sig_count; ++i) {
        const DeviceFileSignature& sig = d_signatures[i];

        // Only check the first 4 signature bytes for a quick match.
        const uint32_t check_len = sig.length < 4 ? sig.length : 4;

        // The compared window starts at sig.offset. Skip signatures whose
        // window does not fit; the 64-bit sum avoids wrap-around.
        if (static_cast<unsigned long long>(sig.offset) + check_len > length) {
            continue;
        }

        const uint8_t* window = data + sig.offset;
        bool matches = true;

        for (uint32_t j = 0; j < check_len; ++j) {
            const uint8_t mask = sig.mask[j];
            if ((window[j] & mask) != (sig.magic_bytes[j] & mask)) {
                matches = false;
                break;
            }
        }

        if (matches) return true;
    }

    return false;
}
|
|
278
|
+
|
|
279
|
+
/**
 * Full signature match for inline use.
 *
 * Tries every signature at every start offset in [0, max_offset] that is
 * also below `length`, and keeps the candidate with the strictly highest
 * confidence. On equal confidence the earliest candidate found wins.
 * Footer `i` is consulted for signature `i` whenever i < d_footer_count.
 *
 * @param data       Byte buffer to test
 * @param length     Number of valid bytes in `data`
 * @param max_offset Largest start offset to try
 * @return The best match. When nothing matches, `matched` is false,
 *         `confidence` is 0 and the remaining fields are zero/false.
 */
__device__ inline DeviceSignatureMatch inline_signature_match(
    const uint8_t* data,
    uint32_t length,
    uint32_t max_offset
) {
    // Value-initialize and set the known fields explicitly, so the no-match
    // path never returns indeterminate members to the caller.
    DeviceSignatureMatch result{};
    result.matched = false;
    result.confidence = 0.0f;
    result.match_offset = 0;
    result.header_matched = false;
    result.footer_matched = false;

    const uint32_t sig_count = d_signature_count;
    const uint32_t footer_count = d_footer_count;

    for (uint32_t offset = 0; offset <= max_offset && offset < length; ++offset) {
        for (uint32_t i = 0; i < sig_count; ++i) {
            if (!check_signature_at_position(data, length, offset, i)) {
                continue;
            }

            const DeviceFileSignature& sig = d_signatures[i];

            // Check footer if one exists at the same index as the signature.
            bool footer_matched = false;
            bool footer_required = false;
            if (i < footer_count) {
                footer_required = d_footers[i].required;
                footer_matched = check_footer_at_end(data, length, i);
            }

            const float confidence = calculate_match_confidence(
                sig, true, footer_matched, footer_required);

            // Strict comparison: a zero-confidence candidate is never reported.
            if (confidence > result.confidence) {
                result.matched = true;
                result.format_id = sig.format_id;
                result.confidence = confidence;
                result.match_offset = offset;
                result.header_matched = true;
                result.footer_matched = footer_matched;
            }
        }
    }

    return result;
}
|
|
324
|
+
|
|
325
|
+
} // namespace cuda
|
|
326
|
+
} // namespace etb
|
|
327
|
+
|
|
328
|
+
#endif // ETB_SIGNATURE_KERNEL_CUH
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
#ifndef ETB_EARLY_STOPPING_HPP
|
|
2
|
+
#define ETB_EARLY_STOPPING_HPP
|
|
3
|
+
|
|
4
|
+
#include <cstdint>
|
|
5
|
+
#include <cstddef>
|
|
6
|
+
#include <vector>
|
|
7
|
+
#include "etb/heuristics.hpp"
|
|
8
|
+
#include "etb/signature.hpp"
|
|
9
|
+
|
|
10
|
+
namespace etb {
|
|
11
|
+
|
|
12
|
+
/**
 * Stop levels for multi-level early stopping.
 *
 * Each enumerator names a depth at which a stopping decision is made; its
 * numeric value is the byte count associated with that level.
 */
enum class StopLevel : uint8_t {
    // 2-4 bytes: signature prefix + basic heuristics
    LEVEL_1 = 4,
    // 8 bytes: entropy bounds + checksum validation
    LEVEL_2 = 8,
    // 16 bytes: structural coherence
    LEVEL_3 = 16
};
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Result of an early stopping check.
|
|
24
|
+
*/
|
|
25
|
+
struct StopDecision {
|
|
26
|
+
bool should_stop; // Whether to stop exploring this path
|
|
27
|
+
StopLevel level; // Level at which decision was made
|
|
28
|
+
float score; // Heuristic score at decision point
|
|
29
|
+
const char* reason; // Human-readable reason for stopping
|
|
30
|
+
|
|
31
|
+
StopDecision()
|
|
32
|
+
: should_stop(false), level(StopLevel::LEVEL_1),
|
|
33
|
+
score(0.0f), reason(nullptr) {}
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
/**
 * Tunable thresholds for early stopping.
 *
 * Every field has the default shown next to it; the default constructor
 * leaves all of them at those values.
 */
struct EarlyStoppingConfig {
    // Byte thresholds for each level
    uint32_t level1_bytes = 4;
    uint32_t level2_bytes = 8;
    uint32_t level3_bytes = 16;

    // Entropy bounds
    float min_entropy = 0.1f;   // Below this = repeated pattern garbage
    float max_entropy = 7.9f;   // Above this = random/encrypted

    // Heuristic thresholds: minimum score to continue at each level
    float level1_threshold = 0.2f;
    float level2_threshold = 0.3f;
    float level3_threshold = 0.4f;

    // Adaptive threshold settings
    bool  adaptive_thresholds = true;  // Enable adaptive threshold adjustment
    float adaptive_tighten = 0.6f;     // Threshold when global best > 0.8
    float adaptive_relax = 0.2f;       // Threshold when global best < 0.3
    float adaptive_default = 0.4f;     // Default threshold

    EarlyStoppingConfig() {}
};
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Adaptive threshold manager for early stopping.
|
|
78
|
+
* Tracks global best score and adjusts thresholds based on running statistics.
|
|
79
|
+
*/
|
|
80
|
+
class AdaptiveThresholdManager {
|
|
81
|
+
public:
|
|
82
|
+
AdaptiveThresholdManager();
|
|
83
|
+
explicit AdaptiveThresholdManager(const EarlyStoppingConfig& config);
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Update the global best score seen so far.
|
|
87
|
+
* @param score New score to consider
|
|
88
|
+
*/
|
|
89
|
+
void update_best_score(float score);
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Get the current global best score.
|
|
93
|
+
*/
|
|
94
|
+
float get_best_score() const { return global_best_score_; }
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Get the current adaptive threshold based on global best score.
|
|
98
|
+
*/
|
|
99
|
+
float get_adaptive_threshold() const;
|
|
100
|
+
|
|
101
|
+
/**
|
|
102
|
+
* Reset the manager to initial state.
|
|
103
|
+
*/
|
|
104
|
+
void reset();
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Get statistics about threshold adjustments.
|
|
108
|
+
*/
|
|
109
|
+
uint64_t get_update_count() const { return update_count_; }
|
|
110
|
+
|
|
111
|
+
private:
|
|
112
|
+
EarlyStoppingConfig config_;
|
|
113
|
+
float global_best_score_;
|
|
114
|
+
uint64_t update_count_;
|
|
115
|
+
};
|
|
116
|
+
|
|
117
|
+
/**
|
|
118
|
+
* Early Stopping Controller - CPU Reference Implementation
|
|
119
|
+
*
|
|
120
|
+
* Implements multi-level early stopping to reduce search space from O(8^n) to O(8^d)
|
|
121
|
+
* where d << n. Uses signature matching, heuristics, and adaptive thresholds.
|
|
122
|
+
*/
|
|
123
|
+
class EarlyStoppingController {
|
|
124
|
+
public:
|
|
125
|
+
/**
|
|
126
|
+
* Construct with default configuration.
|
|
127
|
+
*/
|
|
128
|
+
EarlyStoppingController();
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Construct with custom configuration.
|
|
132
|
+
* @param config Early stopping configuration
|
|
133
|
+
*/
|
|
134
|
+
explicit EarlyStoppingController(const EarlyStoppingConfig& config);
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* Construct with configuration and signature dictionary.
|
|
138
|
+
* @param config Early stopping configuration
|
|
139
|
+
* @param dictionary Signature dictionary for prefix matching
|
|
140
|
+
*/
|
|
141
|
+
EarlyStoppingController(const EarlyStoppingConfig& config,
|
|
142
|
+
const SignatureDictionary* dictionary);
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* Set the signature dictionary for prefix matching.
|
|
146
|
+
* @param dictionary Pointer to signature dictionary (can be nullptr)
|
|
147
|
+
*/
|
|
148
|
+
void set_signature_dictionary(const SignatureDictionary* dictionary);
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* Set the heuristics engine for scoring.
|
|
152
|
+
* @param engine Pointer to heuristics engine (can be nullptr for default)
|
|
153
|
+
*/
|
|
154
|
+
void set_heuristics_engine(const HeuristicsEngine* engine);
|
|
155
|
+
|
|
156
|
+
/**
|
|
157
|
+
* Check if a path should be stopped at the current depth.
|
|
158
|
+
* @param data Reconstructed byte sequence
|
|
159
|
+
* @param length Length of the byte sequence
|
|
160
|
+
* @return StopDecision indicating whether to stop and why
|
|
161
|
+
*/
|
|
162
|
+
StopDecision should_stop(const uint8_t* data, size_t length) const;
|
|
163
|
+
|
|
164
|
+
/**
|
|
165
|
+
* Check if a path should be stopped (vector overload).
|
|
166
|
+
* @param data Reconstructed byte sequence
|
|
167
|
+
* @return StopDecision indicating whether to stop and why
|
|
168
|
+
*/
|
|
169
|
+
StopDecision should_stop(const std::vector<uint8_t>& data) const;
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* Check if data consists entirely of repeated bytes.
|
|
173
|
+
* @param data Byte sequence to check
|
|
174
|
+
* @param length Length of the sequence
|
|
175
|
+
* @return true if all bytes are the same value
|
|
176
|
+
*/
|
|
177
|
+
static bool is_repeated_byte_pattern(const uint8_t* data, size_t length);
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* Check if data consists entirely of null bytes.
|
|
181
|
+
* @param data Byte sequence to check
|
|
182
|
+
* @param length Length of the sequence
|
|
183
|
+
* @return true if all bytes are 0x00
|
|
184
|
+
*/
|
|
185
|
+
static bool is_all_null(const uint8_t* data, size_t length);
|
|
186
|
+
|
|
187
|
+
/**
|
|
188
|
+
* Update the adaptive threshold manager with a new score.
|
|
189
|
+
* @param score Score to update with
|
|
190
|
+
*/
|
|
191
|
+
void update_best_score(float score);
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Get the current adaptive threshold.
|
|
195
|
+
*/
|
|
196
|
+
float get_adaptive_threshold() const;
|
|
197
|
+
|
|
198
|
+
/**
|
|
199
|
+
* Get the configuration.
|
|
200
|
+
*/
|
|
201
|
+
const EarlyStoppingConfig& get_config() const { return config_; }
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* Get statistics about stopping decisions.
|
|
205
|
+
*/
|
|
206
|
+
struct Statistics {
|
|
207
|
+
uint64_t total_checks;
|
|
208
|
+
uint64_t stopped_level1;
|
|
209
|
+
uint64_t stopped_level2;
|
|
210
|
+
uint64_t stopped_level3;
|
|
211
|
+
uint64_t stopped_repeated;
|
|
212
|
+
uint64_t continued;
|
|
213
|
+
|
|
214
|
+
Statistics() : total_checks(0), stopped_level1(0), stopped_level2(0),
|
|
215
|
+
stopped_level3(0), stopped_repeated(0), continued(0) {}
|
|
216
|
+
};
|
|
217
|
+
|
|
218
|
+
const Statistics& get_statistics() const { return stats_; }
|
|
219
|
+
void reset_statistics();
|
|
220
|
+
|
|
221
|
+
private:
|
|
222
|
+
EarlyStoppingConfig config_;
|
|
223
|
+
const SignatureDictionary* dictionary_;
|
|
224
|
+
const HeuristicsEngine* heuristics_engine_;
|
|
225
|
+
HeuristicsEngine default_heuristics_;
|
|
226
|
+
AdaptiveThresholdManager threshold_manager_;
|
|
227
|
+
mutable Statistics stats_;
|
|
228
|
+
|
|
229
|
+
// Level-specific checks
|
|
230
|
+
StopDecision check_level1(const uint8_t* data, size_t length,
|
|
231
|
+
const HeuristicResult& heuristics) const;
|
|
232
|
+
StopDecision check_level2(const uint8_t* data, size_t length,
|
|
233
|
+
const HeuristicResult& heuristics) const;
|
|
234
|
+
StopDecision check_level3(const uint8_t* data, size_t length,
|
|
235
|
+
const HeuristicResult& heuristics) const;
|
|
236
|
+
|
|
237
|
+
// Check for signature prefix match
|
|
238
|
+
bool has_signature_prefix_match(const uint8_t* data, size_t length) const;
|
|
239
|
+
|
|
240
|
+
// Get the effective threshold for a given level
|
|
241
|
+
float get_threshold_for_level(StopLevel level) const;
|
|
242
|
+
};
|
|
243
|
+
|
|
244
|
+
} // namespace etb
|
|
245
|
+
|
|
246
|
+
#endif // ETB_EARLY_STOPPING_HPP
|
include/etb/etb.hpp
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#ifndef ETB_HPP
|
|
2
|
+
#define ETB_HPP
|
|
3
|
+
|
|
4
|
+
// Main include header for ExplodeThoseBits library
|
|
5
|
+
#include "bit_coordinate.hpp"
|
|
6
|
+
#include "path.hpp"
|
|
7
|
+
#include "bit_extraction.hpp"
|
|
8
|
+
#include "path_generator.hpp"
|
|
9
|
+
#include "path_count.hpp"
|
|
10
|
+
#include "signature.hpp"
|
|
11
|
+
#include "heuristics.hpp"
|
|
12
|
+
#include "early_stopping.hpp"
|
|
13
|
+
#include "prefix_trie.hpp"
|
|
14
|
+
#include "memoization.hpp"
|
|
15
|
+
#include "bit_pruning.hpp"
|
|
16
|
+
#include "scoring.hpp"
|
|
17
|
+
#include "config.hpp"
|
|
18
|
+
#include "reporting.hpp"
|
|
19
|
+
|
|
20
|
+
#endif // ETB_HPP
|