explodethosebits 0.3.0__cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etb/__init__.py +351 -0
- etb/__init__.pyi +976 -0
- etb/_etb.cpython-39-x86_64-linux-gnu.so +0 -0
- etb/_version.py +34 -0
- etb/py.typed +2 -0
- explodethosebits-0.3.0.dist-info/METADATA +405 -0
- explodethosebits-0.3.0.dist-info/RECORD +88 -0
- explodethosebits-0.3.0.dist-info/WHEEL +6 -0
- explodethosebits-0.3.0.dist-info/licenses/LICENSE +21 -0
- explodethosebits-0.3.0.dist-info/sboms/auditwheel.cdx.json +1 -0
- explodethosebits.libs/libcudart-c3a75b33.so.12.8.90 +0 -0
- include/etb/bit_coordinate.hpp +45 -0
- include/etb/bit_extraction.hpp +79 -0
- include/etb/bit_pruning.hpp +122 -0
- include/etb/config.hpp +284 -0
- include/etb/cuda/arch_optimizations.cuh +358 -0
- include/etb/cuda/blackwell_optimizations.cuh +300 -0
- include/etb/cuda/cuda_common.cuh +265 -0
- include/etb/cuda/etb_cuda.cuh +200 -0
- include/etb/cuda/gpu_memory.cuh +406 -0
- include/etb/cuda/heuristics_kernel.cuh +315 -0
- include/etb/cuda/path_generator_kernel.cuh +272 -0
- include/etb/cuda/prefix_pruner_kernel.cuh +370 -0
- include/etb/cuda/signature_kernel.cuh +328 -0
- include/etb/early_stopping.hpp +246 -0
- include/etb/etb.hpp +20 -0
- include/etb/heuristics.hpp +165 -0
- include/etb/memoization.hpp +285 -0
- include/etb/path.hpp +86 -0
- include/etb/path_count.hpp +87 -0
- include/etb/path_generator.hpp +175 -0
- include/etb/prefix_trie.hpp +339 -0
- include/etb/reporting.hpp +437 -0
- include/etb/scoring.hpp +269 -0
- include/etb/signature.hpp +190 -0
- include/gmock/gmock-actions.h +2297 -0
- include/gmock/gmock-cardinalities.h +159 -0
- include/gmock/gmock-function-mocker.h +518 -0
- include/gmock/gmock-matchers.h +5623 -0
- include/gmock/gmock-more-actions.h +658 -0
- include/gmock/gmock-more-matchers.h +120 -0
- include/gmock/gmock-nice-strict.h +277 -0
- include/gmock/gmock-spec-builders.h +2148 -0
- include/gmock/gmock.h +96 -0
- include/gmock/internal/custom/README.md +18 -0
- include/gmock/internal/custom/gmock-generated-actions.h +7 -0
- include/gmock/internal/custom/gmock-matchers.h +37 -0
- include/gmock/internal/custom/gmock-port.h +40 -0
- include/gmock/internal/gmock-internal-utils.h +487 -0
- include/gmock/internal/gmock-port.h +139 -0
- include/gmock/internal/gmock-pp.h +279 -0
- include/gtest/gtest-assertion-result.h +237 -0
- include/gtest/gtest-death-test.h +345 -0
- include/gtest/gtest-matchers.h +923 -0
- include/gtest/gtest-message.h +252 -0
- include/gtest/gtest-param-test.h +546 -0
- include/gtest/gtest-printers.h +1161 -0
- include/gtest/gtest-spi.h +250 -0
- include/gtest/gtest-test-part.h +192 -0
- include/gtest/gtest-typed-test.h +331 -0
- include/gtest/gtest.h +2321 -0
- include/gtest/gtest_pred_impl.h +279 -0
- include/gtest/gtest_prod.h +60 -0
- include/gtest/internal/custom/README.md +44 -0
- include/gtest/internal/custom/gtest-port.h +37 -0
- include/gtest/internal/custom/gtest-printers.h +42 -0
- include/gtest/internal/custom/gtest.h +37 -0
- include/gtest/internal/gtest-death-test-internal.h +307 -0
- include/gtest/internal/gtest-filepath.h +227 -0
- include/gtest/internal/gtest-internal.h +1560 -0
- include/gtest/internal/gtest-param-util.h +1026 -0
- include/gtest/internal/gtest-port-arch.h +122 -0
- include/gtest/internal/gtest-port.h +2481 -0
- include/gtest/internal/gtest-string.h +178 -0
- include/gtest/internal/gtest-type-util.h +220 -0
- lib/libetb_core.a +0 -0
- lib64/cmake/GTest/GTestConfig.cmake +33 -0
- lib64/cmake/GTest/GTestConfigVersion.cmake +43 -0
- lib64/cmake/GTest/GTestTargets-release.cmake +49 -0
- lib64/cmake/GTest/GTestTargets.cmake +139 -0
- lib64/libgmock.a +0 -0
- lib64/libgmock_main.a +0 -0
- lib64/libgtest.a +0 -0
- lib64/libgtest_main.a +0 -0
- lib64/pkgconfig/gmock.pc +10 -0
- lib64/pkgconfig/gmock_main.pc +10 -0
- lib64/pkgconfig/gtest.pc +9 -0
- lib64/pkgconfig/gtest_main.pc +10 -0
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
#ifndef ETB_HEURISTICS_HPP
|
|
2
|
+
#define ETB_HEURISTICS_HPP
|
|
3
|
+
|
|
4
|
+
#include <cstdint>
|
|
5
|
+
#include <cstddef>
|
|
6
|
+
#include <vector>
|
|
7
|
+
#include <array>
|
|
8
|
+
|
|
9
|
+
namespace etb {
|
|
10
|
+
|
|
11
|
+
/**
 * Outcome of running the heuristic analysis over a byte sequence.
 * Each score is normalized to the range noted beside its field.
 */
struct HeuristicResult {
    float entropy = 0.0f;             // Shannon entropy, range [0.0, 8.0]
    float printable_ratio = 0.0f;     // Ratio of printable ASCII bytes, range [0.0, 1.0]
    float control_char_ratio = 0.0f;  // Ratio of control characters, range [0.0, 1.0]
    uint32_t max_null_run = 0;        // Longest run of consecutive null bytes
    float utf8_validity = 0.0f;       // UTF-8 sequence validity score, range [0.0, 1.0]
    float composite_score = 0.0f;     // Weighted combination of the scores, range [0.0, 1.0]

    // A default-constructed result has every field set to zero
    // (values come from the in-class initializers above).
    HeuristicResult() = default;
};
|
|
31
|
+
|
|
32
|
+
/**
 * Tunable weights used when combining the individual heuristics
 * into a composite score. The defaults below add up to 1.0.
 */
struct HeuristicWeights {
    float entropy_weight = 0.25f;       // Weight for the entropy score
    float printable_weight = 0.25f;     // Weight for the printable ratio
    float control_char_weight = 0.15f;  // Weight for the control character penalty
    float null_run_weight = 0.15f;      // Weight for the null run penalty
    float utf8_weight = 0.20f;          // Weight for UTF-8 validity

    // Default construction yields the default weights listed above.
    HeuristicWeights() = default;
};
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Heuristics Engine - CPU Reference Implementation
|
|
52
|
+
*
|
|
53
|
+
* Provides functions for analyzing byte sequences to determine viability
|
|
54
|
+
* as valid data. Used for scoring partial reconstructions during path exploration.
|
|
55
|
+
*/
|
|
56
|
+
class HeuristicsEngine {
|
|
57
|
+
public:
|
|
58
|
+
/**
|
|
59
|
+
* Construct a heuristics engine with default weights.
|
|
60
|
+
*/
|
|
61
|
+
HeuristicsEngine();
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Construct a heuristics engine with custom weights.
|
|
65
|
+
* @param weights Custom scoring weights
|
|
66
|
+
*/
|
|
67
|
+
explicit HeuristicsEngine(const HeuristicWeights& weights);
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Set the scoring weights.
|
|
71
|
+
* @param weights New weights to use
|
|
72
|
+
*/
|
|
73
|
+
void set_weights(const HeuristicWeights& weights);
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Get the current scoring weights.
|
|
77
|
+
*/
|
|
78
|
+
const HeuristicWeights& get_weights() const { return weights_; }
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Perform full heuristic analysis on a byte sequence.
|
|
82
|
+
* @param data Pointer to byte data
|
|
83
|
+
* @param length Length of data
|
|
84
|
+
* @return Complete heuristic analysis result
|
|
85
|
+
*/
|
|
86
|
+
HeuristicResult analyze(const uint8_t* data, size_t length) const;
|
|
87
|
+
|
|
88
|
+
/**
|
|
89
|
+
* Perform full heuristic analysis on a vector of bytes.
|
|
90
|
+
* @param data Byte vector to analyze
|
|
91
|
+
* @return Complete heuristic analysis result
|
|
92
|
+
*/
|
|
93
|
+
HeuristicResult analyze(const std::vector<uint8_t>& data) const;
|
|
94
|
+
|
|
95
|
+
// Individual heuristic calculations
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Calculate Shannon entropy of a byte sequence.
|
|
99
|
+
* @param data Pointer to byte data
|
|
100
|
+
* @param length Length of data
|
|
101
|
+
* @return Entropy value in range [0.0, 8.0]
|
|
102
|
+
*/
|
|
103
|
+
static float calculate_entropy(const uint8_t* data, size_t length);
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Calculate Shannon entropy of a byte vector.
|
|
107
|
+
* @param data Byte vector
|
|
108
|
+
* @return Entropy value in range [0.0, 8.0]
|
|
109
|
+
*/
|
|
110
|
+
static float calculate_entropy(const std::vector<uint8_t>& data);
|
|
111
|
+
|
|
112
|
+
/**
|
|
113
|
+
* Calculate the ratio of printable ASCII characters.
|
|
114
|
+
* Printable ASCII: bytes in range [0x20, 0x7E]
|
|
115
|
+
* @param data Pointer to byte data
|
|
116
|
+
* @param length Length of data
|
|
117
|
+
* @return Ratio in range [0.0, 1.0]
|
|
118
|
+
*/
|
|
119
|
+
static float calculate_printable_ratio(const uint8_t* data, size_t length);
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* Calculate the ratio of control characters.
|
|
123
|
+
* Control characters: bytes in range [0x00, 0x1F] excluding common whitespace (0x09, 0x0A, 0x0D)
|
|
124
|
+
* @param data Pointer to byte data
|
|
125
|
+
* @param length Length of data
|
|
126
|
+
* @return Ratio in range [0.0, 1.0]
|
|
127
|
+
*/
|
|
128
|
+
static float calculate_control_char_ratio(const uint8_t* data, size_t length);
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Find the longest consecutive run of null bytes (0x00).
|
|
132
|
+
* @param data Pointer to byte data
|
|
133
|
+
* @param length Length of data
|
|
134
|
+
* @return Length of longest null byte run
|
|
135
|
+
*/
|
|
136
|
+
static uint32_t find_max_null_run(const uint8_t* data, size_t length);
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* Validate UTF-8 sequences and return a validity score.
|
|
140
|
+
* @param data Pointer to byte data
|
|
141
|
+
* @param length Length of data
|
|
142
|
+
* @return Validity score in range [0.0, 1.0] where 1.0 = fully valid UTF-8
|
|
143
|
+
*/
|
|
144
|
+
static float validate_utf8(const uint8_t* data, size_t length);
|
|
145
|
+
|
|
146
|
+
private:
|
|
147
|
+
HeuristicWeights weights_;
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Build a byte frequency histogram.
|
|
151
|
+
* @param data Pointer to byte data
|
|
152
|
+
* @param length Length of data
|
|
153
|
+
* @return Array of 256 frequency counts
|
|
154
|
+
*/
|
|
155
|
+
static std::array<uint32_t, 256> build_histogram(const uint8_t* data, size_t length);
|
|
156
|
+
|
|
157
|
+
/**
|
|
158
|
+
* Calculate composite score from individual heuristics.
|
|
159
|
+
*/
|
|
160
|
+
float calculate_composite_score(const HeuristicResult& result, size_t data_length) const;
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
} // namespace etb
|
|
164
|
+
|
|
165
|
+
#endif // ETB_HEURISTICS_HPP
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
#ifndef ETB_MEMOIZATION_HPP
|
|
2
|
+
#define ETB_MEMOIZATION_HPP
|
|
3
|
+
|
|
4
|
+
#include <cstdint>
|
|
5
|
+
#include <cstddef>
|
|
6
|
+
#include <vector>
|
|
7
|
+
#include <unordered_map>
|
|
8
|
+
#include <list>
|
|
9
|
+
#include <mutex>
|
|
10
|
+
#include <optional>
|
|
11
|
+
#include "heuristics.hpp"
|
|
12
|
+
|
|
13
|
+
namespace etb {
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Result stored in the prefix cache.
|
|
17
|
+
* Contains the evaluation result for a specific prefix.
|
|
18
|
+
*/
|
|
19
|
+
struct PrefixCacheEntry {
|
|
20
|
+
std::vector<uint8_t> prefix; // The prefix bytes
|
|
21
|
+
HeuristicResult heuristics; // Heuristic evaluation result
|
|
22
|
+
float score; // Composite score
|
|
23
|
+
bool should_prune; // Whether this prefix should be pruned
|
|
24
|
+
uint64_t access_count; // Number of times accessed
|
|
25
|
+
|
|
26
|
+
PrefixCacheEntry()
|
|
27
|
+
: score(0.0f)
|
|
28
|
+
, should_prune(false)
|
|
29
|
+
, access_count(0) {}
|
|
30
|
+
|
|
31
|
+
PrefixCacheEntry(const std::vector<uint8_t>& p, const HeuristicResult& h,
|
|
32
|
+
float s, bool prune)
|
|
33
|
+
: prefix(p)
|
|
34
|
+
, heuristics(h)
|
|
35
|
+
, score(s)
|
|
36
|
+
, should_prune(prune)
|
|
37
|
+
, access_count(1) {}
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
/**
 * Settings that control the memoization cache.
 */
struct MemoizationConfig {
    size_t max_size_bytes = size_t{1024} * 1024 * 1024;  // Maximum cache size in bytes (default: 1GB)
    size_t max_entries = 1000000;                        // Maximum number of entries (default: 1M)
    bool enabled = true;                                 // Whether caching is enabled

    // Default construction yields the defaults listed above.
    MemoizationConfig() = default;
};
|
|
53
|
+
|
|
54
|
+
/**
 * Counters describing cache activity.
 */
struct MemoizationStats {
    uint64_t hits = 0;              // Number of cache hits
    uint64_t misses = 0;            // Number of cache misses
    uint64_t insertions = 0;        // Number of insertions
    uint64_t evictions = 0;         // Number of evictions
    size_t current_entries = 0;     // Current number of entries
    size_t current_size_bytes = 0;  // Current estimated size in bytes

    // All counters start at zero.
    MemoizationStats() = default;

    /**
     * Zero the activity counters.
     * current_entries and current_size_bytes are deliberately left untouched.
     */
    void reset() {
        hits = misses = insertions = evictions = 0;
    }

    /**
     * Calculate the cache hit rate.
     * @return Hit rate in range [0.0, 1.0], or 0.0 if there were no accesses
     */
    float hit_rate() const {
        const uint64_t accesses = hits + misses;
        // Guard against dividing by zero when nothing has been looked up yet.
        return accesses == 0
            ? 0.0f
            : static_cast<float>(hits) / static_cast<float>(accesses);
    }
};
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* Prefix Result Cache with LRU Eviction
|
|
94
|
+
*
|
|
95
|
+
* Stores evaluated prefix results for O(1) lookup on repeated evaluations.
|
|
96
|
+
* Uses LRU (Least Recently Used) eviction policy when cache size exceeds limits.
|
|
97
|
+
*
|
|
98
|
+
* Requirements: 6.1, 6.2, 6.4, 6.5
|
|
99
|
+
*/
|
|
100
|
+
class PrefixCache {
|
|
101
|
+
public:
|
|
102
|
+
/**
|
|
103
|
+
* Construct with default configuration.
|
|
104
|
+
*/
|
|
105
|
+
PrefixCache();
|
|
106
|
+
|
|
107
|
+
/**
|
|
108
|
+
* Construct with custom configuration.
|
|
109
|
+
* @param config Cache configuration
|
|
110
|
+
*/
|
|
111
|
+
explicit PrefixCache(const MemoizationConfig& config);
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Look up a prefix in the cache.
|
|
115
|
+
* Updates LRU order on hit.
|
|
116
|
+
* @param prefix Byte sequence representing the prefix
|
|
117
|
+
* @param length Length of the prefix
|
|
118
|
+
* @return Optional containing the cached entry if found
|
|
119
|
+
*/
|
|
120
|
+
std::optional<PrefixCacheEntry> lookup(const uint8_t* prefix, size_t length);
|
|
121
|
+
|
|
122
|
+
/**
|
|
123
|
+
* Look up a prefix in the cache (vector overload).
|
|
124
|
+
* @param prefix Byte sequence representing the prefix
|
|
125
|
+
* @return Optional containing the cached entry if found
|
|
126
|
+
*/
|
|
127
|
+
std::optional<PrefixCacheEntry> lookup(const std::vector<uint8_t>& prefix);
|
|
128
|
+
|
|
129
|
+
/**
|
|
130
|
+
* Insert or update a prefix result in the cache.
|
|
131
|
+
* May trigger eviction if cache is full.
|
|
132
|
+
* @param prefix Byte sequence representing the prefix
|
|
133
|
+
* @param length Length of the prefix
|
|
134
|
+
* @param heuristics Heuristic evaluation result
|
|
135
|
+
* @param score Composite score
|
|
136
|
+
* @param should_prune Whether this prefix should be pruned
|
|
137
|
+
* @return true if insertion succeeded
|
|
138
|
+
*/
|
|
139
|
+
bool insert(const uint8_t* prefix, size_t length,
|
|
140
|
+
const HeuristicResult& heuristics, float score, bool should_prune);
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* Insert or update a prefix result in the cache (vector overload).
|
|
144
|
+
* @param prefix Byte sequence representing the prefix
|
|
145
|
+
* @param heuristics Heuristic evaluation result
|
|
146
|
+
* @param score Composite score
|
|
147
|
+
* @param should_prune Whether this prefix should be pruned
|
|
148
|
+
* @return true if insertion succeeded
|
|
149
|
+
*/
|
|
150
|
+
bool insert(const std::vector<uint8_t>& prefix,
|
|
151
|
+
const HeuristicResult& heuristics, float score, bool should_prune);
|
|
152
|
+
|
|
153
|
+
/**
|
|
154
|
+
* Check if a prefix exists in the cache without updating LRU order.
|
|
155
|
+
* @param prefix Byte sequence representing the prefix
|
|
156
|
+
* @param length Length of the prefix
|
|
157
|
+
* @return true if prefix is cached
|
|
158
|
+
*/
|
|
159
|
+
bool contains(const uint8_t* prefix, size_t length) const;
|
|
160
|
+
|
|
161
|
+
/**
|
|
162
|
+
* Check if a prefix exists in the cache (vector overload).
|
|
163
|
+
* @param prefix Byte sequence representing the prefix
|
|
164
|
+
* @return true if prefix is cached
|
|
165
|
+
*/
|
|
166
|
+
bool contains(const std::vector<uint8_t>& prefix) const;
|
|
167
|
+
|
|
168
|
+
/**
|
|
169
|
+
* Remove a specific prefix from the cache.
|
|
170
|
+
* @param prefix Byte sequence representing the prefix
|
|
171
|
+
* @param length Length of the prefix
|
|
172
|
+
* @return true if prefix was removed
|
|
173
|
+
*/
|
|
174
|
+
bool remove(const uint8_t* prefix, size_t length);
|
|
175
|
+
|
|
176
|
+
/**
|
|
177
|
+
* Remove a specific prefix from the cache (vector overload).
|
|
178
|
+
* @param prefix Byte sequence representing the prefix
|
|
179
|
+
* @return true if prefix was removed
|
|
180
|
+
*/
|
|
181
|
+
bool remove(const std::vector<uint8_t>& prefix);
|
|
182
|
+
|
|
183
|
+
/**
|
|
184
|
+
* Clear all entries from the cache.
|
|
185
|
+
*/
|
|
186
|
+
void clear();
|
|
187
|
+
|
|
188
|
+
/**
|
|
189
|
+
* Get the current number of entries in the cache.
|
|
190
|
+
*/
|
|
191
|
+
size_t size() const;
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Check if the cache is empty.
|
|
195
|
+
*/
|
|
196
|
+
bool empty() const;
|
|
197
|
+
|
|
198
|
+
/**
|
|
199
|
+
* Get the estimated current size in bytes.
|
|
200
|
+
*/
|
|
201
|
+
size_t size_bytes() const;
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* Get the configuration.
|
|
205
|
+
*/
|
|
206
|
+
const MemoizationConfig& get_config() const { return config_; }
|
|
207
|
+
|
|
208
|
+
/**
|
|
209
|
+
* Get cache statistics.
|
|
210
|
+
*/
|
|
211
|
+
const MemoizationStats& get_statistics() const { return stats_; }
|
|
212
|
+
|
|
213
|
+
/**
|
|
214
|
+
* Reset statistics (but keep cache contents).
|
|
215
|
+
*/
|
|
216
|
+
void reset_statistics();
|
|
217
|
+
|
|
218
|
+
/**
|
|
219
|
+
* Enable or disable the cache.
|
|
220
|
+
* @param enabled Whether to enable caching
|
|
221
|
+
*/
|
|
222
|
+
void set_enabled(bool enabled);
|
|
223
|
+
|
|
224
|
+
/**
|
|
225
|
+
* Check if caching is enabled.
|
|
226
|
+
*/
|
|
227
|
+
bool is_enabled() const { return config_.enabled; }
|
|
228
|
+
|
|
229
|
+
/**
|
|
230
|
+
* Get the cache hit rate.
|
|
231
|
+
* @return Hit rate in range [0.0, 1.0]
|
|
232
|
+
*/
|
|
233
|
+
float hit_rate() const { return stats_.hit_rate(); }
|
|
234
|
+
|
|
235
|
+
private:
|
|
236
|
+
/**
|
|
237
|
+
* Hash function for prefix vectors.
|
|
238
|
+
*/
|
|
239
|
+
struct PrefixHasher {
|
|
240
|
+
size_t operator()(const std::vector<uint8_t>& prefix) const {
|
|
241
|
+
size_t hash = 0;
|
|
242
|
+
for (uint8_t byte : prefix) {
|
|
243
|
+
hash ^= std::hash<uint8_t>{}(byte) + 0x9e3779b9 + (hash << 6) + (hash >> 2);
|
|
244
|
+
}
|
|
245
|
+
return hash;
|
|
246
|
+
}
|
|
247
|
+
};
|
|
248
|
+
|
|
249
|
+
MemoizationConfig config_;
|
|
250
|
+
mutable MemoizationStats stats_;
|
|
251
|
+
mutable std::mutex mutex_;
|
|
252
|
+
|
|
253
|
+
// LRU list: front = most recently used, back = least recently used
|
|
254
|
+
std::list<std::vector<uint8_t>> lru_list_;
|
|
255
|
+
|
|
256
|
+
// Map from prefix to (entry, iterator into lru_list)
|
|
257
|
+
using LRUIterator = std::list<std::vector<uint8_t>>::iterator;
|
|
258
|
+
std::unordered_map<std::vector<uint8_t>,
|
|
259
|
+
std::pair<PrefixCacheEntry, LRUIterator>,
|
|
260
|
+
PrefixHasher> cache_;
|
|
261
|
+
|
|
262
|
+
/**
|
|
263
|
+
* Estimate the size of an entry in bytes.
|
|
264
|
+
*/
|
|
265
|
+
static size_t estimate_entry_size(const PrefixCacheEntry& entry);
|
|
266
|
+
|
|
267
|
+
/**
|
|
268
|
+
* Evict least recently used entries until under limits.
|
|
269
|
+
*/
|
|
270
|
+
void evict_if_needed();
|
|
271
|
+
|
|
272
|
+
/**
|
|
273
|
+
* Move a prefix to the front of the LRU list.
|
|
274
|
+
*/
|
|
275
|
+
void touch(const std::vector<uint8_t>& prefix);
|
|
276
|
+
|
|
277
|
+
/**
|
|
278
|
+
* Convert raw pointer + length to vector for internal use.
|
|
279
|
+
*/
|
|
280
|
+
static std::vector<uint8_t> to_vector(const uint8_t* data, size_t length);
|
|
281
|
+
};
|
|
282
|
+
|
|
283
|
+
} // namespace etb
|
|
284
|
+
|
|
285
|
+
#endif // ETB_MEMOIZATION_HPP
|
include/etb/path.hpp
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
#ifndef ETB_PATH_HPP
|
|
2
|
+
#define ETB_PATH_HPP
|
|
3
|
+
|
|
4
|
+
#include "bit_coordinate.hpp"
|
|
5
|
+
#include <vector>
|
|
6
|
+
#include <cstdint>
|
|
7
|
+
#include <stdexcept>
|
|
8
|
+
|
|
9
|
+
namespace etb {
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Represents a forward-only traversal path through bit coordinates.
|
|
13
|
+
* Enforces the constraint that each subsequent coordinate must have
|
|
14
|
+
* a strictly greater byte index than the previous one.
|
|
15
|
+
*/
|
|
16
|
+
class Path {
|
|
17
|
+
public:
|
|
18
|
+
Path() = default;
|
|
19
|
+
explicit Path(size_t capacity) { coordinates_.reserve(capacity); }
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* Add a coordinate to the path.
|
|
23
|
+
* @param coord The coordinate to add
|
|
24
|
+
* @return true if added successfully, false if forward-only constraint violated
|
|
25
|
+
*/
|
|
26
|
+
bool add(const BitCoordinate& coord);
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Validate that the path maintains forward-only traversal.
|
|
30
|
+
* @return true if all coordinates satisfy byte_index[i] < byte_index[i+1]
|
|
31
|
+
*/
|
|
32
|
+
bool is_valid() const;
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Get the number of coordinates in the path.
|
|
36
|
+
*/
|
|
37
|
+
size_t length() const { return coordinates_.size(); }
|
|
38
|
+
|
|
39
|
+
/**
|
|
40
|
+
* Check if the path is empty.
|
|
41
|
+
*/
|
|
42
|
+
bool empty() const { return coordinates_.empty(); }
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Clear all coordinates from the path.
|
|
46
|
+
*/
|
|
47
|
+
void clear() { coordinates_.clear(); }
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Reserve capacity for coordinates.
|
|
51
|
+
*/
|
|
52
|
+
void reserve(size_t capacity) { coordinates_.reserve(capacity); }
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Get coordinate at index.
|
|
56
|
+
* @throws std::out_of_range if index is invalid
|
|
57
|
+
*/
|
|
58
|
+
const BitCoordinate& at(size_t index) const { return coordinates_.at(index); }
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* Get coordinate at index (no bounds checking).
|
|
62
|
+
*/
|
|
63
|
+
const BitCoordinate& operator[](size_t index) const { return coordinates_[index]; }
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Get the last coordinate in the path.
|
|
67
|
+
* @throws std::out_of_range if path is empty
|
|
68
|
+
*/
|
|
69
|
+
const BitCoordinate& back() const;
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* Get read-only access to the underlying coordinates.
|
|
73
|
+
*/
|
|
74
|
+
const std::vector<BitCoordinate>& coordinates() const { return coordinates_; }
|
|
75
|
+
|
|
76
|
+
// Iterator support
|
|
77
|
+
auto begin() const { return coordinates_.begin(); }
|
|
78
|
+
auto end() const { return coordinates_.end(); }
|
|
79
|
+
|
|
80
|
+
private:
|
|
81
|
+
std::vector<BitCoordinate> coordinates_;
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
} // namespace etb
|
|
85
|
+
|
|
86
|
+
#endif // ETB_PATH_HPP
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#ifndef ETB_PATH_COUNT_HPP
|
|
2
|
+
#define ETB_PATH_COUNT_HPP
|
|
3
|
+
|
|
4
|
+
#include <cstdint>
|
|
5
|
+
#include <optional>
|
|
6
|
+
|
|
7
|
+
namespace etb {
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Path count estimation utilities.
|
|
11
|
+
*
|
|
12
|
+
* For n bytes with 8 bits each, the total number of forward-only paths is:
|
|
13
|
+
* Sum over all path lengths k from 1 to n of: C(n,k) * 8^k
|
|
14
|
+
*
|
|
15
|
+
* This equals (1+8)^n - 1 = 9^n - 1 (by binomial theorem, minus empty path)
|
|
16
|
+
*
|
|
17
|
+
* With bit masking (m allowed bits per byte):
|
|
18
|
+
* Total paths = (1+m)^n - 1
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/**
 * Result of path count estimation.
 *
 * Every field carries a default member initializer, so a default-constructed
 * result holds well-defined values instead of indeterminate ones. The struct
 * remains an aggregate, so brace initialization with all four values in
 * declaration order keeps working.
 */
struct PathCountResult {
    uint64_t estimated_count = 0;    // Estimated total path count
    bool is_exact = false;           // True if count is exact (not overflow)
    bool exceeds_threshold = false;  // True if count exceeds the given threshold
    double log_count = 0.0;          // log10 of the count (for large values)
};
|
|
30
|
+
|
|
31
|
+
/**
 * Compute the exact number of paths; intended for small inputs.
 * Yields nullopt when the calculation would overflow uint64_t.
 *
 * @param input_length  Number of bytes in the input
 * @param bits_per_byte Number of allowed bit positions per byte (default 8)
 * @return The exact path count, or nullopt on overflow
 */
std::optional<uint64_t> exact_path_count(uint32_t input_length, uint8_t bits_per_byte = 8);
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Estimate the path count with overflow detection.
|
|
43
|
+
*
|
|
44
|
+
* @param input_length Number of bytes in input
|
|
45
|
+
* @param bits_per_byte Number of allowed bit positions per byte (default 8)
|
|
46
|
+
* @param threshold Optional threshold for early bailout check
|
|
47
|
+
* @return PathCountResult with estimation details
|
|
48
|
+
*/
|
|
49
|
+
PathCountResult estimate_path_count(uint32_t input_length,
|
|
50
|
+
uint8_t bits_per_byte = 8,
|
|
51
|
+
uint64_t threshold = 0);
|
|
52
|
+
|
|
53
|
+
/**
 * Decide whether the path count exceeds a threshold without computing the
 * exact count. Relies on a logarithmic comparison so it stays efficient for
 * large values.
 *
 * @param input_length  Number of bytes in the input
 * @param bits_per_byte Number of allowed bit positions per byte
 * @param threshold     The threshold to compare against
 * @return true if the estimated count exceeds the threshold
 */
bool path_count_exceeds_threshold(uint32_t input_length,
                                  uint8_t bits_per_byte,
                                  uint64_t threshold);
|
|
65
|
+
|
|
66
|
+
/**
 * Compute log10 of the path count.
 * Handy for displaying counts that are very large.
 *
 * @param input_length  Number of bytes in the input
 * @param bits_per_byte Number of allowed bit positions per byte (default 8)
 * @return log10 of the path count
 */
double log10_path_count(uint32_t input_length, uint8_t bits_per_byte = 8);
|
|
75
|
+
|
|
76
|
+
/**
 * Count how many bits are set in a byte (popcount).
 * Serves to derive bits_per_byte from a bit mask.
 *
 * @param mask The bit mask to inspect
 * @return Number of set bits
 */
uint8_t count_bits_in_mask(uint8_t mask);
|
|
84
|
+
|
|
85
|
+
} // namespace etb
|
|
86
|
+
|
|
87
|
+
#endif // ETB_PATH_COUNT_HPP
|