explodethosebits 0.3.0__cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. etb/__init__.py +351 -0
  2. etb/__init__.pyi +976 -0
  3. etb/_etb.cpython-39-x86_64-linux-gnu.so +0 -0
  4. etb/_version.py +34 -0
  5. etb/py.typed +2 -0
  6. explodethosebits-0.3.0.dist-info/METADATA +405 -0
  7. explodethosebits-0.3.0.dist-info/RECORD +88 -0
  8. explodethosebits-0.3.0.dist-info/WHEEL +6 -0
  9. explodethosebits-0.3.0.dist-info/licenses/LICENSE +21 -0
  10. explodethosebits-0.3.0.dist-info/sboms/auditwheel.cdx.json +1 -0
  11. explodethosebits.libs/libcudart-c3a75b33.so.12.8.90 +0 -0
  12. include/etb/bit_coordinate.hpp +45 -0
  13. include/etb/bit_extraction.hpp +79 -0
  14. include/etb/bit_pruning.hpp +122 -0
  15. include/etb/config.hpp +284 -0
  16. include/etb/cuda/arch_optimizations.cuh +358 -0
  17. include/etb/cuda/blackwell_optimizations.cuh +300 -0
  18. include/etb/cuda/cuda_common.cuh +265 -0
  19. include/etb/cuda/etb_cuda.cuh +200 -0
  20. include/etb/cuda/gpu_memory.cuh +406 -0
  21. include/etb/cuda/heuristics_kernel.cuh +315 -0
  22. include/etb/cuda/path_generator_kernel.cuh +272 -0
  23. include/etb/cuda/prefix_pruner_kernel.cuh +370 -0
  24. include/etb/cuda/signature_kernel.cuh +328 -0
  25. include/etb/early_stopping.hpp +246 -0
  26. include/etb/etb.hpp +20 -0
  27. include/etb/heuristics.hpp +165 -0
  28. include/etb/memoization.hpp +285 -0
  29. include/etb/path.hpp +86 -0
  30. include/etb/path_count.hpp +87 -0
  31. include/etb/path_generator.hpp +175 -0
  32. include/etb/prefix_trie.hpp +339 -0
  33. include/etb/reporting.hpp +437 -0
  34. include/etb/scoring.hpp +269 -0
  35. include/etb/signature.hpp +190 -0
  36. include/gmock/gmock-actions.h +2297 -0
  37. include/gmock/gmock-cardinalities.h +159 -0
  38. include/gmock/gmock-function-mocker.h +518 -0
  39. include/gmock/gmock-matchers.h +5623 -0
  40. include/gmock/gmock-more-actions.h +658 -0
  41. include/gmock/gmock-more-matchers.h +120 -0
  42. include/gmock/gmock-nice-strict.h +277 -0
  43. include/gmock/gmock-spec-builders.h +2148 -0
  44. include/gmock/gmock.h +96 -0
  45. include/gmock/internal/custom/README.md +18 -0
  46. include/gmock/internal/custom/gmock-generated-actions.h +7 -0
  47. include/gmock/internal/custom/gmock-matchers.h +37 -0
  48. include/gmock/internal/custom/gmock-port.h +40 -0
  49. include/gmock/internal/gmock-internal-utils.h +487 -0
  50. include/gmock/internal/gmock-port.h +139 -0
  51. include/gmock/internal/gmock-pp.h +279 -0
  52. include/gtest/gtest-assertion-result.h +237 -0
  53. include/gtest/gtest-death-test.h +345 -0
  54. include/gtest/gtest-matchers.h +923 -0
  55. include/gtest/gtest-message.h +252 -0
  56. include/gtest/gtest-param-test.h +546 -0
  57. include/gtest/gtest-printers.h +1161 -0
  58. include/gtest/gtest-spi.h +250 -0
  59. include/gtest/gtest-test-part.h +192 -0
  60. include/gtest/gtest-typed-test.h +331 -0
  61. include/gtest/gtest.h +2321 -0
  62. include/gtest/gtest_pred_impl.h +279 -0
  63. include/gtest/gtest_prod.h +60 -0
  64. include/gtest/internal/custom/README.md +44 -0
  65. include/gtest/internal/custom/gtest-port.h +37 -0
  66. include/gtest/internal/custom/gtest-printers.h +42 -0
  67. include/gtest/internal/custom/gtest.h +37 -0
  68. include/gtest/internal/gtest-death-test-internal.h +307 -0
  69. include/gtest/internal/gtest-filepath.h +227 -0
  70. include/gtest/internal/gtest-internal.h +1560 -0
  71. include/gtest/internal/gtest-param-util.h +1026 -0
  72. include/gtest/internal/gtest-port-arch.h +122 -0
  73. include/gtest/internal/gtest-port.h +2481 -0
  74. include/gtest/internal/gtest-string.h +178 -0
  75. include/gtest/internal/gtest-type-util.h +220 -0
  76. lib/libetb_core.a +0 -0
  77. lib64/cmake/GTest/GTestConfig.cmake +33 -0
  78. lib64/cmake/GTest/GTestConfigVersion.cmake +43 -0
  79. lib64/cmake/GTest/GTestTargets-release.cmake +49 -0
  80. lib64/cmake/GTest/GTestTargets.cmake +139 -0
  81. lib64/libgmock.a +0 -0
  82. lib64/libgmock_main.a +0 -0
  83. lib64/libgtest.a +0 -0
  84. lib64/libgtest_main.a +0 -0
  85. lib64/pkgconfig/gmock.pc +10 -0
  86. lib64/pkgconfig/gmock_main.pc +10 -0
  87. lib64/pkgconfig/gtest.pc +9 -0
  88. lib64/pkgconfig/gtest_main.pc +10 -0
@@ -0,0 +1,165 @@
1
+ #ifndef ETB_HEURISTICS_HPP
2
+ #define ETB_HEURISTICS_HPP
3
+
4
+ #include <cstdint>
5
+ #include <cstddef>
6
+ #include <vector>
7
+ #include <array>
8
+
9
+ namespace etb {
10
+
11
/**
 * Outcome of running the heuristic checks over a byte sequence.
 * Every score is normalized to the range noted beside its field.
 */
struct HeuristicResult {
    float entropy;             // Shannon entropy [0.0, 8.0]
    float printable_ratio;     // Ratio of printable ASCII [0.0, 1.0]
    float control_char_ratio;  // Ratio of control characters [0.0, 1.0]
    uint32_t max_null_run;     // Longest consecutive null byte run
    float utf8_validity;       // UTF-8 sequence validity score [0.0, 1.0]
    float composite_score;     // Weighted combination [0.0, 1.0]

    /** A default-constructed result has every metric set to zero. */
    HeuristicResult()
        : entropy{0.0f},
          printable_ratio{0.0f},
          control_char_ratio{0.0f},
          max_null_run{0},
          utf8_validity{0.0f},
          composite_score{0.0f} {}
};
31
+
32
/**
 * Tunable weights applied when combining the individual heuristics
 * into a composite score.
 */
struct HeuristicWeights {
    float entropy_weight;       // Weight for entropy score
    float printable_weight;     // Weight for printable ratio
    float control_char_weight;  // Weight for control char penalty
    float null_run_weight;      // Weight for null run penalty
    float utf8_weight;          // Weight for UTF-8 validity

    /** Default weighting: 0.25 / 0.25 / 0.15 / 0.15 / 0.20 in field order. */
    HeuristicWeights()
        : entropy_weight{0.25f},
          printable_weight{0.25f},
          control_char_weight{0.15f},
          null_run_weight{0.15f},
          utf8_weight{0.20f} {}
};
49
+
50
+ /**
51
+ * Heuristics Engine - CPU Reference Implementation
52
+ *
53
+ * Provides functions for analyzing byte sequences to determine viability
54
+ * as valid data. Used for scoring partial reconstructions during path exploration.
55
+ */
56
+ class HeuristicsEngine {
57
+ public:
58
+ /**
59
+ * Construct a heuristics engine with default weights.
60
+ */
61
+ HeuristicsEngine();
62
+
63
+ /**
64
+ * Construct a heuristics engine with custom weights.
65
+ * @param weights Custom scoring weights
66
+ */
67
+ explicit HeuristicsEngine(const HeuristicWeights& weights);
68
+
69
+ /**
70
+ * Set the scoring weights.
71
+ * @param weights New weights to use
72
+ */
73
+ void set_weights(const HeuristicWeights& weights);
74
+
75
+ /**
76
+ * Get the current scoring weights.
77
+ */
78
+ const HeuristicWeights& get_weights() const { return weights_; }
79
+
80
+ /**
81
+ * Perform full heuristic analysis on a byte sequence.
82
+ * @param data Pointer to byte data
83
+ * @param length Length of data
84
+ * @return Complete heuristic analysis result
85
+ */
86
+ HeuristicResult analyze(const uint8_t* data, size_t length) const;
87
+
88
+ /**
89
+ * Perform full heuristic analysis on a vector of bytes.
90
+ * @param data Byte vector to analyze
91
+ * @return Complete heuristic analysis result
92
+ */
93
+ HeuristicResult analyze(const std::vector<uint8_t>& data) const;
94
+
95
+ // Individual heuristic calculations
96
+
97
+ /**
98
+ * Calculate Shannon entropy of a byte sequence.
99
+ * @param data Pointer to byte data
100
+ * @param length Length of data
101
+ * @return Entropy value in range [0.0, 8.0]
102
+ */
103
+ static float calculate_entropy(const uint8_t* data, size_t length);
104
+
105
+ /**
106
+ * Calculate Shannon entropy of a byte vector.
107
+ * @param data Byte vector
108
+ * @return Entropy value in range [0.0, 8.0]
109
+ */
110
+ static float calculate_entropy(const std::vector<uint8_t>& data);
111
+
112
+ /**
113
+ * Calculate the ratio of printable ASCII characters.
114
+ * Printable ASCII: bytes in range [0x20, 0x7E]
115
+ * @param data Pointer to byte data
116
+ * @param length Length of data
117
+ * @return Ratio in range [0.0, 1.0]
118
+ */
119
+ static float calculate_printable_ratio(const uint8_t* data, size_t length);
120
+
121
+ /**
122
+ * Calculate the ratio of control characters.
123
+ * Control characters: bytes in range [0x00, 0x1F] excluding common whitespace (0x09, 0x0A, 0x0D)
124
+ * @param data Pointer to byte data
125
+ * @param length Length of data
126
+ * @return Ratio in range [0.0, 1.0]
127
+ */
128
+ static float calculate_control_char_ratio(const uint8_t* data, size_t length);
129
+
130
+ /**
131
+ * Find the longest consecutive run of null bytes (0x00).
132
+ * @param data Pointer to byte data
133
+ * @param length Length of data
134
+ * @return Length of longest null byte run
135
+ */
136
+ static uint32_t find_max_null_run(const uint8_t* data, size_t length);
137
+
138
+ /**
139
+ * Validate UTF-8 sequences and return a validity score.
140
+ * @param data Pointer to byte data
141
+ * @param length Length of data
142
+ * @return Validity score in range [0.0, 1.0] where 1.0 = fully valid UTF-8
143
+ */
144
+ static float validate_utf8(const uint8_t* data, size_t length);
145
+
146
+ private:
147
+ HeuristicWeights weights_;
148
+
149
+ /**
150
+ * Build a byte frequency histogram.
151
+ * @param data Pointer to byte data
152
+ * @param length Length of data
153
+ * @return Array of 256 frequency counts
154
+ */
155
+ static std::array<uint32_t, 256> build_histogram(const uint8_t* data, size_t length);
156
+
157
+ /**
158
+ * Calculate composite score from individual heuristics.
159
+ */
160
+ float calculate_composite_score(const HeuristicResult& result, size_t data_length) const;
161
+ };
162
+
163
+ } // namespace etb
164
+
165
+ #endif // ETB_HEURISTICS_HPP
@@ -0,0 +1,285 @@
1
+ #ifndef ETB_MEMOIZATION_HPP
2
+ #define ETB_MEMOIZATION_HPP
3
+
4
+ #include <cstdint>
5
+ #include <cstddef>
6
+ #include <vector>
7
+ #include <unordered_map>
8
+ #include <list>
9
+ #include <mutex>
10
+ #include <optional>
11
+ #include "heuristics.hpp"
12
+
13
+ namespace etb {
14
+
15
+ /**
16
+ * Result stored in the prefix cache.
17
+ * Contains the evaluation result for a specific prefix.
18
+ */
19
+ struct PrefixCacheEntry {
20
+ std::vector<uint8_t> prefix; // The prefix bytes
21
+ HeuristicResult heuristics; // Heuristic evaluation result
22
+ float score; // Composite score
23
+ bool should_prune; // Whether this prefix should be pruned
24
+ uint64_t access_count; // Number of times accessed
25
+
26
+ PrefixCacheEntry()
27
+ : score(0.0f)
28
+ , should_prune(false)
29
+ , access_count(0) {}
30
+
31
+ PrefixCacheEntry(const std::vector<uint8_t>& p, const HeuristicResult& h,
32
+ float s, bool prune)
33
+ : prefix(p)
34
+ , heuristics(h)
35
+ , score(s)
36
+ , should_prune(prune)
37
+ , access_count(1) {}
38
+ };
39
+
40
/**
 * Settings that bound and toggle the memoization cache.
 */
struct MemoizationConfig {
    size_t max_size_bytes;  // Maximum cache size in bytes (default: 1GB)
    size_t max_entries;     // Maximum number of entries (default: 1M)
    bool enabled;           // Whether caching is enabled

    /** Defaults: 1GB byte budget, one million entries, caching switched on. */
    MemoizationConfig()
        : max_size_bytes{size_t{1} << 30},  // 1024 * 1024 * 1024 = 1GB
          max_entries{1000000},             // 1M entries
          enabled{true} {}
};
53
+
54
/**
 * Counters describing cache activity and current occupancy.
 */
struct MemoizationStats {
    uint64_t hits;              // Number of cache hits
    uint64_t misses;            // Number of cache misses
    uint64_t insertions;        // Number of insertions
    uint64_t evictions;         // Number of evictions
    size_t current_entries;     // Current number of entries
    size_t current_size_bytes;  // Current estimated size in bytes

    /** All counters and occupancy figures start at zero. */
    MemoizationStats()
        : hits{0},
          misses{0},
          insertions{0},
          evictions{0},
          current_entries{0},
          current_size_bytes{0} {}

    /**
     * Zero the activity counters.
     * current_entries and current_size_bytes are deliberately left alone.
     */
    void reset() {
        hits = misses = insertions = evictions = 0;
    }

    /**
     * Share of lookups that were hits.
     * @return Hit rate in range [0.0, 1.0], or 0.0 if no accesses
     */
    float hit_rate() const {
        const uint64_t lookups = hits + misses;
        return lookups == 0
                   ? 0.0f
                   : static_cast<float>(hits) / static_cast<float>(lookups);
    }
};
91
+
92
+ /**
93
+ * Prefix Result Cache with LRU Eviction
94
+ *
95
+ * Stores evaluated prefix results for O(1) lookup on repeated evaluations.
96
+ * Uses LRU (Least Recently Used) eviction policy when cache size exceeds limits.
97
+ *
98
+ * Requirements: 6.1, 6.2, 6.4, 6.5
99
+ */
100
+ class PrefixCache {
101
+ public:
102
+ /**
103
+ * Construct with default configuration.
104
+ */
105
+ PrefixCache();
106
+
107
+ /**
108
+ * Construct with custom configuration.
109
+ * @param config Cache configuration
110
+ */
111
+ explicit PrefixCache(const MemoizationConfig& config);
112
+
113
+ /**
114
+ * Look up a prefix in the cache.
115
+ * Updates LRU order on hit.
116
+ * @param prefix Byte sequence representing the prefix
117
+ * @param length Length of the prefix
118
+ * @return Optional containing the cached entry if found
119
+ */
120
+ std::optional<PrefixCacheEntry> lookup(const uint8_t* prefix, size_t length);
121
+
122
+ /**
123
+ * Look up a prefix in the cache (vector overload).
124
+ * @param prefix Byte sequence representing the prefix
125
+ * @return Optional containing the cached entry if found
126
+ */
127
+ std::optional<PrefixCacheEntry> lookup(const std::vector<uint8_t>& prefix);
128
+
129
+ /**
130
+ * Insert or update a prefix result in the cache.
131
+ * May trigger eviction if cache is full.
132
+ * @param prefix Byte sequence representing the prefix
133
+ * @param length Length of the prefix
134
+ * @param heuristics Heuristic evaluation result
135
+ * @param score Composite score
136
+ * @param should_prune Whether this prefix should be pruned
137
+ * @return true if insertion succeeded
138
+ */
139
+ bool insert(const uint8_t* prefix, size_t length,
140
+ const HeuristicResult& heuristics, float score, bool should_prune);
141
+
142
+ /**
143
+ * Insert or update a prefix result in the cache (vector overload).
144
+ * @param prefix Byte sequence representing the prefix
145
+ * @param heuristics Heuristic evaluation result
146
+ * @param score Composite score
147
+ * @param should_prune Whether this prefix should be pruned
148
+ * @return true if insertion succeeded
149
+ */
150
+ bool insert(const std::vector<uint8_t>& prefix,
151
+ const HeuristicResult& heuristics, float score, bool should_prune);
152
+
153
+ /**
154
+ * Check if a prefix exists in the cache without updating LRU order.
155
+ * @param prefix Byte sequence representing the prefix
156
+ * @param length Length of the prefix
157
+ * @return true if prefix is cached
158
+ */
159
+ bool contains(const uint8_t* prefix, size_t length) const;
160
+
161
+ /**
162
+ * Check if a prefix exists in the cache (vector overload).
163
+ * @param prefix Byte sequence representing the prefix
164
+ * @return true if prefix is cached
165
+ */
166
+ bool contains(const std::vector<uint8_t>& prefix) const;
167
+
168
+ /**
169
+ * Remove a specific prefix from the cache.
170
+ * @param prefix Byte sequence representing the prefix
171
+ * @param length Length of the prefix
172
+ * @return true if prefix was removed
173
+ */
174
+ bool remove(const uint8_t* prefix, size_t length);
175
+
176
+ /**
177
+ * Remove a specific prefix from the cache (vector overload).
178
+ * @param prefix Byte sequence representing the prefix
179
+ * @return true if prefix was removed
180
+ */
181
+ bool remove(const std::vector<uint8_t>& prefix);
182
+
183
+ /**
184
+ * Clear all entries from the cache.
185
+ */
186
+ void clear();
187
+
188
+ /**
189
+ * Get the current number of entries in the cache.
190
+ */
191
+ size_t size() const;
192
+
193
+ /**
194
+ * Check if the cache is empty.
195
+ */
196
+ bool empty() const;
197
+
198
+ /**
199
+ * Get the estimated current size in bytes.
200
+ */
201
+ size_t size_bytes() const;
202
+
203
+ /**
204
+ * Get the configuration.
205
+ */
206
+ const MemoizationConfig& get_config() const { return config_; }
207
+
208
+ /**
209
+ * Get cache statistics.
210
+ */
211
+ const MemoizationStats& get_statistics() const { return stats_; }
212
+
213
+ /**
214
+ * Reset statistics (but keep cache contents).
215
+ */
216
+ void reset_statistics();
217
+
218
+ /**
219
+ * Enable or disable the cache.
220
+ * @param enabled Whether to enable caching
221
+ */
222
+ void set_enabled(bool enabled);
223
+
224
+ /**
225
+ * Check if caching is enabled.
226
+ */
227
+ bool is_enabled() const { return config_.enabled; }
228
+
229
+ /**
230
+ * Get the cache hit rate.
231
+ * @return Hit rate in range [0.0, 1.0]
232
+ */
233
+ float hit_rate() const { return stats_.hit_rate(); }
234
+
235
+ private:
236
+ /**
237
+ * Hash function for prefix vectors.
238
+ */
239
+ struct PrefixHasher {
240
+ size_t operator()(const std::vector<uint8_t>& prefix) const {
241
+ size_t hash = 0;
242
+ for (uint8_t byte : prefix) {
243
+ hash ^= std::hash<uint8_t>{}(byte) + 0x9e3779b9 + (hash << 6) + (hash >> 2);
244
+ }
245
+ return hash;
246
+ }
247
+ };
248
+
249
+ MemoizationConfig config_;
250
+ mutable MemoizationStats stats_;
251
+ mutable std::mutex mutex_;
252
+
253
+ // LRU list: front = most recently used, back = least recently used
254
+ std::list<std::vector<uint8_t>> lru_list_;
255
+
256
+ // Map from prefix to (entry, iterator into lru_list)
257
+ using LRUIterator = std::list<std::vector<uint8_t>>::iterator;
258
+ std::unordered_map<std::vector<uint8_t>,
259
+ std::pair<PrefixCacheEntry, LRUIterator>,
260
+ PrefixHasher> cache_;
261
+
262
+ /**
263
+ * Estimate the size of an entry in bytes.
264
+ */
265
+ static size_t estimate_entry_size(const PrefixCacheEntry& entry);
266
+
267
+ /**
268
+ * Evict least recently used entries until under limits.
269
+ */
270
+ void evict_if_needed();
271
+
272
+ /**
273
+ * Move a prefix to the front of the LRU list.
274
+ */
275
+ void touch(const std::vector<uint8_t>& prefix);
276
+
277
+ /**
278
+ * Convert raw pointer + length to vector for internal use.
279
+ */
280
+ static std::vector<uint8_t> to_vector(const uint8_t* data, size_t length);
281
+ };
282
+
283
+ } // namespace etb
284
+
285
+ #endif // ETB_MEMOIZATION_HPP
include/etb/path.hpp ADDED
@@ -0,0 +1,86 @@
1
+ #ifndef ETB_PATH_HPP
2
+ #define ETB_PATH_HPP
3
+
4
+ #include "bit_coordinate.hpp"
5
+ #include <vector>
6
+ #include <cstdint>
7
+ #include <stdexcept>
8
+
9
+ namespace etb {
10
+
11
+ /**
12
+ * Represents a forward-only traversal path through bit coordinates.
13
+ * Enforces the constraint that each subsequent coordinate must have
14
+ * a strictly greater byte index than the previous one.
15
+ */
16
+ class Path {
17
+ public:
18
+ Path() = default;
19
+ explicit Path(size_t capacity) { coordinates_.reserve(capacity); }
20
+
21
+ /**
22
+ * Add a coordinate to the path.
23
+ * @param coord The coordinate to add
24
+ * @return true if added successfully, false if forward-only constraint violated
25
+ */
26
+ bool add(const BitCoordinate& coord);
27
+
28
+ /**
29
+ * Validate that the path maintains forward-only traversal.
30
+ * @return true if all coordinates satisfy byte_index[i] < byte_index[i+1]
31
+ */
32
+ bool is_valid() const;
33
+
34
+ /**
35
+ * Get the number of coordinates in the path.
36
+ */
37
+ size_t length() const { return coordinates_.size(); }
38
+
39
+ /**
40
+ * Check if the path is empty.
41
+ */
42
+ bool empty() const { return coordinates_.empty(); }
43
+
44
+ /**
45
+ * Clear all coordinates from the path.
46
+ */
47
+ void clear() { coordinates_.clear(); }
48
+
49
+ /**
50
+ * Reserve capacity for coordinates.
51
+ */
52
+ void reserve(size_t capacity) { coordinates_.reserve(capacity); }
53
+
54
+ /**
55
+ * Get coordinate at index.
56
+ * @throws std::out_of_range if index is invalid
57
+ */
58
+ const BitCoordinate& at(size_t index) const { return coordinates_.at(index); }
59
+
60
+ /**
61
+ * Get coordinate at index (no bounds checking).
62
+ */
63
+ const BitCoordinate& operator[](size_t index) const { return coordinates_[index]; }
64
+
65
+ /**
66
+ * Get the last coordinate in the path.
67
+ * @throws std::out_of_range if path is empty
68
+ */
69
+ const BitCoordinate& back() const;
70
+
71
+ /**
72
+ * Get read-only access to the underlying coordinates.
73
+ */
74
+ const std::vector<BitCoordinate>& coordinates() const { return coordinates_; }
75
+
76
+ // Iterator support
77
+ auto begin() const { return coordinates_.begin(); }
78
+ auto end() const { return coordinates_.end(); }
79
+
80
+ private:
81
+ std::vector<BitCoordinate> coordinates_;
82
+ };
83
+
84
+ } // namespace etb
85
+
86
+ #endif // ETB_PATH_HPP
@@ -0,0 +1,87 @@
1
+ #ifndef ETB_PATH_COUNT_HPP
2
+ #define ETB_PATH_COUNT_HPP
3
+
4
+ #include <cstdint>
5
+ #include <optional>
6
+
7
+ namespace etb {
8
+
9
+ /**
10
+ * Path count estimation utilities.
11
+ *
12
+ * For n bytes with 8 bits each, the total number of forward-only paths is:
13
+ * Sum over all path lengths k from 1 to n of: C(n,k) * 8^k
14
+ *
15
+ * This equals (1+8)^n - 1 = 9^n - 1 (by binomial theorem, minus empty path)
16
+ *
17
+ * With bit masking (m allowed bits per byte):
18
+ * Total paths = (1+m)^n - 1
19
+ */
20
+
21
/**
 * Result of path count estimation.
 *
 * Every field has a default member initializer, so a default-initialized
 * object (`PathCountResult r;`) starts zeroed instead of holding
 * indeterminate values. This matches the other result/statistics structs
 * in this library, which all start zeroed. The struct remains an aggregate
 * under C++17, so brace initialization with explicit values still works.
 */
struct PathCountResult {
    uint64_t estimated_count = 0;    // Estimated total path count
    bool is_exact = false;           // True if count is exact (not overflow)
    bool exceeds_threshold = false;  // True if count exceeds the given threshold
    double log_count = 0.0;          // log10 of the count (for large values)
};
30
+
31
/**
 * Exact number of paths, intended for small inputs.
 * Yields nullopt if the calculation would overflow uint64_t.
 *
 * @param input_length Number of bytes in input
 * @param bits_per_byte Number of allowed bit positions per byte (default 8)
 * @return Exact path count or nullopt on overflow
 */
std::optional<uint64_t> exact_path_count(
    uint32_t input_length,
    uint8_t bits_per_byte = 8);
40
+
41
+ /**
42
+ * Estimate the path count with overflow detection.
43
+ *
44
+ * @param input_length Number of bytes in input
45
+ * @param bits_per_byte Number of allowed bit positions per byte (default 8)
46
+ * @param threshold Optional threshold for early bailout check
47
+ * @return PathCountResult with estimation details
48
+ */
49
+ PathCountResult estimate_path_count(uint32_t input_length,
50
+ uint8_t bits_per_byte = 8,
51
+ uint64_t threshold = 0);
52
+
53
/**
 * Test whether the path count exceeds a threshold without computing
 * the exact count.
 * Uses logarithmic comparison for efficiency with large values.
 *
 * @param input_length Number of bytes in input
 * @param bits_per_byte Number of allowed bit positions per byte
 * @param threshold The threshold to check against
 * @return true if estimated count exceeds threshold
 */
bool path_count_exceeds_threshold(
    uint32_t input_length,
    uint8_t bits_per_byte,
    uint64_t threshold);
65
+
66
/**
 * Base-10 logarithm of the path count.
 * Handy for displaying counts that are too large to print in full.
 *
 * @param input_length Number of bytes in input
 * @param bits_per_byte Number of allowed bit positions per byte
 * @return log10 of the path count
 */
double log10_path_count(
    uint32_t input_length,
    uint8_t bits_per_byte = 8);
75
+
76
/**
 * Popcount of a byte: how many bits are set in it.
 * Used to derive bits_per_byte from a bit mask.
 *
 * @param mask The bit mask
 * @return Number of set bits
 */
uint8_t count_bits_in_mask(
    uint8_t mask);
84
+
85
+ } // namespace etb
86
+
87
+ #endif // ETB_PATH_COUNT_HPP