explodethosebits 0.3.0__cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. etb/__init__.py +351 -0
  2. etb/__init__.pyi +976 -0
  3. etb/_etb.cpython-39-x86_64-linux-gnu.so +0 -0
  4. etb/_version.py +34 -0
  5. etb/py.typed +2 -0
  6. explodethosebits-0.3.0.dist-info/METADATA +405 -0
  7. explodethosebits-0.3.0.dist-info/RECORD +88 -0
  8. explodethosebits-0.3.0.dist-info/WHEEL +6 -0
  9. explodethosebits-0.3.0.dist-info/licenses/LICENSE +21 -0
  10. explodethosebits-0.3.0.dist-info/sboms/auditwheel.cdx.json +1 -0
  11. explodethosebits.libs/libcudart-c3a75b33.so.12.8.90 +0 -0
  12. include/etb/bit_coordinate.hpp +45 -0
  13. include/etb/bit_extraction.hpp +79 -0
  14. include/etb/bit_pruning.hpp +122 -0
  15. include/etb/config.hpp +284 -0
  16. include/etb/cuda/arch_optimizations.cuh +358 -0
  17. include/etb/cuda/blackwell_optimizations.cuh +300 -0
  18. include/etb/cuda/cuda_common.cuh +265 -0
  19. include/etb/cuda/etb_cuda.cuh +200 -0
  20. include/etb/cuda/gpu_memory.cuh +406 -0
  21. include/etb/cuda/heuristics_kernel.cuh +315 -0
  22. include/etb/cuda/path_generator_kernel.cuh +272 -0
  23. include/etb/cuda/prefix_pruner_kernel.cuh +370 -0
  24. include/etb/cuda/signature_kernel.cuh +328 -0
  25. include/etb/early_stopping.hpp +246 -0
  26. include/etb/etb.hpp +20 -0
  27. include/etb/heuristics.hpp +165 -0
  28. include/etb/memoization.hpp +285 -0
  29. include/etb/path.hpp +86 -0
  30. include/etb/path_count.hpp +87 -0
  31. include/etb/path_generator.hpp +175 -0
  32. include/etb/prefix_trie.hpp +339 -0
  33. include/etb/reporting.hpp +437 -0
  34. include/etb/scoring.hpp +269 -0
  35. include/etb/signature.hpp +190 -0
  36. include/gmock/gmock-actions.h +2297 -0
  37. include/gmock/gmock-cardinalities.h +159 -0
  38. include/gmock/gmock-function-mocker.h +518 -0
  39. include/gmock/gmock-matchers.h +5623 -0
  40. include/gmock/gmock-more-actions.h +658 -0
  41. include/gmock/gmock-more-matchers.h +120 -0
  42. include/gmock/gmock-nice-strict.h +277 -0
  43. include/gmock/gmock-spec-builders.h +2148 -0
  44. include/gmock/gmock.h +96 -0
  45. include/gmock/internal/custom/README.md +18 -0
  46. include/gmock/internal/custom/gmock-generated-actions.h +7 -0
  47. include/gmock/internal/custom/gmock-matchers.h +37 -0
  48. include/gmock/internal/custom/gmock-port.h +40 -0
  49. include/gmock/internal/gmock-internal-utils.h +487 -0
  50. include/gmock/internal/gmock-port.h +139 -0
  51. include/gmock/internal/gmock-pp.h +279 -0
  52. include/gtest/gtest-assertion-result.h +237 -0
  53. include/gtest/gtest-death-test.h +345 -0
  54. include/gtest/gtest-matchers.h +923 -0
  55. include/gtest/gtest-message.h +252 -0
  56. include/gtest/gtest-param-test.h +546 -0
  57. include/gtest/gtest-printers.h +1161 -0
  58. include/gtest/gtest-spi.h +250 -0
  59. include/gtest/gtest-test-part.h +192 -0
  60. include/gtest/gtest-typed-test.h +331 -0
  61. include/gtest/gtest.h +2321 -0
  62. include/gtest/gtest_pred_impl.h +279 -0
  63. include/gtest/gtest_prod.h +60 -0
  64. include/gtest/internal/custom/README.md +44 -0
  65. include/gtest/internal/custom/gtest-port.h +37 -0
  66. include/gtest/internal/custom/gtest-printers.h +42 -0
  67. include/gtest/internal/custom/gtest.h +37 -0
  68. include/gtest/internal/gtest-death-test-internal.h +307 -0
  69. include/gtest/internal/gtest-filepath.h +227 -0
  70. include/gtest/internal/gtest-internal.h +1560 -0
  71. include/gtest/internal/gtest-param-util.h +1026 -0
  72. include/gtest/internal/gtest-port-arch.h +122 -0
  73. include/gtest/internal/gtest-port.h +2481 -0
  74. include/gtest/internal/gtest-string.h +178 -0
  75. include/gtest/internal/gtest-type-util.h +220 -0
  76. lib/libetb_core.a +0 -0
  77. lib64/cmake/GTest/GTestConfig.cmake +33 -0
  78. lib64/cmake/GTest/GTestConfigVersion.cmake +43 -0
  79. lib64/cmake/GTest/GTestTargets-release.cmake +49 -0
  80. lib64/cmake/GTest/GTestTargets.cmake +139 -0
  81. lib64/libgmock.a +0 -0
  82. lib64/libgmock_main.a +0 -0
  83. lib64/libgtest.a +0 -0
  84. lib64/libgtest_main.a +0 -0
  85. lib64/pkgconfig/gmock.pc +10 -0
  86. lib64/pkgconfig/gmock_main.pc +10 -0
  87. lib64/pkgconfig/gtest.pc +9 -0
  88. lib64/pkgconfig/gtest_main.pc +10 -0
@@ -0,0 +1,265 @@
1
+ #ifndef ETB_CUDA_COMMON_CUH
2
+ #define ETB_CUDA_COMMON_CUH
3
+
4
+ #include <cuda_runtime.h>
5
+ #include <device_launch_parameters.h>
6
+ #include <cstdint>
7
+ #include <stdexcept>
8
+ #include <string>
9
+
10
+ namespace etb {
11
+ namespace cuda {
12
+
13
// Error checking macro: wraps a CUDA runtime call and throws std::runtime_error
// (carrying cudaGetErrorString plus file:line) when the call does not return
// cudaSuccess. Usage: ETB_CUDA_CHECK(cudaMalloc(&ptr, bytes));
// Fixes vs. original: the argument is parenthesized as (call) so expression
// arguments bind correctly, and the temporary uses a reserved-style name so it
// cannot shadow/collide with an `err` variable referenced inside `call`.
// The do/while(0) wrapper keeps the macro safe in unbraced if/else bodies.
#define ETB_CUDA_CHECK(call) \
    do { \
        cudaError_t etb_cuda_err_ = (call); \
        if (etb_cuda_err_ != cudaSuccess) { \
            throw std::runtime_error(std::string("CUDA error: ") + \
                cudaGetErrorString(etb_cuda_err_) + " at " + __FILE__ + ":" + \
                std::to_string(__LINE__)); \
        } \
    } while (0)
22
+
23
// Architecture-specific constants used to pick launch configurations.
namespace arch {
    // SM 90 (Hopper) configuration
    constexpr int HOPPER_SM = 90;
    constexpr int HOPPER_THREADS_PER_BLOCK = 256;
    constexpr int HOPPER_SHARED_MEM_SIZE = 48 * 1024;  // 48KB default (static limit per block)
    constexpr int HOPPER_MAX_SHARED_MEM = 228 * 1024;  // 228KB max with opt-in
                                                       // (cudaFuncAttributeMaxDynamicSharedMemorySize)

    // SM 100 (Blackwell) configuration
    constexpr int BLACKWELL_SM = 100;
    constexpr int BLACKWELL_THREADS_PER_BLOCK = 512;
    constexpr int BLACKWELL_SHARED_MEM_SIZE = 64 * 1024;   // 64KB default
    constexpr int BLACKWELL_MAX_SHARED_MEM = 256 * 1024;   // 256KB max with opt-in
                                                           // NOTE(review): confirm against the
                                                           // SM100 datasheet figures

    // Common constants
    constexpr int WARP_SIZE = 32;
    // NOTE(review): 65535 is the hardware limit only for gridDim.y/z; gridDim.x
    // allows up to 2^31-1 on all supported architectures. Keeping the
    // conservative value is safe but may force extra grid-stride passes —
    // confirm callers intend it as a y/z (or deliberately conservative) bound.
    constexpr int MAX_GRID_DIM = 65535;
}

// Maximum sizes for constant memory structures (these bound the fixed-size
// arrays in DeviceFileSignature/DeviceFooterSignature so signature tables can
// live in __constant__ memory).
constexpr size_t MAX_SIGNATURES = 256;
constexpr size_t MAX_SIGNATURE_LENGTH = 32;
constexpr size_t MAX_FORMAT_NAME_LENGTH = 32;
46
+
47
// GPU-compatible bit coordinate (matches CPU version)
// Addresses one bit of an input buffer: byte_index selects the byte,
// bit_position the bit within it. Layout is pinned to exactly 8 bytes via
// alignas(8) + explicit padding; do not reorder members — presumably this must
// stay byte-compatible with the CPU type in include/etb/bit_coordinate.hpp
// (TODO confirm).
struct alignas(8) DeviceBitCoordinate {
    uint32_t byte_index;   // index of the byte within the input buffer
    uint8_t bit_position;  // bit index within that byte; valid range 0-7
    uint8_t padding[3];    // explicit padding so sizeof == 8, zeroed by ctors

    // Zero coordinate (byte 0, bit 0).
    __host__ __device__ DeviceBitCoordinate()
        : byte_index(0), bit_position(0), padding{0, 0, 0} {}

    // Coordinate at the given byte/bit position (not range-checked here;
    // use is_valid()).
    __host__ __device__ DeviceBitCoordinate(uint32_t byte_idx, uint8_t bit_pos)
        : byte_index(byte_idx), bit_position(bit_pos), padding{0, 0, 0} {}

    // True when this coordinate addresses a bit inside an input of
    // `input_length` bytes (byte in range AND bit position 0-7).
    __host__ __device__ bool is_valid(uint32_t input_length) const {
        return byte_index < input_length && bit_position <= 7;
    }
};
63
+
64
// GPU-compatible path structure
// A growable sequence of bit coordinates backed by an external buffer.
// NOTE(review): `coordinates` is a raw, non-owning pointer — allocation and
// lifetime appear to be managed elsewhere (presumably the GPU memory manager
// in gpu_memory.cuh); confirm before adding any ownership semantics here.
struct DevicePath {
    DeviceBitCoordinate* coordinates;  // buffer of `capacity` slots; first `length` in use
    uint32_t length;    // number of coordinates currently in the path
    uint32_t capacity;  // total slots available at `coordinates`

    // Empty path with no buffer attached.
    __host__ __device__ DevicePath()
        : coordinates(nullptr), length(0), capacity(0) {}
};
73
+
74
// GPU-compatible file signature for constant memory
// Note: No constructor to allow __constant__ memory usage (objects placed in
// __constant__ memory cannot run dynamic initialization).
// alignas(64) keeps each entry cache-line aligned in the signature table.
struct alignas(64) DeviceFileSignature {
    uint8_t magic_bytes[MAX_SIGNATURE_LENGTH];  // expected header bytes; first `length` are significant
    uint8_t mask[MAX_SIGNATURE_LENGTH];         // per-byte mask applied when comparing —
                                                // TODO confirm semantics (set bits compared?) in signature_kernel.cuh
    uint8_t length;         // number of significant bytes in magic_bytes/mask (<= MAX_SIGNATURE_LENGTH)
    uint16_t offset;        // byte offset within the candidate where the signature is expected
    uint16_t format_id;     // identifier of the detected file format
    float base_confidence;  // confidence contributed by a match
    uint8_t padding[1];     // NOTE(review): with alignas(64) the compiler pads the
                            // struct to 128 bytes regardless; this 1-byte member
                            // does not itself achieve alignment — confirm intent
};
85
+
86
// GPU-compatible footer signature (trailing magic bytes of a format)
// Note: No constructor to allow __constant__ memory usage
struct DeviceFooterSignature {
    uint8_t magic_bytes[MAX_SIGNATURE_LENGTH];  // expected trailing bytes; first `length` significant
    uint8_t length;      // number of significant bytes in magic_bytes
    bool required;       // when true, a candidate presumably must end with this
                         // footer to match — TODO confirm in signature_kernel.cuh
    uint8_t padding[2];  // explicit padding; total size 36 bytes
};
94
+
95
// GPU-compatible heuristic result
// Per-candidate content statistics produced by the heuristics kernel.
struct DeviceHeuristicResult {
    float entropy;             // byte entropy; given DeviceEarlyStoppingConfig's
                               // 0.1/7.9 bounds this is presumably Shannon
                               // entropy in bits/byte (0-8) — TODO confirm
    float printable_ratio;     // fraction of printable-ASCII bytes (0-1)
    float control_char_ratio;  // fraction of control characters (0-1)
    uint32_t max_null_run;     // longest run of 0x00 bytes observed
    float utf8_validity;       // degree to which the data is valid UTF-8 (0-1)
    float composite_score;     // weighted combination of the above — presumably
                               // weighted by DeviceHeuristicWeights; confirm in kernel
    // All-zero result.
    __host__ __device__ DeviceHeuristicResult()
        : entropy(0.0f), printable_ratio(0.0f), control_char_ratio(0.0f)
        , max_null_run(0), utf8_validity(0.0f), composite_score(0.0f) {}
};
108
+
109
// GPU-compatible heuristic weights
// Weights for combining DeviceHeuristicResult fields into a composite score.
// Defaults sum to 1.00 (0.25 + 0.25 + 0.15 + 0.15 + 0.20); keep that invariant
// when tuning so composite scores stay comparable.
struct DeviceHeuristicWeights {
    float entropy_weight;       // weight of the entropy term
    float printable_weight;     // weight of the printable-ratio term
    float control_char_weight;  // weight of the control-char-ratio term
    float null_run_weight;      // weight of the max-null-run term
    float utf8_weight;          // weight of the UTF-8 validity term

    // Default weighting (sums to 1.00).
    __host__ __device__ DeviceHeuristicWeights()
        : entropy_weight(0.25f), printable_weight(0.25f)
        , control_char_weight(0.15f), null_run_weight(0.15f)
        , utf8_weight(0.20f) {}
};
122
+
123
// GPU-compatible scoring weights
// Weights for the final candidate score combining signature, heuristic,
// length, and structure components. Defaults sum to 1.00
// (0.40 + 0.30 + 0.15 + 0.15).
struct DeviceScoringWeights {
    float signature_weight;  // weight of the signature-match component
    float heuristic_weight;  // weight of the heuristic composite component
    float length_weight;     // weight of the candidate-length component
    float structure_weight;  // weight of the structural component — TODO confirm
                             // what "structure" measures (see scoring.hpp)

    // Default weighting (sums to 1.00).
    __host__ __device__ DeviceScoringWeights()
        : signature_weight(0.40f), heuristic_weight(0.30f)
        , length_weight(0.15f), structure_weight(0.15f) {}
};
134
+
135
// Prefix trie node status
// uint8_t-backed so it packs tightly inside DevicePrefixTrieNode.
enum class DevicePrefixStatus : uint8_t {
    UNKNOWN = 0,  // not yet evaluated (also the zero-initialized default)
    VALID = 1,    // prefix may still lead to a valid candidate
    PRUNED = 2    // prefix eliminated; descendants need not be explored
};
141
+
142
// GPU-compatible prefix trie node
// Packed to exactly 16 bytes (alignas(16) + explicit padding) so nodes sit on
// aligned boundaries in a flat device array. Children are addressed by
// `children_offset` — an index/offset into the node pool rather than a
// pointer, which presumably keeps the trie relocatable between host and
// device (TODO confirm against prefix_pruner_kernel.cuh).
struct alignas(16) DevicePrefixTrieNode {
    uint8_t reconstructed_byte;  // byte value this trie edge reconstructs
    DevicePrefixStatus status;   // UNKNOWN / VALID / PRUNED
    uint8_t padding[2];          // explicit padding; float below lands at offset 4
    float best_score;            // best score observed through this prefix
    uint32_t children_offset;    // offset of this node's children in the node pool
    uint32_t visit_count;        // number of times this node was visited
                                 // (presumably updated atomically — confirm)

    // Fresh node: UNKNOWN status, zero score/offset/count.
    __host__ __device__ DevicePrefixTrieNode()
        : reconstructed_byte(0), status(DevicePrefixStatus::UNKNOWN)
        , padding{0, 0}, best_score(0.0f), children_offset(0), visit_count(0) {}
};
155
+
156
// Signature match result
// Outcome of testing one candidate against the signature table.
struct DeviceSignatureMatch {
    bool matched;          // overall match verdict
    uint16_t format_id;    // format of the matching DeviceFileSignature (when matched)
    float confidence;      // confidence of the match
    uint32_t match_offset; // byte offset in the candidate where the match occurred
    bool header_matched;   // header signature matched
    bool footer_matched;   // footer signature matched
    uint8_t padding[2];    // explicit padding; total size 16 bytes

    // No-match result.
    __host__ __device__ DeviceSignatureMatch()
        : matched(false), format_id(0), confidence(0.0f)
        , match_offset(0), header_matched(false), footer_matched(false)
        , padding{0, 0} {}
};
171
+
172
// Candidate structure for GPU
// One reconstructed byte sequence plus its evaluation results.
// NOTE(review): `data` is a raw, non-owning pointer into device memory —
// lifetime is presumably managed by the GPU memory manager; confirm before
// copying these structures to the host (the pointer will not be dereferenceable
// there).
struct DeviceCandidate {
    uint8_t* data;            // reconstructed bytes (device buffer, non-owning)
    uint32_t data_length;     // number of valid bytes at `data`
    uint16_t format_id;       // detected format (from the signature match)
    float confidence;         // signature-derived confidence
    float composite_score;    // final combined score
    DeviceHeuristicResult heuristics;       // full heuristic breakdown
    DeviceSignatureMatch signature_match;   // full signature-match details

    // Empty candidate; `heuristics` and `signature_match` default-construct
    // to their own zero states.
    __host__ __device__ DeviceCandidate()
        : data(nullptr), data_length(0), format_id(0)
        , confidence(0.0f), composite_score(0.0f) {}
};
186
+
187
// Early stopping configuration
// Thresholds for multi-level pruning: candidates are checked after
// level1/level2/level3 bytes have been reconstructed, and pruned when their
// entropy leaves [entropy_min, entropy_max] or their score falls below
// prune_threshold — TODO confirm exact check order in the kernels.
struct DeviceEarlyStoppingConfig {
    uint32_t level1_bytes;     // bytes reconstructed before the first check (default 4)
    uint32_t level2_bytes;     // bytes before the second check (default 8)
    uint32_t level3_bytes;     // bytes before the third check (default 16)
    float entropy_min;         // minimum acceptable entropy (bits/byte)
    float entropy_max;         // maximum acceptable entropy (bits/byte; < 8 = pure noise)
    float prune_threshold;     // minimum score to keep exploring a prefix
    bool adaptive_thresholds;  // allow thresholds to adapt at runtime — TODO
                               // confirm adaptation policy (see early_stopping.hpp)
    uint8_t padding[3];        // explicit padding; total size 28 bytes

    // Defaults: check at 4/8/16 bytes, accept entropy in [0.1, 7.9],
    // prune below 0.3, adaptive on.
    __host__ __device__ DeviceEarlyStoppingConfig()
        : level1_bytes(4), level2_bytes(8), level3_bytes(16)
        , entropy_min(0.1f), entropy_max(7.9f), prune_threshold(0.3f)
        , adaptive_thresholds(true), padding{0, 0, 0} {}
};
203
+
204
// Bit pruning mode — which bit positions the path generator may select.
// uint8_t-backed so it packs tightly inside DeviceBitPruningConfig.
enum class DeviceBitPruningMode : uint8_t {
    EXHAUSTIVE = 0,  // consider all 8 bit positions
    MSB_ONLY = 1,    // consider only the most significant bit
    SINGLE_BIT = 2,  // consider a single bit position — TODO confirm which/how chosen
    CUSTOM = 3       // positions given by DeviceBitPruningConfig::bit_mask
};
211
+
212
// Bit pruning configuration
// Restricts which bit positions are explored during path generation.
struct DeviceBitPruningConfig {
    DeviceBitPruningMode mode;  // pruning strategy (see enum above)
    uint8_t bit_mask;           // Bitmask for allowed bit positions (used in CUSTOM
                                // mode; 0xFF = all 8 positions allowed)
    uint8_t padding[2];         // explicit padding; total size 4 bytes

    // Default: exhaustive search over all bit positions.
    __host__ __device__ DeviceBitPruningConfig()
        : mode(DeviceBitPruningMode::EXHAUSTIVE), bit_mask(0xFF), padding{0, 0} {}
};
221
+
222
// Kernel configuration
// Host-side launch parameters for a kernel (host-only: the constructor is
// deliberately not __host__ __device__; this type is never built on device).
struct KernelConfig {
    int threads_per_block;   // block size for the launch
    int blocks_per_grid;     // grid size for the launch
    size_t shared_mem_size;  // dynamic shared memory per block, in bytes
    int sm_version;          // target SM version (e.g. 90, 100); 0 = unknown

    // Conservative default: 256 threads, single block, no dynamic shared mem.
    KernelConfig()
        : threads_per_block(256), blocks_per_grid(1)
        , shared_mem_size(0), sm_version(0) {}
};
233
+
234
// Device information
// Snapshot of CUDA device properties (host-only; populated by
// get_device_info(), presumably from cudaGetDeviceProperties — confirm in
// the .cu implementation).
struct DeviceInfo {
    int device_id;                          // CUDA device ordinal; -1 = not queried/invalid
    int sm_version;                         // compute capability as major*10+minor (e.g. 90)
    size_t total_global_mem;                // total global memory, bytes
    size_t shared_mem_per_block;            // default shared memory per block, bytes
    size_t shared_mem_per_multiprocessor;   // shared memory per SM, bytes
    int multiprocessor_count;               // number of SMs
    int max_threads_per_block;              // hardware thread-per-block limit
    int warp_size;                          // lanes per warp (32 on all current hardware)
    bool supports_cooperative_groups;       // device supports cooperative kernel launch

    // Sentinel "no device" state (device_id == -1).
    DeviceInfo()
        : device_id(-1), sm_version(0), total_global_mem(0)
        , shared_mem_per_block(0), shared_mem_per_multiprocessor(0)
        , multiprocessor_count(0), max_threads_per_block(0)
        , warp_size(32), supports_cooperative_groups(false) {}
};
252
+
253
// Get device information for the given CUDA device ordinal (default 0).
// NOTE(review): behavior when the device does not exist (throw via
// ETB_CUDA_CHECK vs. returning the sentinel DeviceInfo) is defined in the
// .cu implementation — confirm there.
DeviceInfo get_device_info(int device_id = 0);

// Check if CUDA is available (i.e. at least one usable device is present).
bool is_cuda_available();

// Get optimal kernel configuration for the current device, sized to cover
// `work_items` items with `shared_mem_required` bytes of dynamic shared
// memory per block.
KernelConfig get_optimal_config(int device_id, size_t work_items, size_t shared_mem_required = 0);
261
+
262
+ } // namespace cuda
263
+ } // namespace etb
264
+
265
+ #endif // ETB_CUDA_COMMON_CUH
@@ -0,0 +1,200 @@
1
+ #ifndef ETB_CUDA_CUH
2
+ #define ETB_CUDA_CUH
3
+
4
+ /**
5
+ * ExplodeThoseBits CUDA Library
6
+ *
7
+ * Main include header for all CUDA components.
8
+ *
9
+ * This library provides GPU-accelerated implementations of:
10
+ * - Path generation with work-stealing
11
+ * - Heuristics calculation with shared memory histograms
12
+ * - Signature matching with constant memory broadcast
13
+ * - Prefix pruning with atomic trie updates
14
+ *
15
+ * Optimized for:
16
+ * - NVIDIA Hopper (SM 90) architecture
17
+ * - NVIDIA Blackwell (SM 100) architecture
18
+ */
19
+
20
// Standard library dependencies — required directly by the ETBCudaEngine
// interface below (std::vector, std::unique_ptr, std::string, uint8_t);
// previously available only transitively, which made this header
// non-self-contained.
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

// Common definitions and utilities
#include "cuda_common.cuh"

// GPU memory management
#include "gpu_memory.cuh"

// CUDA kernels
#include "path_generator_kernel.cuh"
#include "heuristics_kernel.cuh"
#include "signature_kernel.cuh"
#include "prefix_pruner_kernel.cuh"

// Architecture-specific optimizations
#include "arch_optimizations.cuh"
#include "blackwell_optimizations.cuh"
35
+
36
+ namespace etb {
37
+ namespace cuda {
38
+
39
/**
 * ETB CUDA Engine - Main interface for GPU-accelerated extraction.
 *
 * Provides a unified interface for all CUDA operations, handling
 * memory management, kernel launches, and result retrieval.
 *
 * Lifecycle: construct -> initialize() -> load_signatures() -> extract()
 * (repeatable) -> release() or destruction. Non-copyable because it owns
 * GPU resources.
 *
 * NOTE(review): thread-safety is not stated anywhere in this header —
 * presumably one engine per thread/stream set; confirm in the implementation.
 */
class ETBCudaEngine {
public:
    /**
     * Configuration for the CUDA engine.
     * All sizes are upper bounds used to pre-allocate GPU buffers at
     * initialize() time.
     */
    struct Config {
        // Memory configuration
        size_t max_input_size;            // largest input accepted by extract(), bytes (default 1 MiB)
        size_t prefix_trie_capacity;      // max DevicePrefixTrieNode slots (default 65536)
        size_t candidate_queue_capacity;  // max queued DeviceCandidates (default 1024)

        // Kernel configuration
        int num_streams;            // CUDA streams for overlap (default 4)
        bool use_async_operations;  // use async copies/launches (default true)

        // Early stopping configuration
        DeviceEarlyStoppingConfig early_stopping;

        // Heuristic weights
        DeviceHeuristicWeights heuristic_weights;

        // Scoring weights
        DeviceScoringWeights scoring_weights;

        // Bit pruning configuration
        DeviceBitPruningConfig bit_pruning;

        // Defaults: 1 MiB input, 64K trie nodes, 1024 candidates, 4 streams,
        // async on; the Device* members default-construct to their own defaults.
        Config()
            : max_input_size(1024 * 1024)
            , prefix_trie_capacity(65536)
            , candidate_queue_capacity(1024)
            , num_streams(4)
            , use_async_operations(true) {}
    };

    /**
     * Extraction result from GPU processing.
     * When success is false, error_message describes the failure and the
     * remaining fields should not be relied on.
     */
    struct ExtractionResult {
        std::vector<DeviceCandidate> candidates;  // surviving candidates
                                                  // NOTE(review): DeviceCandidate::data points into
                                                  // device memory — confirm how callers read it
        uint64_t paths_evaluated;           // total paths evaluated
        uint64_t paths_pruned;              // paths eliminated by pruning
        float effective_branching_factor;   // observed branching after pruning
        float wall_clock_ms;                // end-to-end time, milliseconds
        float gpu_utilization;              // GPU utilization — units (fraction vs. percent)
                                            // not stated here; TODO confirm
        bool success;                       // overall success flag
        std::string error_message;          // populated when success == false

        // Failed/empty result by default (success == false).
        ExtractionResult()
            : paths_evaluated(0), paths_pruned(0)
            , effective_branching_factor(0.0f), wall_clock_ms(0.0f)
            , gpu_utilization(0.0f), success(false) {}
    };

    ETBCudaEngine();
    ~ETBCudaEngine();

    // Non-copyable (owns GPU buffers, streams, and kernel launchers)
    ETBCudaEngine(const ETBCudaEngine&) = delete;
    ETBCudaEngine& operator=(const ETBCudaEngine&) = delete;

    /**
     * Initialize the CUDA engine.
     * Allocates GPU buffers per `config` and binds the engine to `device_id`.
     * @param config Engine configuration
     * @param device_id CUDA device to use (default: 0)
     * @return true if initialization succeeded
     */
    bool initialize(const Config& config, int device_id = 0);

    /**
     * Check if the engine is initialized.
     */
    bool is_initialized() const { return initialized_; }

    /**
     * Release all GPU resources.
     * NOTE(review): presumably safe to call repeatedly and implied by the
     * destructor — confirm in the implementation.
     */
    void release();

    /**
     * Load signatures into constant memory.
     * @param signatures Vector of file signatures (at most MAX_SIGNATURES —
     *        TODO confirm how overflow is handled)
     * @return true if upload succeeded
     */
    bool load_signatures(const std::vector<DeviceFileSignature>& signatures);

    /**
     * Extract data from input bytes.
     * @param input Input byte data (host pointer; length must not exceed
     *        Config::max_input_size — TODO confirm enforcement)
     * @param length Length of input
     * @return Extraction result
     */
    ExtractionResult extract(const uint8_t* input, size_t length);

    /**
     * Get the current configuration.
     */
    const Config& get_config() const { return config_; }

    /**
     * Get architecture information for the current device.
     * (ArchitectureInfo is declared in arch_optimizations.cuh.)
     */
    const ArchitectureInfo& get_arch_info() const { return arch_info_; }

    /**
     * Get memory statistics from the GPU memory manager.
     */
    GPUMemoryManager::MemoryStats get_memory_stats() const;

private:
    Config config_;        // active configuration (set by initialize())
    bool initialized_;     // true after successful initialize(), false after release()
    int device_id_;        // bound CUDA device ordinal
    ArchitectureInfo arch_info_;  // cached architecture info for device_id_

    // GPU memory manager (owns device allocations)
    std::unique_ptr<GPUMemoryManager> memory_manager_;

    // Kernel launchers (declared in their respective *_kernel.cuh headers)
    std::unique_ptr<PathGeneratorKernel> path_generator_;
    std::unique_ptr<HeuristicsKernel> heuristics_;
    std::unique_ptr<SignatureMatcherKernel> signature_matcher_;
    std::unique_ptr<PrefixPrunerKernel> prefix_pruner_;

    // Adaptive launcher for architecture-specific optimizations
    std::unique_ptr<AdaptiveKernelLauncher> adaptive_launcher_;

    // Internal methods
    void configure_kernels();  // apply config_/arch_info_ to the launchers
    void run_extraction_pipeline(size_t input_length, cudaStream_t stream);  // one pipeline pass on `stream`
};
176
+
177
/**
 * Check if CUDA is available and get device count.
 * @return Number of CUDA devices, or 0 if CUDA is not available
 */
int get_cuda_device_count();

/**
 * Get information about a CUDA device.
 * NOTE(review): overlaps with get_device_info() declared in cuda_common.cuh —
 * confirm whether the two are intended to differ or one delegates to the other.
 * @param device_id Device ID
 * @return Device information
 */
DeviceInfo get_cuda_device_info(int device_id = 0);

/**
 * Select the best CUDA device for ETB workloads.
 * Prefers Blackwell > Hopper > other architectures.
 * @return Best device ID, or -1 if no suitable device found
 */
int select_best_device();
196
+
197
+ } // namespace cuda
198
+ } // namespace etb
199
+
200
+ #endif // ETB_CUDA_CUH