explodethosebits 0.3.0__cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. etb/__init__.py +351 -0
  2. etb/__init__.pyi +976 -0
  3. etb/_etb.cpython-39-x86_64-linux-gnu.so +0 -0
  4. etb/_version.py +34 -0
  5. etb/py.typed +2 -0
  6. explodethosebits-0.3.0.dist-info/METADATA +405 -0
  7. explodethosebits-0.3.0.dist-info/RECORD +88 -0
  8. explodethosebits-0.3.0.dist-info/WHEEL +6 -0
  9. explodethosebits-0.3.0.dist-info/licenses/LICENSE +21 -0
  10. explodethosebits-0.3.0.dist-info/sboms/auditwheel.cdx.json +1 -0
  11. explodethosebits.libs/libcudart-c3a75b33.so.12.8.90 +0 -0
  12. include/etb/bit_coordinate.hpp +45 -0
  13. include/etb/bit_extraction.hpp +79 -0
  14. include/etb/bit_pruning.hpp +122 -0
  15. include/etb/config.hpp +284 -0
  16. include/etb/cuda/arch_optimizations.cuh +358 -0
  17. include/etb/cuda/blackwell_optimizations.cuh +300 -0
  18. include/etb/cuda/cuda_common.cuh +265 -0
  19. include/etb/cuda/etb_cuda.cuh +200 -0
  20. include/etb/cuda/gpu_memory.cuh +406 -0
  21. include/etb/cuda/heuristics_kernel.cuh +315 -0
  22. include/etb/cuda/path_generator_kernel.cuh +272 -0
  23. include/etb/cuda/prefix_pruner_kernel.cuh +370 -0
  24. include/etb/cuda/signature_kernel.cuh +328 -0
  25. include/etb/early_stopping.hpp +246 -0
  26. include/etb/etb.hpp +20 -0
  27. include/etb/heuristics.hpp +165 -0
  28. include/etb/memoization.hpp +285 -0
  29. include/etb/path.hpp +86 -0
  30. include/etb/path_count.hpp +87 -0
  31. include/etb/path_generator.hpp +175 -0
  32. include/etb/prefix_trie.hpp +339 -0
  33. include/etb/reporting.hpp +437 -0
  34. include/etb/scoring.hpp +269 -0
  35. include/etb/signature.hpp +190 -0
  36. include/gmock/gmock-actions.h +2297 -0
  37. include/gmock/gmock-cardinalities.h +159 -0
  38. include/gmock/gmock-function-mocker.h +518 -0
  39. include/gmock/gmock-matchers.h +5623 -0
  40. include/gmock/gmock-more-actions.h +658 -0
  41. include/gmock/gmock-more-matchers.h +120 -0
  42. include/gmock/gmock-nice-strict.h +277 -0
  43. include/gmock/gmock-spec-builders.h +2148 -0
  44. include/gmock/gmock.h +96 -0
  45. include/gmock/internal/custom/README.md +18 -0
  46. include/gmock/internal/custom/gmock-generated-actions.h +7 -0
  47. include/gmock/internal/custom/gmock-matchers.h +37 -0
  48. include/gmock/internal/custom/gmock-port.h +40 -0
  49. include/gmock/internal/gmock-internal-utils.h +487 -0
  50. include/gmock/internal/gmock-port.h +139 -0
  51. include/gmock/internal/gmock-pp.h +279 -0
  52. include/gtest/gtest-assertion-result.h +237 -0
  53. include/gtest/gtest-death-test.h +345 -0
  54. include/gtest/gtest-matchers.h +923 -0
  55. include/gtest/gtest-message.h +252 -0
  56. include/gtest/gtest-param-test.h +546 -0
  57. include/gtest/gtest-printers.h +1161 -0
  58. include/gtest/gtest-spi.h +250 -0
  59. include/gtest/gtest-test-part.h +192 -0
  60. include/gtest/gtest-typed-test.h +331 -0
  61. include/gtest/gtest.h +2321 -0
  62. include/gtest/gtest_pred_impl.h +279 -0
  63. include/gtest/gtest_prod.h +60 -0
  64. include/gtest/internal/custom/README.md +44 -0
  65. include/gtest/internal/custom/gtest-port.h +37 -0
  66. include/gtest/internal/custom/gtest-printers.h +42 -0
  67. include/gtest/internal/custom/gtest.h +37 -0
  68. include/gtest/internal/gtest-death-test-internal.h +307 -0
  69. include/gtest/internal/gtest-filepath.h +227 -0
  70. include/gtest/internal/gtest-internal.h +1560 -0
  71. include/gtest/internal/gtest-param-util.h +1026 -0
  72. include/gtest/internal/gtest-port-arch.h +122 -0
  73. include/gtest/internal/gtest-port.h +2481 -0
  74. include/gtest/internal/gtest-string.h +178 -0
  75. include/gtest/internal/gtest-type-util.h +220 -0
  76. lib/libetb_core.a +0 -0
  77. lib64/cmake/GTest/GTestConfig.cmake +33 -0
  78. lib64/cmake/GTest/GTestConfigVersion.cmake +43 -0
  79. lib64/cmake/GTest/GTestTargets-release.cmake +49 -0
  80. lib64/cmake/GTest/GTestTargets.cmake +139 -0
  81. lib64/libgmock.a +0 -0
  82. lib64/libgmock_main.a +0 -0
  83. lib64/libgtest.a +0 -0
  84. lib64/libgtest_main.a +0 -0
  85. lib64/pkgconfig/gmock.pc +10 -0
  86. lib64/pkgconfig/gmock_main.pc +10 -0
  87. lib64/pkgconfig/gtest.pc +9 -0
  88. lib64/pkgconfig/gtest_main.pc +10 -0
@@ -0,0 +1,265 @@
1
+ #ifndef ETB_CUDA_COMMON_CUH
2
+ #define ETB_CUDA_COMMON_CUH
3
+
4
+ #include <cuda_runtime.h>
5
+ #include <device_launch_parameters.h>
6
+ #include <cstdint>
7
+ #include <stdexcept>
8
+ #include <string>
9
+
10
+ namespace etb {
11
+ namespace cuda {
12
+
13
// Error checking macro: wraps a CUDA runtime call and throws std::runtime_error
// (carrying cudaGetErrorString plus file:line) when the call does not return
// cudaSuccess. Usage: ETB_CUDA_CHECK(cudaMalloc(&ptr, bytes));
// Fixes vs. original: the argument is parenthesized as (call) so expression
// arguments bind correctly, and the temporary uses a reserved-style name so it
// cannot shadow/collide with an `err` variable referenced inside `call`.
// The do/while(0) wrapper keeps the macro safe in unbraced if/else bodies.
#define ETB_CUDA_CHECK(call) \
    do { \
        cudaError_t etb_cuda_err_ = (call); \
        if (etb_cuda_err_ != cudaSuccess) { \
            throw std::runtime_error(std::string("CUDA error: ") + \
                cudaGetErrorString(etb_cuda_err_) + " at " + __FILE__ + ":" + \
                std::to_string(__LINE__)); \
        } \
    } while (0)
22
+
23
// Architecture-specific constants used to pick launch configurations.
namespace arch {
    // SM 90 (Hopper) configuration
    constexpr int HOPPER_SM = 90;
    constexpr int HOPPER_THREADS_PER_BLOCK = 256;
    constexpr int HOPPER_SHARED_MEM_SIZE = 48 * 1024;  // 48KB default (static limit per block)
    constexpr int HOPPER_MAX_SHARED_MEM = 228 * 1024;  // 228KB max with opt-in
                                                       // (cudaFuncAttributeMaxDynamicSharedMemorySize)

    // SM 100 (Blackwell) configuration
    constexpr int BLACKWELL_SM = 100;
    constexpr int BLACKWELL_THREADS_PER_BLOCK = 512;
    constexpr int BLACKWELL_SHARED_MEM_SIZE = 64 * 1024;   // 64KB default
    constexpr int BLACKWELL_MAX_SHARED_MEM = 256 * 1024;   // 256KB max with opt-in
                                                           // NOTE(review): confirm against the
                                                           // SM100 datasheet figures

    // Common constants
    constexpr int WARP_SIZE = 32;
    // NOTE(review): 65535 is the hardware limit only for gridDim.y/z; gridDim.x
    // allows up to 2^31-1 on all supported architectures. Keeping the
    // conservative value is safe but may force extra grid-stride passes —
    // confirm callers intend it as a y/z (or deliberately conservative) bound.
    constexpr int MAX_GRID_DIM = 65535;
}

// Maximum sizes for constant memory structures (these bound the fixed-size
// arrays in DeviceFileSignature/DeviceFooterSignature so signature tables can
// live in __constant__ memory).
constexpr size_t MAX_SIGNATURES = 256;
constexpr size_t MAX_SIGNATURE_LENGTH = 32;
constexpr size_t MAX_FORMAT_NAME_LENGTH = 32;
46
+
47
// GPU-compatible bit coordinate (matches CPU version)
// Addresses one bit of an input buffer: byte_index selects the byte,
// bit_position the bit within it. Layout is pinned to exactly 8 bytes via
// alignas(8) + explicit padding; do not reorder members — presumably this must
// stay byte-compatible with the CPU type in include/etb/bit_coordinate.hpp
// (TODO confirm).
struct alignas(8) DeviceBitCoordinate {
    uint32_t byte_index;   // index of the byte within the input buffer
    uint8_t bit_position;  // bit index within that byte; valid range 0-7
    uint8_t padding[3];    // explicit padding so sizeof == 8, zeroed by ctors

    // Zero coordinate (byte 0, bit 0).
    __host__ __device__ DeviceBitCoordinate()
        : byte_index(0), bit_position(0), padding{0, 0, 0} {}

    // Coordinate at the given byte/bit position (not range-checked here;
    // use is_valid()).
    __host__ __device__ DeviceBitCoordinate(uint32_t byte_idx, uint8_t bit_pos)
        : byte_index(byte_idx), bit_position(bit_pos), padding{0, 0, 0} {}

    // True when this coordinate addresses a bit inside an input of
    // `input_length` bytes (byte in range AND bit position 0-7).
    __host__ __device__ bool is_valid(uint32_t input_length) const {
        return byte_index < input_length && bit_position <= 7;
    }
};
63
+
64
// GPU-compatible path structure
// A growable sequence of bit coordinates backed by an external buffer.
// NOTE(review): `coordinates` is a raw, non-owning pointer — allocation and
// lifetime appear to be managed elsewhere (presumably the GPU memory manager
// in gpu_memory.cuh); confirm before adding any ownership semantics here.
struct DevicePath {
    DeviceBitCoordinate* coordinates;  // buffer of `capacity` slots; first `length` in use
    uint32_t length;    // number of coordinates currently in the path
    uint32_t capacity;  // total slots available at `coordinates`

    // Empty path with no buffer attached.
    __host__ __device__ DevicePath()
        : coordinates(nullptr), length(0), capacity(0) {}
};
73
+
74
// GPU-compatible file signature for constant memory
// Note: No constructor to allow __constant__ memory usage (objects placed in
// __constant__ memory cannot run dynamic initialization).
// alignas(64) keeps each entry cache-line aligned in the signature table.
struct alignas(64) DeviceFileSignature {
    uint8_t magic_bytes[MAX_SIGNATURE_LENGTH];  // expected header bytes; first `length` are significant
    uint8_t mask[MAX_SIGNATURE_LENGTH];         // per-byte mask applied when comparing —
                                                // TODO confirm semantics (set bits compared?) in signature_kernel.cuh
    uint8_t length;         // number of significant bytes in magic_bytes/mask (<= MAX_SIGNATURE_LENGTH)
    uint16_t offset;        // byte offset within the candidate where the signature is expected
    uint16_t format_id;     // identifier of the detected file format
    float base_confidence;  // confidence contributed by a match
    uint8_t padding[1];     // NOTE(review): with alignas(64) the compiler pads the
                            // struct to 128 bytes regardless; this 1-byte member
                            // does not itself achieve alignment — confirm intent
};
85
+
86
// GPU-compatible footer signature (trailing magic bytes of a format)
// Note: No constructor to allow __constant__ memory usage
struct DeviceFooterSignature {
    uint8_t magic_bytes[MAX_SIGNATURE_LENGTH];  // expected trailing bytes; first `length` significant
    uint8_t length;      // number of significant bytes in magic_bytes
    bool required;       // when true, a candidate presumably must end with this
                         // footer to match — TODO confirm in signature_kernel.cuh
    uint8_t padding[2];  // explicit padding; total size 36 bytes
};
94
+
95
// GPU-compatible heuristic result
// Per-candidate content statistics produced by the heuristics kernel.
struct DeviceHeuristicResult {
    float entropy;             // byte entropy; given DeviceEarlyStoppingConfig's
                               // 0.1/7.9 bounds this is presumably Shannon
                               // entropy in bits/byte (0-8) — TODO confirm
    float printable_ratio;     // fraction of printable-ASCII bytes (0-1)
    float control_char_ratio;  // fraction of control characters (0-1)
    uint32_t max_null_run;     // longest run of 0x00 bytes observed
    float utf8_validity;       // degree to which the data is valid UTF-8 (0-1)
    float composite_score;     // weighted combination of the above — presumably
                               // weighted by DeviceHeuristicWeights; confirm in kernel
    // All-zero result.
    __host__ __device__ DeviceHeuristicResult()
        : entropy(0.0f), printable_ratio(0.0f), control_char_ratio(0.0f)
        , max_null_run(0), utf8_validity(0.0f), composite_score(0.0f) {}
};
108
+
109
// GPU-compatible heuristic weights
// Weights for combining DeviceHeuristicResult fields into a composite score.
// Defaults sum to 1.00 (0.25 + 0.25 + 0.15 + 0.15 + 0.20); keep that invariant
// when tuning so composite scores stay comparable.
struct DeviceHeuristicWeights {
    float entropy_weight;       // weight of the entropy term
    float printable_weight;     // weight of the printable-ratio term
    float control_char_weight;  // weight of the control-char-ratio term
    float null_run_weight;      // weight of the max-null-run term
    float utf8_weight;          // weight of the UTF-8 validity term

    // Default weighting (sums to 1.00).
    __host__ __device__ DeviceHeuristicWeights()
        : entropy_weight(0.25f), printable_weight(0.25f)
        , control_char_weight(0.15f), null_run_weight(0.15f)
        , utf8_weight(0.20f) {}
};
122
+
123
// GPU-compatible scoring weights
// Weights for the final candidate score combining signature, heuristic,
// length, and structure components. Defaults sum to 1.00
// (0.40 + 0.30 + 0.15 + 0.15).
struct DeviceScoringWeights {
    float signature_weight;  // weight of the signature-match component
    float heuristic_weight;  // weight of the heuristic composite component
    float length_weight;     // weight of the candidate-length component
    float structure_weight;  // weight of the structural component — TODO confirm
                             // what "structure" measures (see scoring.hpp)

    // Default weighting (sums to 1.00).
    __host__ __device__ DeviceScoringWeights()
        : signature_weight(0.40f), heuristic_weight(0.30f)
        , length_weight(0.15f), structure_weight(0.15f) {}
};
134
+
135
// Prefix trie node status
// uint8_t-backed so it packs tightly inside DevicePrefixTrieNode.
enum class DevicePrefixStatus : uint8_t {
    UNKNOWN = 0,  // not yet evaluated (also the zero-initialized default)
    VALID = 1,    // prefix may still lead to a valid candidate
    PRUNED = 2    // prefix eliminated; descendants need not be explored
};
141
+
142
// GPU-compatible prefix trie node
// Packed to exactly 16 bytes (alignas(16) + explicit padding) so nodes sit on
// aligned boundaries in a flat device array. Children are addressed by
// `children_offset` — an index/offset into the node pool rather than a
// pointer, which presumably keeps the trie relocatable between host and
// device (TODO confirm against prefix_pruner_kernel.cuh).
struct alignas(16) DevicePrefixTrieNode {
    uint8_t reconstructed_byte;  // byte value this trie edge reconstructs
    DevicePrefixStatus status;   // UNKNOWN / VALID / PRUNED
    uint8_t padding[2];          // explicit padding; float below lands at offset 4
    float best_score;            // best score observed through this prefix
    uint32_t children_offset;    // offset of this node's children in the node pool
    uint32_t visit_count;        // number of times this node was visited
                                 // (presumably updated atomically — confirm)

    // Fresh node: UNKNOWN status, zero score/offset/count.
    __host__ __device__ DevicePrefixTrieNode()
        : reconstructed_byte(0), status(DevicePrefixStatus::UNKNOWN)
        , padding{0, 0}, best_score(0.0f), children_offset(0), visit_count(0) {}
};
155
+
156
// Signature match result
// Outcome of testing one candidate against the signature table.
struct DeviceSignatureMatch {
    bool matched;          // overall match verdict
    uint16_t format_id;    // format of the matching DeviceFileSignature (when matched)
    float confidence;      // confidence of the match
    uint32_t match_offset; // byte offset in the candidate where the match occurred
    bool header_matched;   // header signature matched
    bool footer_matched;   // footer signature matched
    uint8_t padding[2];    // explicit padding; total size 16 bytes

    // No-match result.
    __host__ __device__ DeviceSignatureMatch()
        : matched(false), format_id(0), confidence(0.0f)
        , match_offset(0), header_matched(false), footer_matched(false)
        , padding{0, 0} {}
};
171
+
172
// Candidate structure for GPU
// One reconstructed byte sequence plus its evaluation results.
// NOTE(review): `data` is a raw, non-owning pointer into device memory —
// lifetime is presumably managed by the GPU memory manager; confirm before
// copying these structures to the host (the pointer will not be dereferenceable
// there).
struct DeviceCandidate {
    uint8_t* data;            // reconstructed bytes (device buffer, non-owning)
    uint32_t data_length;     // number of valid bytes at `data`
    uint16_t format_id;       // detected format (from the signature match)
    float confidence;         // signature-derived confidence
    float composite_score;    // final combined score
    DeviceHeuristicResult heuristics;       // full heuristic breakdown
    DeviceSignatureMatch signature_match;   // full signature-match details

    // Empty candidate; `heuristics` and `signature_match` default-construct
    // to their own zero states.
    __host__ __device__ DeviceCandidate()
        : data(nullptr), data_length(0), format_id(0)
        , confidence(0.0f), composite_score(0.0f) {}
};
186
+
187
// Early stopping configuration
// Thresholds for multi-level pruning: candidates are checked after
// level1/level2/level3 bytes have been reconstructed, and pruned when their
// entropy leaves [entropy_min, entropy_max] or their score falls below
// prune_threshold — TODO confirm exact check order in the kernels.
struct DeviceEarlyStoppingConfig {
    uint32_t level1_bytes;     // bytes reconstructed before the first check (default 4)
    uint32_t level2_bytes;     // bytes before the second check (default 8)
    uint32_t level3_bytes;     // bytes before the third check (default 16)
    float entropy_min;         // minimum acceptable entropy (bits/byte)
    float entropy_max;         // maximum acceptable entropy (bits/byte; < 8 = pure noise)
    float prune_threshold;     // minimum score to keep exploring a prefix
    bool adaptive_thresholds;  // allow thresholds to adapt at runtime — TODO
                               // confirm adaptation policy (see early_stopping.hpp)
    uint8_t padding[3];        // explicit padding; total size 28 bytes

    // Defaults: check at 4/8/16 bytes, accept entropy in [0.1, 7.9],
    // prune below 0.3, adaptive on.
    __host__ __device__ DeviceEarlyStoppingConfig()
        : level1_bytes(4), level2_bytes(8), level3_bytes(16)
        , entropy_min(0.1f), entropy_max(7.9f), prune_threshold(0.3f)
        , adaptive_thresholds(true), padding{0, 0, 0} {}
};
203
+
204
// Bit pruning mode — which bit positions the path generator may select.
// uint8_t-backed so it packs tightly inside DeviceBitPruningConfig.
enum class DeviceBitPruningMode : uint8_t {
    EXHAUSTIVE = 0,  // consider all 8 bit positions
    MSB_ONLY = 1,    // consider only the most significant bit
    SINGLE_BIT = 2,  // consider a single bit position — TODO confirm which/how chosen
    CUSTOM = 3       // positions given by DeviceBitPruningConfig::bit_mask
};
211
+
212
// Bit pruning configuration
// Restricts which bit positions are explored during path generation.
struct DeviceBitPruningConfig {
    DeviceBitPruningMode mode;  // pruning strategy (see enum above)
    uint8_t bit_mask;           // Bitmask for allowed bit positions (used in CUSTOM
                                // mode; 0xFF = all 8 positions allowed)
    uint8_t padding[2];         // explicit padding; total size 4 bytes

    // Default: exhaustive search over all bit positions.
    __host__ __device__ DeviceBitPruningConfig()
        : mode(DeviceBitPruningMode::EXHAUSTIVE), bit_mask(0xFF), padding{0, 0} {}
};
221
+
222
// Kernel configuration
// Host-side launch parameters for a kernel (host-only: the constructor is
// deliberately not __host__ __device__; this type is never built on device).
struct KernelConfig {
    int threads_per_block;   // block size for the launch
    int blocks_per_grid;     // grid size for the launch
    size_t shared_mem_size;  // dynamic shared memory per block, in bytes
    int sm_version;          // target SM version (e.g. 90, 100); 0 = unknown

    // Conservative default: 256 threads, single block, no dynamic shared mem.
    KernelConfig()
        : threads_per_block(256), blocks_per_grid(1)
        , shared_mem_size(0), sm_version(0) {}
};
233
+
234
// Device information
// Snapshot of CUDA device properties (host-only; populated by
// get_device_info(), presumably from cudaGetDeviceProperties — confirm in
// the .cu implementation).
struct DeviceInfo {
    int device_id;                          // CUDA device ordinal; -1 = not queried/invalid
    int sm_version;                         // compute capability as major*10+minor (e.g. 90)
    size_t total_global_mem;                // total global memory, bytes
    size_t shared_mem_per_block;            // default shared memory per block, bytes
    size_t shared_mem_per_multiprocessor;   // shared memory per SM, bytes
    int multiprocessor_count;               // number of SMs
    int max_threads_per_block;              // hardware thread-per-block limit
    int warp_size;                          // lanes per warp (32 on all current hardware)
    bool supports_cooperative_groups;       // device supports cooperative kernel launch

    // Sentinel "no device" state (device_id == -1).
    DeviceInfo()
        : device_id(-1), sm_version(0), total_global_mem(0)
        , shared_mem_per_block(0), shared_mem_per_multiprocessor(0)
        , multiprocessor_count(0), max_threads_per_block(0)
        , warp_size(32), supports_cooperative_groups(false) {}
};
252
+
253
// Get device information for the given CUDA device ordinal (default 0).
// NOTE(review): behavior when the device does not exist (throw via
// ETB_CUDA_CHECK vs. returning the sentinel DeviceInfo) is defined in the
// .cu implementation — confirm there.
DeviceInfo get_device_info(int device_id = 0);

// Check if CUDA is available (i.e. at least one usable device is present).
bool is_cuda_available();

// Get optimal kernel configuration for the current device, sized to cover
// `work_items` items with `shared_mem_required` bytes of dynamic shared
// memory per block.
KernelConfig get_optimal_config(int device_id, size_t work_items, size_t shared_mem_required = 0);
261
+
262
+ } // namespace cuda
263
+ } // namespace etb
264
+
265
+ #endif // ETB_CUDA_COMMON_CUH
@@ -0,0 +1,200 @@
1
+ #ifndef ETB_CUDA_CUH
2
+ #define ETB_CUDA_CUH
3
+
4
+ /**
5
+ * ExplodeThoseBits CUDA Library
6
+ *
7
+ * Main include header for all CUDA components.
8
+ *
9
+ * This library provides GPU-accelerated implementations of:
10
+ * - Path generation with work-stealing
11
+ * - Heuristics calculation with shared memory histograms
12
+ * - Signature matching with constant memory broadcast
13
+ * - Prefix pruning with atomic trie updates
14
+ *
15
+ * Optimized for:
16
+ * - NVIDIA Hopper (SM 90) architecture
17
+ * - NVIDIA Blackwell (SM 100) architecture
18
+ */
19
+
20
// Standard library dependencies — required directly by the ETBCudaEngine
// interface below (std::vector, std::unique_ptr, std::string, uint8_t);
// previously available only transitively, which made this header
// non-self-contained.
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

// Common definitions and utilities
#include "cuda_common.cuh"

// GPU memory management
#include "gpu_memory.cuh"

// CUDA kernels
#include "path_generator_kernel.cuh"
#include "heuristics_kernel.cuh"
#include "signature_kernel.cuh"
#include "prefix_pruner_kernel.cuh"

// Architecture-specific optimizations
#include "arch_optimizations.cuh"
#include "blackwell_optimizations.cuh"
35
+
36
+ namespace etb {
37
+ namespace cuda {
38
+
39
/**
 * ETB CUDA Engine - Main interface for GPU-accelerated extraction.
 *
 * Provides a unified interface for all CUDA operations, handling
 * memory management, kernel launches, and result retrieval.
 *
 * Lifecycle: construct -> initialize() -> load_signatures() -> extract()
 * (repeatable) -> release() or destruction. Non-copyable because it owns
 * GPU resources.
 *
 * NOTE(review): thread-safety is not stated anywhere in this header —
 * presumably one engine per thread/stream set; confirm in the implementation.
 */
class ETBCudaEngine {
public:
    /**
     * Configuration for the CUDA engine.
     * All sizes are upper bounds used to pre-allocate GPU buffers at
     * initialize() time.
     */
    struct Config {
        // Memory configuration
        size_t max_input_size;            // largest input accepted by extract(), bytes (default 1 MiB)
        size_t prefix_trie_capacity;      // max DevicePrefixTrieNode slots (default 65536)
        size_t candidate_queue_capacity;  // max queued DeviceCandidates (default 1024)

        // Kernel configuration
        int num_streams;            // CUDA streams for overlap (default 4)
        bool use_async_operations;  // use async copies/launches (default true)

        // Early stopping configuration
        DeviceEarlyStoppingConfig early_stopping;

        // Heuristic weights
        DeviceHeuristicWeights heuristic_weights;

        // Scoring weights
        DeviceScoringWeights scoring_weights;

        // Bit pruning configuration
        DeviceBitPruningConfig bit_pruning;

        // Defaults: 1 MiB input, 64K trie nodes, 1024 candidates, 4 streams,
        // async on; the Device* members default-construct to their own defaults.
        Config()
            : max_input_size(1024 * 1024)
            , prefix_trie_capacity(65536)
            , candidate_queue_capacity(1024)
            , num_streams(4)
            , use_async_operations(true) {}
    };

    /**
     * Extraction result from GPU processing.
     * When success is false, error_message describes the failure and the
     * remaining fields should not be relied on.
     */
    struct ExtractionResult {
        std::vector<DeviceCandidate> candidates;  // surviving candidates
                                                  // NOTE(review): DeviceCandidate::data points into
                                                  // device memory — confirm how callers read it
        uint64_t paths_evaluated;           // total paths evaluated
        uint64_t paths_pruned;              // paths eliminated by pruning
        float effective_branching_factor;   // observed branching after pruning
        float wall_clock_ms;                // end-to-end time, milliseconds
        float gpu_utilization;              // GPU utilization — units (fraction vs. percent)
                                            // not stated here; TODO confirm
        bool success;                       // overall success flag
        std::string error_message;          // populated when success == false

        // Failed/empty result by default (success == false).
        ExtractionResult()
            : paths_evaluated(0), paths_pruned(0)
            , effective_branching_factor(0.0f), wall_clock_ms(0.0f)
            , gpu_utilization(0.0f), success(false) {}
    };

    ETBCudaEngine();
    ~ETBCudaEngine();

    // Non-copyable (owns GPU buffers, streams, and kernel launchers)
    ETBCudaEngine(const ETBCudaEngine&) = delete;
    ETBCudaEngine& operator=(const ETBCudaEngine&) = delete;

    /**
     * Initialize the CUDA engine.
     * Allocates GPU buffers per `config` and binds the engine to `device_id`.
     * @param config Engine configuration
     * @param device_id CUDA device to use (default: 0)
     * @return true if initialization succeeded
     */
    bool initialize(const Config& config, int device_id = 0);

    /**
     * Check if the engine is initialized.
     */
    bool is_initialized() const { return initialized_; }

    /**
     * Release all GPU resources.
     * NOTE(review): presumably safe to call repeatedly and implied by the
     * destructor — confirm in the implementation.
     */
    void release();

    /**
     * Load signatures into constant memory.
     * @param signatures Vector of file signatures (at most MAX_SIGNATURES —
     *        TODO confirm how overflow is handled)
     * @return true if upload succeeded
     */
    bool load_signatures(const std::vector<DeviceFileSignature>& signatures);

    /**
     * Extract data from input bytes.
     * @param input Input byte data (host pointer; length must not exceed
     *        Config::max_input_size — TODO confirm enforcement)
     * @param length Length of input
     * @return Extraction result
     */
    ExtractionResult extract(const uint8_t* input, size_t length);

    /**
     * Get the current configuration.
     */
    const Config& get_config() const { return config_; }

    /**
     * Get architecture information for the current device.
     * (ArchitectureInfo is declared in arch_optimizations.cuh.)
     */
    const ArchitectureInfo& get_arch_info() const { return arch_info_; }

    /**
     * Get memory statistics from the GPU memory manager.
     */
    GPUMemoryManager::MemoryStats get_memory_stats() const;

private:
    Config config_;        // active configuration (set by initialize())
    bool initialized_;     // true after successful initialize(), false after release()
    int device_id_;        // bound CUDA device ordinal
    ArchitectureInfo arch_info_;  // cached architecture info for device_id_

    // GPU memory manager (owns device allocations)
    std::unique_ptr<GPUMemoryManager> memory_manager_;

    // Kernel launchers (declared in their respective *_kernel.cuh headers)
    std::unique_ptr<PathGeneratorKernel> path_generator_;
    std::unique_ptr<HeuristicsKernel> heuristics_;
    std::unique_ptr<SignatureMatcherKernel> signature_matcher_;
    std::unique_ptr<PrefixPrunerKernel> prefix_pruner_;

    // Adaptive launcher for architecture-specific optimizations
    std::unique_ptr<AdaptiveKernelLauncher> adaptive_launcher_;

    // Internal methods
    void configure_kernels();  // apply config_/arch_info_ to the launchers
    void run_extraction_pipeline(size_t input_length, cudaStream_t stream);  // one pipeline pass on `stream`
};
176
+
177
/**
 * Check if CUDA is available and get device count.
 * @return Number of CUDA devices, or 0 if CUDA is not available
 */
int get_cuda_device_count();

/**
 * Get information about a CUDA device.
 * NOTE(review): overlaps with get_device_info() declared in cuda_common.cuh —
 * confirm whether the two are intended to differ or one delegates to the other.
 * @param device_id Device ID
 * @return Device information
 */
DeviceInfo get_cuda_device_info(int device_id = 0);

/**
 * Select the best CUDA device for ETB workloads.
 * Prefers Blackwell > Hopper > other architectures.
 * @return Best device ID, or -1 if no suitable device found
 */
int select_best_device();
196
+
197
+ } // namespace cuda
198
+ } // namespace etb
199
+
200
+ #endif // ETB_CUDA_CUH