explodethosebits-0.3.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. etb/__init__.py +351 -0
  2. etb/__init__.pyi +976 -0
  3. etb/_etb.cpython-39-x86_64-linux-gnu.so +0 -0
  4. etb/_version.py +34 -0
  5. etb/py.typed +2 -0
  6. explodethosebits-0.3.0.dist-info/METADATA +405 -0
  7. explodethosebits-0.3.0.dist-info/RECORD +88 -0
  8. explodethosebits-0.3.0.dist-info/WHEEL +6 -0
  9. explodethosebits-0.3.0.dist-info/licenses/LICENSE +21 -0
  10. explodethosebits-0.3.0.dist-info/sboms/auditwheel.cdx.json +1 -0
  11. explodethosebits.libs/libcudart-c3a75b33.so.12.8.90 +0 -0
  12. include/etb/bit_coordinate.hpp +45 -0
  13. include/etb/bit_extraction.hpp +79 -0
  14. include/etb/bit_pruning.hpp +122 -0
  15. include/etb/config.hpp +284 -0
  16. include/etb/cuda/arch_optimizations.cuh +358 -0
  17. include/etb/cuda/blackwell_optimizations.cuh +300 -0
  18. include/etb/cuda/cuda_common.cuh +265 -0
  19. include/etb/cuda/etb_cuda.cuh +200 -0
  20. include/etb/cuda/gpu_memory.cuh +406 -0
  21. include/etb/cuda/heuristics_kernel.cuh +315 -0
  22. include/etb/cuda/path_generator_kernel.cuh +272 -0
  23. include/etb/cuda/prefix_pruner_kernel.cuh +370 -0
  24. include/etb/cuda/signature_kernel.cuh +328 -0
  25. include/etb/early_stopping.hpp +246 -0
  26. include/etb/etb.hpp +20 -0
  27. include/etb/heuristics.hpp +165 -0
  28. include/etb/memoization.hpp +285 -0
  29. include/etb/path.hpp +86 -0
  30. include/etb/path_count.hpp +87 -0
  31. include/etb/path_generator.hpp +175 -0
  32. include/etb/prefix_trie.hpp +339 -0
  33. include/etb/reporting.hpp +437 -0
  34. include/etb/scoring.hpp +269 -0
  35. include/etb/signature.hpp +190 -0
  36. include/gmock/gmock-actions.h +2297 -0
  37. include/gmock/gmock-cardinalities.h +159 -0
  38. include/gmock/gmock-function-mocker.h +518 -0
  39. include/gmock/gmock-matchers.h +5623 -0
  40. include/gmock/gmock-more-actions.h +658 -0
  41. include/gmock/gmock-more-matchers.h +120 -0
  42. include/gmock/gmock-nice-strict.h +277 -0
  43. include/gmock/gmock-spec-builders.h +2148 -0
  44. include/gmock/gmock.h +96 -0
  45. include/gmock/internal/custom/README.md +18 -0
  46. include/gmock/internal/custom/gmock-generated-actions.h +7 -0
  47. include/gmock/internal/custom/gmock-matchers.h +37 -0
  48. include/gmock/internal/custom/gmock-port.h +40 -0
  49. include/gmock/internal/gmock-internal-utils.h +487 -0
  50. include/gmock/internal/gmock-port.h +139 -0
  51. include/gmock/internal/gmock-pp.h +279 -0
  52. include/gtest/gtest-assertion-result.h +237 -0
  53. include/gtest/gtest-death-test.h +345 -0
  54. include/gtest/gtest-matchers.h +923 -0
  55. include/gtest/gtest-message.h +252 -0
  56. include/gtest/gtest-param-test.h +546 -0
  57. include/gtest/gtest-printers.h +1161 -0
  58. include/gtest/gtest-spi.h +250 -0
  59. include/gtest/gtest-test-part.h +192 -0
  60. include/gtest/gtest-typed-test.h +331 -0
  61. include/gtest/gtest.h +2321 -0
  62. include/gtest/gtest_pred_impl.h +279 -0
  63. include/gtest/gtest_prod.h +60 -0
  64. include/gtest/internal/custom/README.md +44 -0
  65. include/gtest/internal/custom/gtest-port.h +37 -0
  66. include/gtest/internal/custom/gtest-printers.h +42 -0
  67. include/gtest/internal/custom/gtest.h +37 -0
  68. include/gtest/internal/gtest-death-test-internal.h +307 -0
  69. include/gtest/internal/gtest-filepath.h +227 -0
  70. include/gtest/internal/gtest-internal.h +1560 -0
  71. include/gtest/internal/gtest-param-util.h +1026 -0
  72. include/gtest/internal/gtest-port-arch.h +122 -0
  73. include/gtest/internal/gtest-port.h +2481 -0
  74. include/gtest/internal/gtest-string.h +178 -0
  75. include/gtest/internal/gtest-type-util.h +220 -0
  76. lib/libetb_core.a +0 -0
  77. lib64/cmake/GTest/GTestConfig.cmake +33 -0
  78. lib64/cmake/GTest/GTestConfigVersion.cmake +43 -0
  79. lib64/cmake/GTest/GTestTargets-release.cmake +49 -0
  80. lib64/cmake/GTest/GTestTargets.cmake +139 -0
  81. lib64/libgmock.a +0 -0
  82. lib64/libgmock_main.a +0 -0
  83. lib64/libgtest.a +0 -0
  84. lib64/libgtest_main.a +0 -0
  85. lib64/pkgconfig/gmock.pc +10 -0
  86. lib64/pkgconfig/gmock_main.pc +10 -0
  87. lib64/pkgconfig/gtest.pc +9 -0
  88. lib64/pkgconfig/gtest_main.pc +10 -0
etb/__init__.pyi ADDED
@@ -0,0 +1,976 @@
1
+ """
2
+ Type stubs for the etb (ExplodeThoseBits) library.
3
+
4
+ ExplodeThoseBits is a CUDA-accelerated bit extraction library for forensic analysis.
5
+ """
6
+
7
+ from typing import List, Optional, Tuple, Iterator, Union
8
+ from enum import Enum
9
+
10
+ __version__: str
11
+ __author__: str
12
+
13
+ # ============================================================================
14
+ # Core Data Structures
15
+ # ============================================================================
16
+
17
+ class BitCoordinate:
18
+ """Represents a coordinate in the bit extraction space (byte_index, bit_position)."""
19
+
20
+ byte_index: int
21
+ """Index into the input byte array."""
22
+
23
+ bit_position: int
24
+ """Position within the byte [0-7], 0 = LSB."""
25
+
26
+ def __init__(self, byte_index: int = 0, bit_position: int = 0) -> None: ...
27
+ def is_valid(self, input_length: int) -> bool:
28
+ """Check if coordinate is valid for given input length."""
29
+ ...
30
+ def __eq__(self, other: object) -> bool: ...
31
+ def __ne__(self, other: object) -> bool: ...
32
+ def __lt__(self, other: "BitCoordinate") -> bool: ...
33
+ def __repr__(self) -> str: ...
34
+
35
+
36
+ class Path:
37
+ """A forward-only traversal sequence of bit coordinates."""
38
+
39
+ def __init__(self, capacity: int = 0) -> None: ...
40
+ def add(self, coord: BitCoordinate) -> None:
41
+ """Add a coordinate to the path."""
42
+ ...
43
+ def is_valid(self) -> bool:
44
+ """Check if path maintains forward-only constraint."""
45
+ ...
46
+ def length(self) -> int:
47
+ """Get the number of coordinates in the path."""
48
+ ...
49
+ def empty(self) -> bool:
50
+ """Check if path is empty."""
51
+ ...
52
+ def clear(self) -> None:
53
+ """Clear all coordinates from the path."""
54
+ ...
55
+ def reserve(self, capacity: int) -> None:
56
+ """Reserve capacity for coordinates."""
57
+ ...
58
+ def at(self, index: int) -> BitCoordinate:
59
+ """Get coordinate at index."""
60
+ ...
61
+ def back(self) -> BitCoordinate:
62
+ """Get the last coordinate."""
63
+ ...
64
+ def __getitem__(self, index: int) -> BitCoordinate: ...
65
+ def __len__(self) -> int: ...
66
+ def __iter__(self) -> Iterator[BitCoordinate]: ...
67
+ def __repr__(self) -> str: ...
68
+
69
+
70
+ # ============================================================================
71
+ # Bit Extraction Functions
72
+ # ============================================================================
73
+
74
+ def extract_bit(data: bytes, coord: BitCoordinate) -> int:
75
+ """Extract a single bit from byte data at the given coordinate."""
76
+ ...
77
+
78
+ def extract_bits_from_path(data: bytes, path: Path) -> List[int]:
79
+ """Extract bits at specified path coordinates from byte data."""
80
+ ...
81
+
82
+ def bits_to_bytes(bits: List[int]) -> bytes:
83
+ """Convert a sequence of bits to a byte array."""
84
+ ...
85
+
86
+ def path_to_bytes(source_data: bytes, path: Path) -> bytes:
87
+ """Convert a path with associated bit values to a byte array."""
88
+ ...
89
+
90
+ def bytes_to_bits(data: bytes) -> List[int]:
91
+ """Convert a byte array to a sequence of bits."""
92
+ ...
93
+
94
+
95
+ # ============================================================================
96
+ # Path Generation
97
+ # ============================================================================
98
+
99
+ class PathGeneratorConfig:
100
+ """Configuration for the path generator."""
101
+
102
+ input_length: int
103
+ max_path_length: int
104
+ starting_byte_index: int
105
+ bit_mask: int
106
+
107
+ def __init__(self, input_length: int) -> None: ...
108
+
109
+
110
+ class PathGenerator:
111
+ """Lazy path generator using iterator pattern."""
112
+
113
+ def __init__(self, input_length_or_config: Union[int, PathGeneratorConfig]) -> None: ...
114
+ def has_next(self) -> bool:
115
+ """Check if there are more paths to generate."""
116
+ ...
117
+ def next(self) -> Optional[Path]:
118
+ """Generate the next path."""
119
+ ...
120
+ def reset(self) -> None:
121
+ """Reset the generator to start from the beginning."""
122
+ ...
123
+ def paths_generated(self) -> int:
124
+ """Get the number of paths generated so far."""
125
+ ...
126
+ def __iter__(self) -> "PathGenerator": ...
127
+ def __next__(self) -> Path: ...
128
+
129
+
130
+ class PathCountResult:
131
+ """Result of path count estimation."""
132
+
133
+ estimated_count: int
134
+ is_exact: bool
135
+ exceeds_threshold: bool
136
+ log_count: float
137
+
138
+
139
+ def estimate_path_count(
140
+ input_length: int,
141
+ bits_per_byte: int = 8,
142
+ threshold: int = 0
143
+ ) -> PathCountResult:
144
+ """Estimate the path count with overflow detection."""
145
+ ...
146
+
147
+ def path_count_exceeds_threshold(
148
+ input_length: int,
149
+ bits_per_byte: int,
150
+ threshold: int
151
+ ) -> bool:
152
+ """Check if path count exceeds a threshold."""
153
+ ...
154
+
155
+
156
+ # ============================================================================
157
+ # Signature Detection
158
+ # ============================================================================
159
+
160
+ class FileSignature:
161
+ """Represents a single file signature (magic bytes)."""
162
+
163
+ magic_bytes: List[int]
164
+ mask: List[int]
165
+ offset: int
166
+ base_confidence: float
167
+
168
+ def __init__(self) -> None: ...
169
+
170
+
171
+ class FooterSignature:
172
+ """Represents a footer/trailer signature for a format."""
173
+
174
+ magic_bytes: List[int]
175
+ required: bool
176
+
177
+ def __init__(self) -> None: ...
178
+
179
+
180
+ class FormatDefinition:
181
+ """Represents a complete format definition with all its signatures."""
182
+
183
+ format_name: str
184
+ category: str
185
+ signatures: List[FileSignature]
186
+ format_id: int
187
+
188
+ def __init__(self) -> None: ...
189
+
190
+
191
+ class SignatureMatch:
192
+ """Result of a signature match operation."""
193
+
194
+ matched: bool
195
+ format_name: str
196
+ category: str
197
+ format_id: int
198
+ confidence: float
199
+ match_offset: int
200
+ header_matched: bool
201
+ footer_matched: bool
202
+
203
+ def __init__(self) -> None: ...
204
+ def __repr__(self) -> str: ...
205
+
206
+
207
+ class SignatureDictionary:
208
+ """Signature dictionary that loads and manages file signatures."""
209
+
210
+ def __init__(self) -> None: ...
211
+ def load_from_json(self, filepath: str) -> bool:
212
+ """Load signatures from a JSON file."""
213
+ ...
214
+ def load_from_json_string(self, json_content: str) -> bool:
215
+ """Load signatures from a JSON string."""
216
+ ...
217
+ def add_format(self, format: FormatDefinition) -> None:
218
+ """Add a format definition programmatically."""
219
+ ...
220
+ def get_formats(self) -> List[FormatDefinition]:
221
+ """Get all loaded format definitions."""
222
+ ...
223
+ def format_count(self) -> int:
224
+ """Get the number of loaded formats."""
225
+ ...
226
+ def clear(self) -> None:
227
+ """Clear all loaded signatures."""
228
+ ...
229
+ def empty(self) -> bool:
230
+ """Check if dictionary is empty."""
231
+ ...
232
+
233
+
234
+ class SignatureMatcher:
235
+ """Signature matcher that performs header and footer detection."""
236
+
237
+ def __init__(self, dictionary: SignatureDictionary) -> None: ...
238
+ def match(self, data: bytes, max_offset: int = 512) -> SignatureMatch:
239
+ """Match signatures against a byte sequence."""
240
+ ...
241
+
242
+
243
+ # ============================================================================
244
+ # Heuristics Engine
245
+ # ============================================================================
246
+
247
+ class HeuristicResult:
248
+ """Result of heuristic analysis on a byte sequence."""
249
+
250
+ entropy: float
251
+ """Shannon entropy [0.0, 8.0]."""
252
+
253
+ printable_ratio: float
254
+ """Ratio of printable ASCII [0.0, 1.0]."""
255
+
256
+ control_char_ratio: float
257
+ """Ratio of control characters [0.0, 1.0]."""
258
+
259
+ max_null_run: int
260
+ """Longest consecutive null byte run."""
261
+
262
+ utf8_validity: float
263
+ """UTF-8 sequence validity score [0.0, 1.0]."""
264
+
265
+ composite_score: float
266
+ """Weighted combination [0.0, 1.0]."""
267
+
268
+ def __init__(self) -> None: ...
269
+ def __repr__(self) -> str: ...
270
+
271
+
272
+ class HeuristicWeights:
273
+ """Configurable weights for composite heuristic scoring."""
274
+
275
+ entropy_weight: float
276
+ printable_weight: float
277
+ control_char_weight: float
278
+ null_run_weight: float
279
+ utf8_weight: float
280
+
281
+ def __init__(self) -> None: ...
282
+
283
+
284
+ class HeuristicsEngine:
285
+ """Heuristics Engine for analyzing byte sequences."""
286
+
287
+ def __init__(self, weights: Optional[HeuristicWeights] = None) -> None: ...
288
+ def set_weights(self, weights: HeuristicWeights) -> None: ...
289
+ def get_weights(self) -> HeuristicWeights: ...
290
+ def analyze(self, data: bytes) -> HeuristicResult:
291
+ """Perform full heuristic analysis on byte data."""
292
+ ...
293
+ @staticmethod
294
+ def calculate_entropy(data: bytes) -> float:
295
+ """Calculate Shannon entropy of byte data."""
296
+ ...
297
+ @staticmethod
298
+ def calculate_printable_ratio(data: bytes) -> float:
299
+ """Calculate the ratio of printable ASCII characters."""
300
+ ...
301
+ @staticmethod
302
+ def validate_utf8(data: bytes) -> float:
303
+ """Validate UTF-8 sequences and return a validity score."""
304
+ ...
305
+
306
+
307
+ # ============================================================================
308
+ # Early Stopping
309
+ # ============================================================================
310
+
311
+ class StopLevel(Enum):
312
+ """Stop levels for multi-level early stopping."""
313
+ LEVEL_1 = 4 # 2-4 bytes: signature prefix + basic heuristics
314
+ LEVEL_2 = 8 # 8 bytes: entropy bounds + checksum validation
315
+ LEVEL_3 = 16 # 16 bytes: structural coherence
316
+
317
+
318
+ class StopDecision:
319
+ """Result of an early stopping check."""
320
+
321
+ should_stop: bool
322
+ level: StopLevel
323
+ score: float
324
+ reason: str
325
+
326
+ def __init__(self) -> None: ...
327
+
328
+
329
+ class EarlyStoppingConfig:
330
+ """Configuration for early stopping thresholds."""
331
+
332
+ level1_bytes: int
333
+ level2_bytes: int
334
+ level3_bytes: int
335
+ min_entropy: float
336
+ max_entropy: float
337
+ level1_threshold: float
338
+ level2_threshold: float
339
+ level3_threshold: float
340
+ adaptive_thresholds: bool
341
+
342
+ def __init__(self) -> None: ...
343
+
344
+
345
+ class EarlyStoppingController:
346
+ """Early Stopping Controller for reducing search space."""
347
+
348
+ def __init__(self, config: Optional[EarlyStoppingConfig] = None) -> None: ...
349
+ def should_stop(self, data: bytes) -> StopDecision:
350
+ """Check if a path should be stopped at the current depth."""
351
+ ...
352
+ def update_best_score(self, score: float) -> None: ...
353
+ def get_adaptive_threshold(self) -> float: ...
354
+ @staticmethod
355
+ def is_repeated_byte_pattern(data: bytes) -> bool: ...
356
+ @staticmethod
357
+ def is_all_null(data: bytes) -> bool: ...
358
+
359
+
360
+ # ============================================================================
361
+ # Prefix Trie
362
+ # ============================================================================
363
+
364
+ class PrefixStatus(Enum):
365
+ """Status of a prefix trie node."""
366
+ UNKNOWN = 0 # Not yet evaluated
367
+ VALID = 1 # Prefix passed heuristics
368
+ PRUNED = 2 # Prefix failed heuristics
369
+
370
+
371
+ class PrefixTrieConfig:
372
+ """Configuration for the prefix trie."""
373
+
374
+ max_depth: int
375
+ initial_capacity: int
376
+ prune_threshold: float
377
+ branch_prune_count: int
378
+
379
+ def __init__(self) -> None: ...
380
+
381
+
382
+ class PrefixTrieStats:
383
+ """Statistics for prefix trie operations."""
384
+
385
+ total_lookups: int
386
+ cache_hits: int
387
+ nodes_created: int
388
+ nodes_pruned: int
389
+ children_eliminated: int
390
+
391
+ def __init__(self) -> None: ...
392
+
393
+
394
+ class PrefixTrieNode:
395
+ """Prefix trie node."""
396
+
397
+ reconstructed_byte: int
398
+ status: PrefixStatus
399
+ best_score: float
400
+ children_offset: int
401
+ visit_count: int
402
+ parent_index: int
403
+
404
+
405
+ class PrefixTrie:
406
+ """GPU-compatible trie for O(1) prefix lookup and pruning."""
407
+
408
+ def __init__(self, config: Optional[PrefixTrieConfig] = None) -> None: ...
409
+ def lookup(self, prefix: bytes) -> Optional[PrefixTrieNode]:
410
+ """Look up a prefix in the trie."""
411
+ ...
412
+ def insert(self, prefix: bytes, status: PrefixStatus, score: float) -> int:
413
+ """Insert or update a prefix in the trie."""
414
+ ...
415
+ def prune(self, prefix: bytes) -> int:
416
+ """Mark a prefix as pruned and eliminate all children."""
417
+ ...
418
+ def is_pruned(self, prefix: bytes) -> bool:
419
+ """Check if a prefix or any of its ancestors is pruned."""
420
+ ...
421
+ def get_effective_branching_factor(self) -> float: ...
422
+ def node_count(self) -> int: ...
423
+ def get_statistics(self) -> PrefixTrieStats: ...
424
+ def clear(self) -> None: ...
425
+
426
+
427
+ # ============================================================================
428
+ # Memoization
429
+ # ============================================================================
430
+
431
+ class MemoizationConfig:
432
+ """Configuration for the memoization cache."""
433
+
434
+ max_size_bytes: int
435
+ max_entries: int
436
+ enabled: bool
437
+
438
+ def __init__(self) -> None: ...
439
+
440
+
441
+ class MemoizationStats:
442
+ """Statistics for cache operations."""
443
+
444
+ hits: int
445
+ misses: int
446
+ insertions: int
447
+ evictions: int
448
+ current_entries: int
449
+ current_size_bytes: int
450
+
451
+ def __init__(self) -> None: ...
452
+ def hit_rate(self) -> float: ...
453
+
454
+
455
+ class PrefixCacheEntry:
456
+ """Result stored in the prefix cache."""
457
+
458
+ heuristics: HeuristicResult
459
+ score: float
460
+ should_prune: bool
461
+ access_count: int
462
+
463
+ def __init__(self) -> None: ...
464
+
465
+
466
+ class PrefixCache:
467
+ """Prefix Result Cache with LRU Eviction."""
468
+
469
+ def __init__(self, config: Optional[MemoizationConfig] = None) -> None: ...
470
+ def lookup(self, prefix: bytes) -> Optional[PrefixCacheEntry]:
471
+ """Look up a prefix in the cache."""
472
+ ...
473
+ def insert(
474
+ self,
475
+ prefix: bytes,
476
+ heuristics: HeuristicResult,
477
+ score: float,
478
+ should_prune: bool
479
+ ) -> bool:
480
+ """Insert or update a prefix result in the cache."""
481
+ ...
482
+ def contains(self, prefix: bytes) -> bool: ...
483
+ def size(self) -> int: ...
484
+ def empty(self) -> bool: ...
485
+ def clear(self) -> None: ...
486
+ def hit_rate(self) -> float: ...
487
+ def get_statistics(self) -> MemoizationStats: ...
488
+ def set_enabled(self, enabled: bool) -> None: ...
489
+ def is_enabled(self) -> bool: ...
490
+
491
+
492
+ # ============================================================================
493
+ # Bit Pruning
494
+ # ============================================================================
495
+
496
+ class BitPruningMode(Enum):
497
+ """Bit pruning modes that control which bit positions are explored."""
498
+ EXHAUSTIVE = 0 # All 8 bit positions (O(8^d))
499
+ MSB_ONLY = 1 # Only bits 4-7 (O(4^d))
500
+ SINGLE_BIT = 2 # Only 2 configured bit positions (O(2^d))
501
+ CUSTOM = 3 # User-defined bit mask
502
+
503
+
504
+ class BitPruningConfig:
505
+ """Configuration for the bit pruning system."""
506
+
507
+ mode: BitPruningMode
508
+ bit_mask: int
509
+
510
+ def __init__(
511
+ self,
512
+ mode_or_mask: Union[BitPruningMode, int, None] = None,
513
+ bit2: Optional[int] = None
514
+ ) -> None: ...
515
+ def is_bit_allowed(self, bit_pos: int) -> bool: ...
516
+ def allowed_bit_count(self) -> int: ...
517
+ def get_allowed_positions(self) -> List[int]: ...
518
+ def branching_factor(self) -> int: ...
519
+ def description(self) -> str: ...
520
+ def is_valid(self) -> bool: ...
521
+ def get_mask(self) -> int: ...
522
+
523
+
524
+ # ============================================================================
525
+ # Scoring System
526
+ # ============================================================================
527
+
528
+ class ScoringWeights:
529
+ """Configurable weights for composite scoring."""
530
+
531
+ signature_weight: float
532
+ heuristic_weight: float
533
+ length_weight: float
534
+ structure_weight: float
535
+
536
+ def __init__(self) -> None: ...
537
+ def is_valid(self) -> bool: ...
538
+ def normalize(self) -> None: ...
539
+
540
+
541
+ class StructuralValidation:
542
+ """Structural validation result for a candidate."""
543
+
544
+ validity_score: float
545
+ has_valid_length: bool
546
+ has_valid_checksum: bool
547
+ has_valid_pointers: bool
548
+
549
+ def __init__(self) -> None: ...
550
+
551
+
552
+ class Candidate:
553
+ """A candidate reconstruction with all associated metadata."""
554
+
555
+ path: Path
556
+ data: List[int]
557
+ format_id: int
558
+ format_name: str
559
+ confidence: float
560
+ heuristics: HeuristicResult
561
+ signature_match: SignatureMatch
562
+ structure: StructuralValidation
563
+ composite_score: float
564
+
565
+ def __init__(self) -> None: ...
566
+ def get_data_bytes(self) -> bytes:
567
+ """Get reconstructed data as Python bytes."""
568
+ ...
569
+ def __repr__(self) -> str: ...
570
+
571
+
572
+ class ScoreCalculator:
573
+ """Composite score calculator."""
574
+
575
+ def __init__(self, weights: Optional[ScoringWeights] = None) -> None: ...
576
+ def set_weights(self, weights: ScoringWeights) -> None: ...
577
+ def get_weights(self) -> ScoringWeights: ...
578
+ def calculate(
579
+ self,
580
+ signature_score: float,
581
+ heuristic_score: float,
582
+ length_score: float,
583
+ structure_score: float
584
+ ) -> float:
585
+ """Calculate composite score from component scores."""
586
+ ...
587
+ def score_candidate(self, candidate: Candidate, expected_length: int = 0) -> None:
588
+ """Calculate and populate a Candidate's composite score."""
589
+ ...
590
+
591
+
592
+ class CandidateQueue:
593
+ """Priority queue for tracking top-K candidates."""
594
+
595
+ def __init__(self, capacity: int = 10) -> None: ...
596
+ def push(self, candidate: Candidate) -> bool:
597
+ """Try to add a candidate to the queue."""
598
+ ...
599
+ def top(self) -> Candidate:
600
+ """Get the top candidate (highest score)."""
601
+ ...
602
+ def pop(self) -> Candidate:
603
+ """Remove and return the top candidate."""
604
+ ...
605
+ def get_top_k(self) -> List[Candidate]:
606
+ """Get all candidates sorted by score (descending)."""
607
+ ...
608
+ def min_score(self) -> float: ...
609
+ def would_accept(self, score: float) -> bool: ...
610
+ def size(self) -> int: ...
611
+ def capacity(self) -> int: ...
612
+ def empty(self) -> bool: ...
613
+ def full(self) -> bool: ...
614
+ def clear(self) -> None: ...
615
+ def set_capacity(self, new_capacity: int) -> None: ...
616
+
617
+
618
+ # ============================================================================
619
+ # Configuration System
620
+ # ============================================================================
621
+
622
+ class ConfigError(Enum):
623
+ """Error codes for configuration operations."""
624
+ NONE = 0
625
+ FILE_NOT_FOUND = 1
626
+ PARSE_ERROR = 2
627
+ INVALID_VALUE = 3
628
+ MISSING_REQUIRED_FIELD = 4
629
+ TYPE_MISMATCH = 5
630
+ OUT_OF_RANGE = 6
631
+
632
+
633
+ class ConfigResult:
634
+ """Result of a configuration operation."""
635
+
636
+ success: bool
637
+ error: ConfigError
638
+ message: str
639
+
640
+ def __init__(self) -> None: ...
641
+ def __bool__(self) -> bool: ...
642
+ def __repr__(self) -> str: ...
643
+
644
+
645
+ class OutputConfig:
646
+ """Output configuration options."""
647
+
648
+ top_n_results: int
649
+ save_partials: bool
650
+ include_paths: bool
651
+ metrics_verbosity: str
652
+
653
+ def __init__(self) -> None: ...
654
+
655
+
656
+ class PerformanceConfig:
657
+ """Performance configuration options."""
658
+
659
+ max_parallel_workers: int
660
+ cuda_streams: int
661
+ batch_size: int
662
+
663
+ def __init__(self) -> None: ...
664
+
665
+
666
+ class EtbConfig:
667
+ """Complete configuration for the etb library."""
668
+
669
+ signature_dictionary_path: str
670
+ early_stopping: EarlyStoppingConfig
671
+ heuristic_weights: HeuristicWeights
672
+ scoring_weights: ScoringWeights
673
+ bit_pruning: BitPruningConfig
674
+ memoization: MemoizationConfig
675
+ output: OutputConfig
676
+ performance: PerformanceConfig
677
+ entropy_min: float
678
+ entropy_max: float
679
+ min_printable_ratio: float
680
+ max_null_run: int
681
+
682
+ def __init__(self) -> None: ...
683
+ def validate(self) -> ConfigResult:
684
+ """Validate the entire configuration."""
685
+ ...
686
+
687
+
688
+ class ConfigManager:
689
+ """Configuration loader and manager."""
690
+
691
+ @staticmethod
692
+ def instance() -> "ConfigManager":
693
+ """Get the singleton instance."""
694
+ ...
695
+ def load_json(self, filepath: str) -> ConfigResult:
696
+ """Load configuration from a JSON file."""
697
+ ...
698
+ def load_json_string(self, json_content: str) -> ConfigResult:
699
+ """Load configuration from a JSON string."""
700
+ ...
701
+ def load_yaml(self, filepath: str) -> ConfigResult:
702
+ """Load configuration from a YAML file."""
703
+ ...
704
+ def load_yaml_string(self, yaml_content: str) -> ConfigResult:
705
+ """Load configuration from a YAML string."""
706
+ ...
707
+ def get_config(self) -> EtbConfig:
708
+ """Get the current configuration."""
709
+ ...
710
+ def set_config(self, config: EtbConfig) -> ConfigResult:
711
+ """Set the configuration."""
712
+ ...
713
+ def update_value(self, key: str, value: str) -> ConfigResult:
714
+ """Update a specific configuration value at runtime."""
715
+ ...
716
+ def reload(self) -> ConfigResult:
717
+ """Reload configuration from the last loaded file."""
718
+ ...
719
+ def is_loaded(self) -> bool: ...
720
+ def get_loaded_path(self) -> str: ...
721
+ def reset_to_defaults(self) -> None: ...
722
+ def save_json(self, filepath: str) -> ConfigResult: ...
723
+ def save_yaml(self, filepath: str) -> ConfigResult: ...
724
+ def to_json_string(self) -> str: ...
725
+ def to_yaml_string(self) -> str: ...
726
+
727
+
728
+ def load_config(filepath: str) -> ConfigResult:
729
+ """Load configuration from file (auto-detects format)."""
730
+ ...
731
+
732
+ def get_default_config() -> EtbConfig:
733
+ """Get the default configuration."""
734
+ ...
735
+
736
+
737
+ # ============================================================================
738
+ # High-Level Extract Function
739
+ # ============================================================================
740
+
741
+ def extract(
742
+ input: Union[bytes, str],
743
+ config: EtbConfig = ...,
744
+ max_paths: int = 1000000
745
+ ) -> List[Candidate]:
746
+ """
747
+ Extract hidden data from input bytes using bit-level reconstruction.
748
+
749
+ Args:
750
+ input: Input data as bytes or a file path string
751
+ config: EtbConfig object with extraction parameters
752
+ max_paths: Maximum number of paths to evaluate (default: 1,000,000)
753
+
754
+ Returns:
755
+ List of Candidate objects sorted by score (highest first)
756
+
757
+ Example:
758
+ >>> import etb
759
+ >>> candidates = etb.extract(b'\\x89PNG...', etb.EtbConfig())
760
+ >>> for c in candidates:
761
+ ... print(f"{c.format_name}: {c.confidence:.2f}")
762
+ """
763
+ ...
764
+
765
+ def extract_from_file(
766
+ filepath: str,
767
+ config: EtbConfig = ...,
768
+ max_paths: int = 1000000
769
+ ) -> List[Candidate]:
770
+ """Extract hidden data from a file."""
771
+ ...
772
+
773
+
774
+ # ============================================================================
775
+ # Reporting System
776
+ # ============================================================================
777
+
778
+ class ValidationReport:
779
+ """Validation report for a successful extraction."""
780
+
781
+ signature_valid: bool
782
+ structure_valid: bool
783
+ heuristics_valid: bool
784
+ overall_validity: float
785
+ validation_notes: str
786
+
787
+ def __init__(self) -> None: ...
788
+ def __repr__(self) -> str: ...
789
+
790
+
791
+ class SuccessResult:
792
+ """Success result containing extracted data and metadata."""
793
+
794
+ extracted_bytes: List[int]
795
+ detected_format: str
796
+ format_category: str
797
+ confidence: float
798
+ reconstruction_path: Path
799
+ validation: ValidationReport
800
+ heuristics: HeuristicResult
801
+ signature_match: SignatureMatch
802
+
803
+ def __init__(self) -> None: ...
804
+ def get_data_bytes(self) -> bytes:
805
+ """Get extracted data as Python bytes."""
806
+ ...
807
+ def __repr__(self) -> str: ...
808
+
809
+
810
+ class PartialMatch:
811
+ """Partial match information for failed extractions."""
812
+
813
+ partial_data: List[int]
814
+ possible_format: str
815
+ partial_score: float
816
+ depth_reached: int
817
+ failure_reason: str
818
+
819
+ def __init__(self) -> None: ...
820
+
821
+
822
+ class ParameterSuggestion:
823
+ """Suggestion for parameter adjustment when extraction fails."""
824
+
825
+ parameter_name: str
826
+ current_value: str
827
+ suggested_value: str
828
+ rationale: str
829
+
830
+ def __init__(
831
+ self,
832
+ parameter_name: str = "",
833
+ current_value: str = "",
834
+ suggested_value: str = "",
835
+ rationale: str = ""
836
+ ) -> None: ...
837
+
838
+
839
+ class FailureResult:
840
+ """Failure result containing diagnostic information."""
841
+
842
+ paths_explored: int
843
+ effective_depth_reached: int
844
+ best_partials: List[PartialMatch]
845
+ suggestions: List[ParameterSuggestion]
846
+ failure_summary: str
847
+
848
+ def __init__(self) -> None: ...
849
+ def __repr__(self) -> str: ...
850
+
851
+
852
+ class ExtractionMetrics:
853
+ """Extraction metrics for reporting."""
854
+
855
+ total_paths_possible: int
856
+ paths_evaluated: int
857
+ paths_pruned_level1: int
858
+ paths_pruned_level2: int
859
+ paths_pruned_level3: int
860
+ paths_pruned_prefix: int
861
+ effective_branching_factor: float
862
+ effective_depth: float
863
+ cache_hit_rate: float
864
+ level1_prune_rate: float
865
+ level2_prune_rate: float
866
+ level3_prune_rate: float
867
+ prefix_prune_rate: float
868
+ format_distribution: List[Tuple[str, int]]
869
+ wall_clock_seconds: float
870
+ average_time_per_path_us: float
871
+ gpu_utilization: float
872
+ complexity_reduction: str
873
+
874
+ def __init__(self) -> None: ...
875
+
876
+
877
+ class ExtractionResult:
878
+ """Complete extraction result combining success/failure with metrics."""
879
+
880
+ success: bool
881
+ candidates: List[SuccessResult]
882
+ failure: Optional[FailureResult]
883
+ metrics: ExtractionMetrics
884
+
885
+ def __init__(self) -> None: ...
886
+ def __repr__(self) -> str: ...
887
+
888
+
889
class SuccessResultBuilder:
    """Builder that assembles a ``SuccessResult``.

    Each ``set_*`` method and ``build_validation_report`` is annotated as
    returning ``SuccessResultBuilder``; ``build`` returns the
    ``SuccessResult``.
    """

    def __init__(self) -> None:
        ...

    def set_data(self, data: List[int]) -> "SuccessResultBuilder":
        ...

    def set_format(
        self,
        format_name: str,
        category: str = "",
    ) -> "SuccessResultBuilder":
        ...

    def set_confidence(self, confidence: float) -> "SuccessResultBuilder":
        ...

    def set_path(self, path: Path) -> "SuccessResultBuilder":
        ...

    def set_heuristics(
        self,
        heuristics: HeuristicResult,
    ) -> "SuccessResultBuilder":
        ...

    def set_signature_match(
        self,
        match: SignatureMatch,
    ) -> "SuccessResultBuilder":
        ...

    def set_structural_validation(
        self,
        structure: StructuralValidation,
    ) -> "SuccessResultBuilder":
        ...

    def build_validation_report(self) -> "SuccessResultBuilder":
        ...

    def build(self) -> SuccessResult:
        ...

    # Shortcut that maps a Candidate straight to a SuccessResult without an
    # explicit builder instance.
    @staticmethod
    def from_candidate(candidate: Candidate) -> SuccessResult:
        ...
904
+
905
+
906
class FailureResultBuilder:
    """Builder that assembles a ``FailureResult``.

    Every method except ``build`` is annotated as returning
    ``FailureResultBuilder``; ``build`` returns the ``FailureResult``.
    """

    def __init__(self) -> None:
        ...

    def set_paths_explored(self, count: int) -> "FailureResultBuilder":
        ...

    def set_effective_depth(self, depth: int) -> "FailureResultBuilder":
        ...

    def add_partial_match(
        self,
        partial: PartialMatch,
    ) -> "FailureResultBuilder":
        ...

    def add_partial_from_candidate(
        self,
        candidate: Candidate,
        failure_reason: str,
    ) -> "FailureResultBuilder":
        ...

    def add_suggestion(
        self,
        suggestion: ParameterSuggestion,
    ) -> "FailureResultBuilder":
        ...

    def generate_suggestions(
        self,
        metrics: ExtractionMetrics,
    ) -> "FailureResultBuilder":
        ...

    def set_summary(self, summary: str) -> "FailureResultBuilder":
        ...

    def generate_summary(self) -> "FailureResultBuilder":
        ...

    def build(self) -> FailureResult:
        ...
921
+
922
+
923
class MetricsReporter:
    """Reporter that collects extraction metrics.

    The ``set_*``, ``add_*``, ``calculate_*`` and ``generate_*`` methods are
    annotated as returning ``MetricsReporter``; ``build`` returns an
    ``ExtractionMetrics`` and ``to_string`` returns text.
    """

    def __init__(self) -> None:
        ...

    # Integer path counts.
    def set_total_paths_possible(self, count: int) -> "MetricsReporter":
        ...

    def set_paths_evaluated(self, count: int) -> "MetricsReporter":
        ...

    def set_paths_pruned_level1(self, count: int) -> "MetricsReporter":
        ...

    def set_paths_pruned_level2(self, count: int) -> "MetricsReporter":
        ...

    def set_paths_pruned_level3(self, count: int) -> "MetricsReporter":
        ...

    def set_paths_pruned_prefix(self, count: int) -> "MetricsReporter":
        ...

    # Floating-point figures.
    def set_effective_branching_factor(
        self,
        factor: float,
    ) -> "MetricsReporter":
        ...

    def set_effective_depth(self, depth: float) -> "MetricsReporter":
        ...

    def set_cache_hit_rate(self, rate: float) -> "MetricsReporter":
        ...

    # ``count`` defaults to 1.
    def add_format_detection(
        self,
        format: str,
        count: int = 1,
    ) -> "MetricsReporter":
        ...

    def set_wall_clock_time(self, seconds: float) -> "MetricsReporter":
        ...

    def set_gpu_utilization(self, utilization: float) -> "MetricsReporter":
        ...

    def calculate_derived_metrics(self) -> "MetricsReporter":
        ...

    def generate_complexity_reduction(
        self,
        input_length: int,
    ) -> "MetricsReporter":
        ...

    def build(self) -> ExtractionMetrics:
        ...

    # ``verbosity`` defaults to "full"; the other accepted values are not
    # visible in this stub.
    def to_string(self, verbosity: str = "full") -> str:
        ...
943
+
944
+
945
class ExtractionResultBuilder:
    """Builder that assembles a complete ``ExtractionResult``.

    Every method except ``build`` is annotated as returning
    ``ExtractionResultBuilder``; ``build`` returns the ``ExtractionResult``.
    """

    def __init__(self) -> None:
        ...

    def set_success(self, success: bool) -> "ExtractionResultBuilder":
        ...

    # Takes one SuccessResult.
    def add_candidate(
        self,
        result: SuccessResult,
    ) -> "ExtractionResultBuilder":
        ...

    # Takes a list of Candidate objects, unlike ``add_candidate``.
    def add_candidates(
        self,
        candidates: List[Candidate],
    ) -> "ExtractionResultBuilder":
        ...

    def set_failure(
        self,
        failure: FailureResult,
    ) -> "ExtractionResultBuilder":
        ...

    def set_metrics(
        self,
        metrics: ExtractionMetrics,
    ) -> "ExtractionResultBuilder":
        ...

    def build(self) -> ExtractionResult:
        ...
955
+
956
+
957
+ # Utility functions
958
def format_path(path: Path, max_coords: int = 10) -> str:
    """Return a human-readable string rendering of *path*.

    ``max_coords`` defaults to 10; presumably it caps how many coordinates
    are shown (not verifiable from this stub).
    """
961
+
962
def format_bytes_hex(data: bytes, max_bytes: int = 32) -> str:
    """Return *data* rendered as a hex string.

    ``max_bytes`` defaults to 32; presumably it limits how many bytes are
    rendered (not verifiable from this stub).
    """
965
+
966
def format_confidence(confidence: float) -> str:
    """Return the confidence score *confidence* as a percentage string."""
969
+
970
def format_duration(seconds: float) -> str:
    """Return the duration *seconds* in a human-readable form."""
973
+
974
def format_count(count: int) -> str:
    """Return the large number *count* with a suitable suffix (K, M, B)."""