explodethosebits 0.3.0__cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etb/__init__.py +351 -0
- etb/__init__.pyi +976 -0
- etb/_etb.cpython-39-x86_64-linux-gnu.so +0 -0
- etb/_version.py +34 -0
- etb/py.typed +2 -0
- explodethosebits-0.3.0.dist-info/METADATA +405 -0
- explodethosebits-0.3.0.dist-info/RECORD +88 -0
- explodethosebits-0.3.0.dist-info/WHEEL +6 -0
- explodethosebits-0.3.0.dist-info/licenses/LICENSE +21 -0
- explodethosebits-0.3.0.dist-info/sboms/auditwheel.cdx.json +1 -0
- explodethosebits.libs/libcudart-c3a75b33.so.12.8.90 +0 -0
- include/etb/bit_coordinate.hpp +45 -0
- include/etb/bit_extraction.hpp +79 -0
- include/etb/bit_pruning.hpp +122 -0
- include/etb/config.hpp +284 -0
- include/etb/cuda/arch_optimizations.cuh +358 -0
- include/etb/cuda/blackwell_optimizations.cuh +300 -0
- include/etb/cuda/cuda_common.cuh +265 -0
- include/etb/cuda/etb_cuda.cuh +200 -0
- include/etb/cuda/gpu_memory.cuh +406 -0
- include/etb/cuda/heuristics_kernel.cuh +315 -0
- include/etb/cuda/path_generator_kernel.cuh +272 -0
- include/etb/cuda/prefix_pruner_kernel.cuh +370 -0
- include/etb/cuda/signature_kernel.cuh +328 -0
- include/etb/early_stopping.hpp +246 -0
- include/etb/etb.hpp +20 -0
- include/etb/heuristics.hpp +165 -0
- include/etb/memoization.hpp +285 -0
- include/etb/path.hpp +86 -0
- include/etb/path_count.hpp +87 -0
- include/etb/path_generator.hpp +175 -0
- include/etb/prefix_trie.hpp +339 -0
- include/etb/reporting.hpp +437 -0
- include/etb/scoring.hpp +269 -0
- include/etb/signature.hpp +190 -0
- include/gmock/gmock-actions.h +2297 -0
- include/gmock/gmock-cardinalities.h +159 -0
- include/gmock/gmock-function-mocker.h +518 -0
- include/gmock/gmock-matchers.h +5623 -0
- include/gmock/gmock-more-actions.h +658 -0
- include/gmock/gmock-more-matchers.h +120 -0
- include/gmock/gmock-nice-strict.h +277 -0
- include/gmock/gmock-spec-builders.h +2148 -0
- include/gmock/gmock.h +96 -0
- include/gmock/internal/custom/README.md +18 -0
- include/gmock/internal/custom/gmock-generated-actions.h +7 -0
- include/gmock/internal/custom/gmock-matchers.h +37 -0
- include/gmock/internal/custom/gmock-port.h +40 -0
- include/gmock/internal/gmock-internal-utils.h +487 -0
- include/gmock/internal/gmock-port.h +139 -0
- include/gmock/internal/gmock-pp.h +279 -0
- include/gtest/gtest-assertion-result.h +237 -0
- include/gtest/gtest-death-test.h +345 -0
- include/gtest/gtest-matchers.h +923 -0
- include/gtest/gtest-message.h +252 -0
- include/gtest/gtest-param-test.h +546 -0
- include/gtest/gtest-printers.h +1161 -0
- include/gtest/gtest-spi.h +250 -0
- include/gtest/gtest-test-part.h +192 -0
- include/gtest/gtest-typed-test.h +331 -0
- include/gtest/gtest.h +2321 -0
- include/gtest/gtest_pred_impl.h +279 -0
- include/gtest/gtest_prod.h +60 -0
- include/gtest/internal/custom/README.md +44 -0
- include/gtest/internal/custom/gtest-port.h +37 -0
- include/gtest/internal/custom/gtest-printers.h +42 -0
- include/gtest/internal/custom/gtest.h +37 -0
- include/gtest/internal/gtest-death-test-internal.h +307 -0
- include/gtest/internal/gtest-filepath.h +227 -0
- include/gtest/internal/gtest-internal.h +1560 -0
- include/gtest/internal/gtest-param-util.h +1026 -0
- include/gtest/internal/gtest-port-arch.h +122 -0
- include/gtest/internal/gtest-port.h +2481 -0
- include/gtest/internal/gtest-string.h +178 -0
- include/gtest/internal/gtest-type-util.h +220 -0
- lib/libetb_core.a +0 -0
- lib64/cmake/GTest/GTestConfig.cmake +33 -0
- lib64/cmake/GTest/GTestConfigVersion.cmake +43 -0
- lib64/cmake/GTest/GTestTargets-release.cmake +49 -0
- lib64/cmake/GTest/GTestTargets.cmake +139 -0
- lib64/libgmock.a +0 -0
- lib64/libgmock_main.a +0 -0
- lib64/libgtest.a +0 -0
- lib64/libgtest_main.a +0 -0
- lib64/pkgconfig/gmock.pc +10 -0
- lib64/pkgconfig/gmock_main.pc +10 -0
- lib64/pkgconfig/gtest.pc +9 -0
- lib64/pkgconfig/gtest_main.pc +10 -0
etb/__init__.pyi
ADDED
|
@@ -0,0 +1,976 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Type stubs for the etb (ExplodeThoseBits) library.
|
|
3
|
+
|
|
4
|
+
ExplodeThoseBits is a CUDA-accelerated bit extraction library for forensic analysis.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional, Tuple, Iterator, Union
|
|
8
|
+
from enum import Enum
|
|
9
|
+
|
|
10
|
+
__version__: str
|
|
11
|
+
__author__: str
|
|
12
|
+
|
|
13
|
+
# ============================================================================
|
|
14
|
+
# Core Data Structures
|
|
15
|
+
# ============================================================================
|
|
16
|
+
|
|
17
|
+
class BitCoordinate:
    """Represents a coordinate in the bit extraction space (byte_index, bit_position)."""

    byte_index: int
    """Index into the input byte array."""

    bit_position: int
    """Position within the byte [0-7], 0 = LSB."""

    def __init__(self, byte_index: int = 0, bit_position: int = 0) -> None: ...
    def is_valid(self, input_length: int) -> bool:
        """Check if coordinate is valid for given input length."""
        ...
    # Comparison protocol declared by the stub: equality, inequality and
    # a strict "less than" against another BitCoordinate.
    def __eq__(self, other: object) -> bool: ...
    def __ne__(self, other: object) -> bool: ...
    def __lt__(self, other: "BitCoordinate") -> bool: ...
    def __repr__(self) -> str: ...
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Path:
    """A forward-only traversal sequence of bit coordinates."""

    def __init__(self, capacity: int = 0) -> None: ...
    def add(self, coord: BitCoordinate) -> None:
        """Add a coordinate to the path."""
        ...
    def is_valid(self) -> bool:
        """Check if path maintains forward-only constraint."""
        ...
    def length(self) -> int:
        """Get the number of coordinates in the path."""
        ...
    def empty(self) -> bool:
        """Check if path is empty."""
        ...
    def clear(self) -> None:
        """Clear all coordinates from the path."""
        ...
    def reserve(self, capacity: int) -> None:
        """Reserve capacity for coordinates."""
        ...
    def at(self, index: int) -> BitCoordinate:
        """Get coordinate at index."""
        ...
    def back(self) -> BitCoordinate:
        """Get the last coordinate."""
        ...
    # Sequence-style protocol: indexing, len() and iteration over coordinates.
    def __getitem__(self, index: int) -> BitCoordinate: ...
    def __len__(self) -> int: ...
    def __iter__(self) -> Iterator[BitCoordinate]: ...
    def __repr__(self) -> str: ...
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# ============================================================================
|
|
71
|
+
# Bit Extraction Functions
|
|
72
|
+
# ============================================================================
|
|
73
|
+
|
|
74
|
+
def extract_bit(data: bytes, coord: BitCoordinate) -> int:
    """Extract a single bit from byte data at the given coordinate."""
    ...
|
|
77
|
+
|
|
78
|
+
def extract_bits_from_path(data: bytes, path: Path) -> List[int]:
    """Extract bits at specified path coordinates from byte data."""
    ...
|
|
81
|
+
|
|
82
|
+
def bits_to_bytes(bits: List[int]) -> bytes:
    """Convert a sequence of bits to a byte array."""
    ...
|
|
85
|
+
|
|
86
|
+
def path_to_bytes(source_data: bytes, path: Path) -> bytes:
    """Convert a path with associated bit values to a byte array."""
    ...
|
|
89
|
+
|
|
90
|
+
def bytes_to_bits(data: bytes) -> List[int]:
    """Convert a byte array to a sequence of bits."""
    ...
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# ============================================================================
|
|
96
|
+
# Path Generation
|
|
97
|
+
# ============================================================================
|
|
98
|
+
|
|
99
|
+
class PathGeneratorConfig:
    """Configuration for the path generator."""

    input_length: int
    max_path_length: int
    starting_byte_index: int
    bit_mask: int

    # Only input_length is accepted by the constructor; the remaining
    # fields are declared as plain attributes.
    def __init__(self, input_length: int) -> None: ...
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class PathGenerator:
    """Lazy path generator using iterator pattern."""

    # Accepts either a bare input length or a full PathGeneratorConfig.
    def __init__(self, input_length_or_config: Union[int, PathGeneratorConfig]) -> None: ...
    def has_next(self) -> bool:
        """Check if there are more paths to generate."""
        ...
    def next(self) -> Optional[Path]:
        """Generate the next path."""
        ...
    def reset(self) -> None:
        """Reset the generator to start from the beginning."""
        ...
    def paths_generated(self) -> int:
        """Get the number of paths generated so far."""
        ...
    # Python iterator protocol; __iter__ is declared to return the generator itself.
    def __iter__(self) -> "PathGenerator": ...
    def __next__(self) -> Path: ...
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class PathCountResult:
    """Result of path count estimation."""

    estimated_count: int
    is_exact: bool
    exceeds_threshold: bool
    log_count: float
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def estimate_path_count(
    input_length: int,
    bits_per_byte: int = 8,
    threshold: int = 0
) -> PathCountResult:
    """Estimate the path count with overflow detection."""
    ...
|
|
146
|
+
|
|
147
|
+
def path_count_exceeds_threshold(
    input_length: int,
    bits_per_byte: int,
    threshold: int
) -> bool:
    """Check if path count exceeds a threshold."""
    ...
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# ============================================================================
|
|
157
|
+
# Signature Detection
|
|
158
|
+
# ============================================================================
|
|
159
|
+
|
|
160
|
+
class FileSignature:
    """Represents a single file signature (magic bytes)."""

    magic_bytes: List[int]
    mask: List[int]
    offset: int
    base_confidence: float

    def __init__(self) -> None: ...
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class FooterSignature:
    """Represents a footer/trailer signature for a format."""

    magic_bytes: List[int]
    required: bool

    def __init__(self) -> None: ...
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class FormatDefinition:
    """Represents a complete format definition with all its signatures."""

    format_name: str
    category: str
    signatures: List[FileSignature]
    format_id: int

    def __init__(self) -> None: ...
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
class SignatureMatch:
    """Result of a signature match operation."""

    matched: bool
    format_name: str
    category: str
    format_id: int
    confidence: float
    match_offset: int
    header_matched: bool
    footer_matched: bool

    def __init__(self) -> None: ...
    def __repr__(self) -> str: ...
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class SignatureDictionary:
    """Signature dictionary that loads and manages file signatures."""

    def __init__(self) -> None: ...
    def load_from_json(self, filepath: str) -> bool:
        """Load signatures from a JSON file."""
        ...
    def load_from_json_string(self, json_content: str) -> bool:
        """Load signatures from a JSON string."""
        ...
    # NOTE: the parameter name `format` shadows the builtin; it is kept
    # unchanged so keyword callers keep working.
    def add_format(self, format: FormatDefinition) -> None:
        """Add a format definition programmatically."""
        ...
    def get_formats(self) -> List[FormatDefinition]:
        """Get all loaded format definitions."""
        ...
    def format_count(self) -> int:
        """Get the number of loaded formats."""
        ...
    def clear(self) -> None:
        """Clear all loaded signatures."""
        ...
    def empty(self) -> bool:
        """Check if dictionary is empty."""
        ...
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
class SignatureMatcher:
    """Signature matcher that performs header and footer detection."""

    def __init__(self, dictionary: SignatureDictionary) -> None: ...
    def match(self, data: bytes, max_offset: int = 512) -> SignatureMatch:
        """Match signatures against a byte sequence."""
        ...
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
# ============================================================================
|
|
244
|
+
# Heuristics Engine
|
|
245
|
+
# ============================================================================
|
|
246
|
+
|
|
247
|
+
class HeuristicResult:
    """Result of heuristic analysis on a byte sequence."""

    entropy: float
    """Shannon entropy [0.0, 8.0]."""

    printable_ratio: float
    """Ratio of printable ASCII [0.0, 1.0]."""

    control_char_ratio: float
    """Ratio of control characters [0.0, 1.0]."""

    max_null_run: int
    """Longest consecutive null byte run."""

    utf8_validity: float
    """UTF-8 sequence validity score [0.0, 1.0]."""

    composite_score: float
    """Weighted combination [0.0, 1.0]."""

    def __init__(self) -> None: ...
    def __repr__(self) -> str: ...
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
class HeuristicWeights:
    """Configurable weights for composite heuristic scoring."""

    entropy_weight: float
    printable_weight: float
    control_char_weight: float
    null_run_weight: float
    utf8_weight: float

    def __init__(self) -> None: ...
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
class HeuristicsEngine:
    """Heuristics Engine for analyzing byte sequences."""

    def __init__(self, weights: Optional[HeuristicWeights] = None) -> None: ...
    def set_weights(self, weights: HeuristicWeights) -> None: ...
    def get_weights(self) -> HeuristicWeights: ...
    def analyze(self, data: bytes) -> HeuristicResult:
        """Perform full heuristic analysis on byte data."""
        ...
    # The individual metrics below are declared static, so they can be
    # called on the class without constructing an engine.
    @staticmethod
    def calculate_entropy(data: bytes) -> float:
        """Calculate Shannon entropy of byte data."""
        ...
    @staticmethod
    def calculate_printable_ratio(data: bytes) -> float:
        """Calculate the ratio of printable ASCII characters."""
        ...
    @staticmethod
    def validate_utf8(data: bytes) -> float:
        """Validate UTF-8 sequences and return a validity score."""
        ...
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
# ============================================================================
|
|
308
|
+
# Early Stopping
|
|
309
|
+
# ============================================================================
|
|
310
|
+
|
|
311
|
+
class StopLevel(Enum):
    """Stop levels for multi-level early stopping."""

    LEVEL_1 = 4   # 2-4 bytes: signature prefix + basic heuristics
    LEVEL_2 = 8   # 8 bytes: entropy bounds + checksum validation
    LEVEL_3 = 16  # 16 bytes: structural coherence
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
class StopDecision:
    """Result of an early stopping check."""

    should_stop: bool
    level: StopLevel
    score: float
    reason: str

    def __init__(self) -> None: ...
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
class EarlyStoppingConfig:
    """Configuration for early stopping thresholds."""

    level1_bytes: int
    level2_bytes: int
    level3_bytes: int
    min_entropy: float
    max_entropy: float
    level1_threshold: float
    level2_threshold: float
    level3_threshold: float
    adaptive_thresholds: bool

    def __init__(self) -> None: ...
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
class EarlyStoppingController:
    """Early Stopping Controller for reducing search space."""

    def __init__(self, config: Optional[EarlyStoppingConfig] = None) -> None: ...
    def should_stop(self, data: bytes) -> StopDecision:
        """Check if a path should be stopped at the current depth."""
        ...
    def update_best_score(self, score: float) -> None: ...
    def get_adaptive_threshold(self) -> float: ...
    # Static byte-pattern predicates; callable on the class directly.
    @staticmethod
    def is_repeated_byte_pattern(data: bytes) -> bool: ...
    @staticmethod
    def is_all_null(data: bytes) -> bool: ...
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
# ============================================================================
|
|
361
|
+
# Prefix Trie
|
|
362
|
+
# ============================================================================
|
|
363
|
+
|
|
364
|
+
class PrefixStatus(Enum):
    """Status of a prefix trie node."""

    UNKNOWN = 0  # Not yet evaluated
    VALID = 1    # Prefix passed heuristics
    PRUNED = 2   # Prefix failed heuristics
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
class PrefixTrieConfig:
    """Configuration for the prefix trie."""

    max_depth: int
    initial_capacity: int
    prune_threshold: float
    branch_prune_count: int

    def __init__(self) -> None: ...
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
class PrefixTrieStats:
    """Statistics for prefix trie operations."""

    total_lookups: int
    cache_hits: int
    nodes_created: int
    nodes_pruned: int
    children_eliminated: int

    def __init__(self) -> None: ...
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
class PrefixTrieNode:
    """Prefix trie node."""

    # The stub declares no constructor for this class, only its fields.
    reconstructed_byte: int
    status: PrefixStatus
    best_score: float
    children_offset: int
    visit_count: int
    parent_index: int
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
class PrefixTrie:
    """GPU-compatible trie for O(1) prefix lookup and pruning."""

    def __init__(self, config: Optional[PrefixTrieConfig] = None) -> None: ...
    def lookup(self, prefix: bytes) -> Optional[PrefixTrieNode]:
        """Look up a prefix in the trie."""
        ...
    def insert(self, prefix: bytes, status: PrefixStatus, score: float) -> int:
        """Insert or update a prefix in the trie."""
        ...
    def prune(self, prefix: bytes) -> int:
        """Mark a prefix as pruned and eliminate all children."""
        ...
    def is_pruned(self, prefix: bytes) -> bool:
        """Check if a prefix or any of its ancestors is pruned."""
        ...
    def get_effective_branching_factor(self) -> float: ...
    def node_count(self) -> int: ...
    def get_statistics(self) -> PrefixTrieStats: ...
    def clear(self) -> None: ...
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
# ============================================================================
|
|
428
|
+
# Memoization
|
|
429
|
+
# ============================================================================
|
|
430
|
+
|
|
431
|
+
class MemoizationConfig:
    """Configuration for the memoization cache."""

    max_size_bytes: int
    max_entries: int
    enabled: bool

    def __init__(self) -> None: ...
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
class MemoizationStats:
    """Statistics for cache operations."""

    hits: int
    misses: int
    insertions: int
    evictions: int
    current_entries: int
    current_size_bytes: int

    def __init__(self) -> None: ...
    def hit_rate(self) -> float: ...
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
class PrefixCacheEntry:
    """Result stored in the prefix cache."""

    heuristics: HeuristicResult
    score: float
    should_prune: bool
    access_count: int

    def __init__(self) -> None: ...
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
class PrefixCache:
    """Prefix Result Cache with LRU Eviction."""

    def __init__(self, config: Optional[MemoizationConfig] = None) -> None: ...
    def lookup(self, prefix: bytes) -> Optional[PrefixCacheEntry]:
        """Look up a prefix in the cache."""
        ...
    def insert(
        self,
        prefix: bytes,
        heuristics: HeuristicResult,
        score: float,
        should_prune: bool
    ) -> bool:
        """Insert or update a prefix result in the cache."""
        ...
    def contains(self, prefix: bytes) -> bool: ...
    def size(self) -> int: ...
    def empty(self) -> bool: ...
    def clear(self) -> None: ...
    def hit_rate(self) -> float: ...
    def get_statistics(self) -> MemoizationStats: ...
    def set_enabled(self, enabled: bool) -> None: ...
    def is_enabled(self) -> bool: ...
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
# ============================================================================
|
|
493
|
+
# Bit Pruning
|
|
494
|
+
# ============================================================================
|
|
495
|
+
|
|
496
|
+
class BitPruningMode(Enum):
    """Bit pruning modes that control which bit positions are explored."""

    EXHAUSTIVE = 0  # All 8 bit positions (O(8^d))
    MSB_ONLY = 1    # Only bits 4-7 (O(4^d))
    SINGLE_BIT = 2  # Only 2 configured bit positions (O(2^d))
    CUSTOM = 3      # User-defined bit mask
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
class BitPruningConfig:
    """Configuration for the bit pruning system."""

    mode: BitPruningMode
    bit_mask: int

    # The first argument is declared as a mode, an int, or None; the second
    # is an optional int. NOTE(review): how the int form and `bit2` are
    # interpreted is not visible in this stub; confirm against the
    # implementation before relying on it.
    def __init__(
        self,
        mode_or_mask: Union[BitPruningMode, int, None] = None,
        bit2: Optional[int] = None
    ) -> None: ...
    def is_bit_allowed(self, bit_pos: int) -> bool: ...
    def allowed_bit_count(self) -> int: ...
    def get_allowed_positions(self) -> List[int]: ...
    def branching_factor(self) -> int: ...
    def description(self) -> str: ...
    def is_valid(self) -> bool: ...
    def get_mask(self) -> int: ...
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
# ============================================================================
|
|
525
|
+
# Scoring System
|
|
526
|
+
# ============================================================================
|
|
527
|
+
|
|
528
|
+
class ScoringWeights:
    """Configurable weights for composite scoring."""

    signature_weight: float
    heuristic_weight: float
    length_weight: float
    structure_weight: float

    def __init__(self) -> None: ...
    def is_valid(self) -> bool: ...
    def normalize(self) -> None: ...
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
class StructuralValidation:
    """Structural validation result for a candidate."""

    validity_score: float
    has_valid_length: bool
    has_valid_checksum: bool
    has_valid_pointers: bool

    def __init__(self) -> None: ...
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
class Candidate:
    """A candidate reconstruction with all associated metadata."""

    path: Path
    data: List[int]
    format_id: int
    format_name: str
    confidence: float
    heuristics: HeuristicResult
    signature_match: SignatureMatch
    structure: StructuralValidation
    composite_score: float

    def __init__(self) -> None: ...
    def get_data_bytes(self) -> bytes:
        """Get reconstructed data as Python bytes."""
        ...
    def __repr__(self) -> str: ...
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
class ScoreCalculator:
    """Composite score calculator."""

    def __init__(self, weights: Optional[ScoringWeights] = None) -> None: ...
    def set_weights(self, weights: ScoringWeights) -> None: ...
    def get_weights(self) -> ScoringWeights: ...
    def calculate(
        self,
        signature_score: float,
        heuristic_score: float,
        length_score: float,
        structure_score: float
    ) -> float:
        """Calculate composite score from component scores."""
        ...
    # Declared to return None: the score is written onto the candidate
    # rather than returned.
    def score_candidate(self, candidate: Candidate, expected_length: int = 0) -> None:
        """Calculate and populate a Candidate's composite score."""
        ...
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
class CandidateQueue:
    """Priority queue for tracking top-K candidates."""

    def __init__(self, capacity: int = 10) -> None: ...
    def push(self, candidate: Candidate) -> bool:
        """Try to add a candidate to the queue."""
        ...
    def top(self) -> Candidate:
        """Get the top candidate (highest score)."""
        ...
    def pop(self) -> Candidate:
        """Remove and return the top candidate."""
        ...
    def get_top_k(self) -> List[Candidate]:
        """Get all candidates sorted by score (descending)."""
        ...
    def min_score(self) -> float: ...
    def would_accept(self, score: float) -> bool: ...
    def size(self) -> int: ...
    def capacity(self) -> int: ...
    def empty(self) -> bool: ...
    def full(self) -> bool: ...
    def clear(self) -> None: ...
    def set_capacity(self, new_capacity: int) -> None: ...
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
# ============================================================================
|
|
619
|
+
# Configuration System
|
|
620
|
+
# ============================================================================
|
|
621
|
+
|
|
622
|
+
class ConfigError(Enum):
    """Error codes for configuration operations."""

    NONE = 0
    FILE_NOT_FOUND = 1
    PARSE_ERROR = 2
    INVALID_VALUE = 3
    MISSING_REQUIRED_FIELD = 4
    TYPE_MISMATCH = 5
    OUT_OF_RANGE = 6
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
class ConfigResult:
    """Result of a configuration operation."""

    success: bool
    error: ConfigError
    message: str

    def __init__(self) -> None: ...
    # Declares truthiness support, so a result can be used directly in a
    # boolean context.
    def __bool__(self) -> bool: ...
    def __repr__(self) -> str: ...
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
class OutputConfig:
    """Output configuration options."""

    top_n_results: int
    save_partials: bool
    include_paths: bool
    metrics_verbosity: str

    def __init__(self) -> None: ...
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
class PerformanceConfig:
    """Performance configuration options."""

    max_parallel_workers: int
    cuda_streams: int
    batch_size: int

    def __init__(self) -> None: ...
|
|
664
|
+
|
|
665
|
+
|
|
666
|
+
class EtbConfig:
    """Complete configuration for the etb library."""

    signature_dictionary_path: str
    # Nested per-subsystem configuration objects.
    early_stopping: EarlyStoppingConfig
    heuristic_weights: HeuristicWeights
    scoring_weights: ScoringWeights
    bit_pruning: BitPruningConfig
    memoization: MemoizationConfig
    output: OutputConfig
    performance: PerformanceConfig
    # Top-level scalar settings.
    entropy_min: float
    entropy_max: float
    min_printable_ratio: float
    max_null_run: int

    def __init__(self) -> None: ...
    def validate(self) -> ConfigResult:
        """Validate the entire configuration."""
        ...
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
class ConfigManager:
    """Configuration loader and manager."""

    # No constructor is declared; the stub exposes `instance()` as the way
    # to obtain the manager.
    @staticmethod
    def instance() -> "ConfigManager":
        """Get the singleton instance."""
        ...
    def load_json(self, filepath: str) -> ConfigResult:
        """Load configuration from a JSON file."""
        ...
    def load_json_string(self, json_content: str) -> ConfigResult:
        """Load configuration from a JSON string."""
        ...
    def load_yaml(self, filepath: str) -> ConfigResult:
        """Load configuration from a YAML file."""
        ...
    def load_yaml_string(self, yaml_content: str) -> ConfigResult:
        """Load configuration from a YAML string."""
        ...
    def get_config(self) -> EtbConfig:
        """Get the current configuration."""
        ...
    def set_config(self, config: EtbConfig) -> ConfigResult:
        """Set the configuration."""
        ...
    # Both key and value are declared as strings.
    def update_value(self, key: str, value: str) -> ConfigResult:
        """Update a specific configuration value at runtime."""
        ...
    def reload(self) -> ConfigResult:
        """Reload configuration from the last loaded file."""
        ...
    def is_loaded(self) -> bool: ...
    def get_loaded_path(self) -> str: ...
    def reset_to_defaults(self) -> None: ...
    def save_json(self, filepath: str) -> ConfigResult: ...
    def save_yaml(self, filepath: str) -> ConfigResult: ...
    def to_json_string(self) -> str: ...
    def to_yaml_string(self) -> str: ...
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def load_config(filepath: str) -> ConfigResult:
    """Load configuration from file (auto-detects format)."""
    ...
|
|
731
|
+
|
|
732
|
+
def get_default_config() -> EtbConfig:
    """Get the default configuration."""
    ...
|
|
735
|
+
|
|
736
|
+
|
|
737
|
+
# ============================================================================
|
|
738
|
+
# High-Level Extract Function
|
|
739
|
+
# ============================================================================
|
|
740
|
+
|
|
741
|
+
def extract(
    input: Union[bytes, str],
    config: EtbConfig = ...,
    max_paths: int = 1000000
) -> List[Candidate]:
    """
    Extract hidden data from input bytes using bit-level reconstruction.

    Args:
        input: Input data as bytes or a file path string
        config: EtbConfig object with extraction parameters
        max_paths: Maximum number of paths to evaluate (default: 1,000,000)

    Returns:
        List of Candidate objects sorted by score (highest first)

    Example:
        >>> import etb
        >>> candidates = etb.extract(b'\\x89PNG...', etb.EtbConfig())
        >>> for c in candidates:
        ...     print(f"{c.format_name}: {c.confidence:.2f}")
    """
    ...
|
|
764
|
+
|
|
765
|
+
def extract_from_file(
    filepath: str,
    config: EtbConfig = ...,
    max_paths: int = 1000000
) -> List[Candidate]:
    """Extract hidden data from a file.

    Args:
        filepath: Location of the file to read the input from.
        config: EtbConfig supplying the extraction parameters.
        max_paths: Upper bound on evaluated paths; defaults to 1000000.

    Returns:
        A list of Candidate objects.
        NOTE(review): ordering is presumably the same as ``extract``
        (by score) — confirm against the implementation.
    """
    ...
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
# ============================================================================
|
|
775
|
+
# Reporting System
|
|
776
|
+
# ============================================================================
|
|
777
|
+
|
|
778
|
+
class ValidationReport:
    """Validation report for a successful extraction."""

    # One flag per check; the names suggest signature, structure and
    # heuristic validation respectively.
    signature_valid: bool
    structure_valid: bool
    heuristics_valid: bool
    # Combined validity as a float.
    # NOTE(review): the value range is not visible in this stub.
    overall_validity: float
    # Free-text notes attached to the report.
    validation_notes: str

    def __init__(self) -> None: ...
    def __repr__(self) -> str: ...
|
|
789
|
+
|
|
790
|
+
|
|
791
|
+
class SuccessResult:
    """Success result containing extracted data and metadata."""

    # Extracted data as a list of ints.
    # NOTE(review): presumably byte values 0-255 — confirm.
    extracted_bytes: List[int]
    detected_format: str
    format_category: str
    confidence: float
    # Path, ValidationReport, HeuristicResult and SignatureMatch are
    # declared elsewhere in this stub.
    reconstruction_path: Path
    validation: ValidationReport
    heuristics: HeuristicResult
    signature_match: SignatureMatch

    def __init__(self) -> None: ...
    def get_data_bytes(self) -> bytes:
        """Get extracted data as Python bytes."""
        ...
    def __repr__(self) -> str: ...
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
class PartialMatch:
    """Partial match information for failed extractions."""

    # Partially recovered data as a list of ints.
    # NOTE(review): presumably byte values 0-255 — confirm.
    partial_data: List[int]
    possible_format: str
    partial_score: float
    depth_reached: int
    failure_reason: str

    def __init__(self) -> None: ...
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
class ParameterSuggestion:
    """Suggestion for parameter adjustment when extraction fails."""

    # All four fields are strings, including the current and suggested
    # values.
    parameter_name: str
    current_value: str
    suggested_value: str
    rationale: str

    def __init__(
        self,
        parameter_name: str = "",
        current_value: str = "",
        suggested_value: str = "",
        rationale: str = ""
    ) -> None: ...
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
class FailureResult:
    """Failure result containing diagnostic information."""

    paths_explored: int
    effective_depth_reached: int
    # PartialMatch and ParameterSuggestion are declared elsewhere in
    # this stub.
    best_partials: List[PartialMatch]
    suggestions: List[ParameterSuggestion]
    failure_summary: str

    def __init__(self) -> None: ...
    def __repr__(self) -> str: ...
|
|
850
|
+
|
|
851
|
+
|
|
852
|
+
class ExtractionMetrics:
    """Extraction metrics for reporting."""

    # Path counters.
    total_paths_possible: int
    paths_evaluated: int
    paths_pruned_level1: int
    paths_pruned_level2: int
    paths_pruned_level3: int
    paths_pruned_prefix: int

    # Search-shape and cache figures.
    effective_branching_factor: float
    effective_depth: float
    cache_hit_rate: float

    # Prune rates, one per pruning counter above.
    # NOTE(review): whether rates are fractions or percentages is not
    # visible in this stub.
    level1_prune_rate: float
    level2_prune_rate: float
    level3_prune_rate: float
    prefix_prune_rate: float

    # (str, int) pairs; presumably format name and detection count —
    # TODO confirm.
    format_distribution: List[Tuple[str, int]]

    # Timing and utilization; the ``_us`` suffix suggests microseconds —
    # TODO confirm.
    wall_clock_seconds: float
    average_time_per_path_us: float
    gpu_utilization: float
    complexity_reduction: str

    def __init__(self) -> None: ...
|
|
875
|
+
|
|
876
|
+
|
|
877
|
+
class ExtractionResult:
    """Complete extraction result combining success/failure with metrics."""

    success: bool
    candidates: List[SuccessResult]
    # Optional, so this may be None.
    # NOTE(review): presumably None when ``success`` is True — confirm.
    failure: Optional[FailureResult]
    metrics: ExtractionMetrics

    def __init__(self) -> None: ...
    def __repr__(self) -> str: ...
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
class SuccessResultBuilder:
    """Builder for success results.

    Every ``set_*`` method and ``build_validation_report`` is annotated
    as returning the builder, so calls can be chained; ``build`` returns
    the SuccessResult.
    """

    def __init__(self) -> None: ...
    def set_data(self, data: List[int]) -> "SuccessResultBuilder": ...
    def set_format(self, format_name: str, category: str = "") -> "SuccessResultBuilder": ...
    def set_confidence(self, confidence: float) -> "SuccessResultBuilder": ...
    def set_path(self, path: Path) -> "SuccessResultBuilder": ...
    def set_heuristics(self, heuristics: HeuristicResult) -> "SuccessResultBuilder": ...
    def set_signature_match(self, match: SignatureMatch) -> "SuccessResultBuilder": ...
    def set_structural_validation(self, structure: StructuralValidation) -> "SuccessResultBuilder": ...
    def build_validation_report(self) -> "SuccessResultBuilder": ...
    def build(self) -> SuccessResult: ...
    # Shortcut that takes a Candidate and returns a SuccessResult
    # directly, without an explicit builder instance.
    @staticmethod
    def from_candidate(candidate: Candidate) -> SuccessResult: ...
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
class FailureResultBuilder:
    """Builder for failure results.

    Every method except ``build`` is annotated as returning the builder,
    so calls can be chained; ``build`` returns the FailureResult.
    """

    def __init__(self) -> None: ...
    def set_paths_explored(self, count: int) -> "FailureResultBuilder": ...
    def set_effective_depth(self, depth: int) -> "FailureResultBuilder": ...
    def add_partial_match(self, partial: PartialMatch) -> "FailureResultBuilder": ...
    def add_partial_from_candidate(
        self, candidate: Candidate, failure_reason: str
    ) -> "FailureResultBuilder": ...
    def add_suggestion(self, suggestion: ParameterSuggestion) -> "FailureResultBuilder": ...
    def generate_suggestions(self, metrics: ExtractionMetrics) -> "FailureResultBuilder": ...
    def set_summary(self, summary: str) -> "FailureResultBuilder": ...
    def generate_summary(self) -> "FailureResultBuilder": ...
    def build(self) -> FailureResult: ...
|
|
921
|
+
|
|
922
|
+
|
|
923
|
+
class MetricsReporter:
    """Metrics reporter for extraction results.

    The setters and the ``add_``/``calculate_``/``generate_`` methods
    are annotated as returning the reporter, so calls can be chained;
    ``build`` returns the ExtractionMetrics and ``to_string`` a str.
    """

    def __init__(self) -> None: ...
    def set_total_paths_possible(self, count: int) -> "MetricsReporter": ...
    def set_paths_evaluated(self, count: int) -> "MetricsReporter": ...
    def set_paths_pruned_level1(self, count: int) -> "MetricsReporter": ...
    def set_paths_pruned_level2(self, count: int) -> "MetricsReporter": ...
    def set_paths_pruned_level3(self, count: int) -> "MetricsReporter": ...
    def set_paths_pruned_prefix(self, count: int) -> "MetricsReporter": ...
    def set_effective_branching_factor(self, factor: float) -> "MetricsReporter": ...
    def set_effective_depth(self, depth: float) -> "MetricsReporter": ...
    def set_cache_hit_rate(self, rate: float) -> "MetricsReporter": ...
    def add_format_detection(self, format: str, count: int = 1) -> "MetricsReporter": ...
    def set_wall_clock_time(self, seconds: float) -> "MetricsReporter": ...
    def set_gpu_utilization(self, utilization: float) -> "MetricsReporter": ...
    def calculate_derived_metrics(self) -> "MetricsReporter": ...
    def generate_complexity_reduction(self, input_length: int) -> "MetricsReporter": ...
    def build(self) -> ExtractionMetrics: ...
    # NOTE(review): "full" is the only verbosity value visible here; the
    # other accepted values are not shown in this stub.
    def to_string(self, verbosity: str = "full") -> str: ...
|
|
943
|
+
|
|
944
|
+
|
|
945
|
+
class ExtractionResultBuilder:
    """Builder for complete extraction results.

    Every method except ``build`` is annotated as returning the builder,
    so calls can be chained; ``build`` returns the ExtractionResult.
    """

    def __init__(self) -> None: ...
    def set_success(self, success: bool) -> "ExtractionResultBuilder": ...
    # add_candidate takes one SuccessResult; add_candidates takes a list
    # of Candidate objects.
    def add_candidate(self, result: SuccessResult) -> "ExtractionResultBuilder": ...
    def add_candidates(self, candidates: List[Candidate]) -> "ExtractionResultBuilder": ...
    def set_failure(self, failure: FailureResult) -> "ExtractionResultBuilder": ...
    def set_metrics(self, metrics: ExtractionMetrics) -> "ExtractionResultBuilder": ...
    def build(self) -> ExtractionResult: ...
|
|
955
|
+
|
|
956
|
+
|
|
957
|
+
# Utility functions
|
|
958
|
+
def format_path(path: Path, max_coords: int = 10) -> str:
    """Format a path as a human-readable string.

    Args:
        path: The Path to format (declared elsewhere in this stub).
        max_coords: Defaults to 10.
            NOTE(review): presumably caps how many coordinates are
            printed — confirm against the implementation.

    Returns:
        The formatted text.
    """
    ...
|
|
961
|
+
|
|
962
|
+
def format_bytes_hex(data: bytes, max_bytes: int = 32) -> str:
    """Format bytes as a hex string.

    Args:
        data: The bytes to format.
        max_bytes: Defaults to 32.
            NOTE(review): presumably caps how many bytes are rendered —
            confirm against the implementation.

    Returns:
        The hex text.
    """
    ...
|
|
965
|
+
|
|
966
|
+
def format_confidence(confidence: float) -> str:
    """Format a confidence score as a percentage string.

    Args:
        confidence: The score to format.
            NOTE(review): the expected input range (e.g. 0-1) is not
            visible in this stub.

    Returns:
        The percentage text.
    """
    ...
|
|
969
|
+
|
|
970
|
+
def format_duration(seconds: float) -> str:
    """Format a duration in human-readable form.

    Args:
        seconds: The duration, in seconds per the parameter name.

    Returns:
        The formatted text.
    """
    ...
|
|
973
|
+
|
|
974
|
+
def format_count(count: int) -> str:
    """Format a large number with appropriate suffix (K, M, B).

    Args:
        count: The number to format.

    Returns:
        The formatted text.
        NOTE(review): the rounding and the thresholds for each suffix
        are not visible in this stub.
    """
    ...
|