oscura-0.10.0-py3-none-any.whl → oscura-0.12.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oscura/__init__.py +1 -1
- oscura/__main__.py +4 -0
- oscura/analyzers/binary/__init__.py +36 -0
- oscura/analyzers/binary/core/__init__.py +29 -0
- oscura/analyzers/binary/core/file_access.py +193 -0
- oscura/analyzers/binary/core/pipeline.py +161 -0
- oscura/analyzers/binary/core/results.py +217 -0
- oscura/analyzers/binary/detection/__init__.py +10 -0
- oscura/analyzers/binary/detection/encoding.py +624 -0
- oscura/analyzers/binary/detection/patterns.py +320 -0
- oscura/analyzers/binary/detection/structure.py +630 -0
- oscura/analyzers/binary/export/__init__.py +9 -0
- oscura/analyzers/binary/export/dissector.py +174 -0
- oscura/analyzers/binary/inference/__init__.py +15 -0
- oscura/analyzers/binary/inference/checksums.py +214 -0
- oscura/analyzers/binary/inference/fields.py +150 -0
- oscura/analyzers/binary/inference/sequences.py +232 -0
- oscura/analyzers/binary/inference/timestamps.py +210 -0
- oscura/analyzers/binary/visualization/__init__.py +9 -0
- oscura/analyzers/binary/visualization/structure_view.py +182 -0
- oscura/analyzers/ml/signal_classifier.py +6 -0
- oscura/analyzers/waveform/spectral.py +18 -11
- oscura/automotive/__init__.py +1 -1
- oscura/automotive/flexray/fibex.py +9 -1
- oscura/loaders/__init__.py +4 -1
- oscura/loaders/binary.py +284 -1
- oscura/loaders/validation.py +17 -10
- oscura/sessions/legacy.py +110 -1
- oscura/workflows/batch/aggregate.py +5 -1
- oscura-0.12.0.dist-info/METADATA +460 -0
- {oscura-0.10.0.dist-info → oscura-0.12.0.dist-info}/RECORD +34 -16
- oscura-0.10.0.dist-info/METADATA +0 -641
- {oscura-0.10.0.dist-info → oscura-0.12.0.dist-info}/WHEEL +0 -0
- {oscura-0.10.0.dist-info → oscura-0.12.0.dist-info}/entry_points.txt +0 -0
- {oscura-0.10.0.dist-info → oscura-0.12.0.dist-info}/licenses/LICENSE +0 -0

oscura/analyzers/waveform/spectral.py
CHANGED

@@ -17,6 +17,7 @@ References:
 
 from __future__ import annotations
 
+import threading
 from functools import lru_cache
 from typing import TYPE_CHECKING, Any, Literal
 
@@ -32,8 +33,9 @@ if TYPE_CHECKING:
 
     from oscura.core.types import MeasurementResult, WaveformTrace
 
-# Global FFT cache statistics
+# Global FFT cache statistics (thread-safe)
 _fft_cache_stats = {"hits": 0, "misses": 0, "size": 128}
+_fft_cache_lock = threading.Lock()
 
 
 def get_fft_cache_stats() -> dict[str, int]:
@@ -46,7 +48,8 @@ def get_fft_cache_stats() -> dict[str, int]:
         >>> stats = get_fft_cache_stats()
         >>> print(f"Cache hit rate: {stats['hits'] / (stats['hits'] + stats['misses']):.1%}")
     """
-
+    with _fft_cache_lock:
+        return _fft_cache_stats.copy()
 
 
 def clear_fft_cache() -> None:
@@ -58,8 +61,9 @@ def clear_fft_cache() -> None:
         >>> clear_fft_cache()  # Clear cached FFT results
     """
     _compute_fft_cached.cache_clear()
-
-
+    with _fft_cache_lock:
+        _fft_cache_stats["hits"] = 0
+        _fft_cache_stats["misses"] = 0
 
 
 def configure_fft_cache(size: int) -> None:
@@ -72,11 +76,12 @@ def configure_fft_cache(size: int) -> None:
         >>> configure_fft_cache(256)  # Increase cache size for better hit rate
     """
     global _compute_fft_cached
-
-
-
-
-
+    with _fft_cache_lock:
+        _fft_cache_stats["size"] = size
+        # Recreate cache with new size
+        _compute_fft_cached = lru_cache(maxsize=size)(_compute_fft_impl)
+        _fft_cache_stats["hits"] = 0
+        _fft_cache_stats["misses"] = 0
 
 
 def _compute_fft_impl(
@@ -270,7 +275,8 @@ def _fft_cached_path(
     freq, magnitude_db, phase = _compute_fft_cached(
         data_bytes, n, window, nfft_computed, detrend, sample_rate
     )
-
+    with _fft_cache_lock:
+        _fft_cache_stats["hits"] += 1
 
     if return_phase:
         return freq, magnitude_db, phase
@@ -302,7 +308,8 @@ def _fft_direct_path(
     Returns:
         FFT results (with or without phase).
     """
-
+    with _fft_cache_lock:
+        _fft_cache_stats["misses"] += 1
 
     w = get_window(window, n)
     data_windowed = data_processed * w
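The net effect is that every read and update of the module-level statistics dict now happens under _fft_cache_lock. A minimal usage sketch of the cache API shown above (import path inferred from the file layout; whether these names are also re-exported at package level is not shown in this diff):

from oscura.analyzers.waveform.spectral import (
    clear_fft_cache,
    configure_fft_cache,
    get_fft_cache_stats,
)

configure_fft_cache(256)  # resize the LRU cache; also resets hit/miss counters

# ... run FFT-based measurements, possibly from several threads ...

stats = get_fft_cache_stats()  # returns a copy taken under the lock
total = stats["hits"] + stats["misses"]
if total:
    print(f"Cache hit rate: {stats['hits'] / total:.1%}")

clear_fft_cache()  # drop cached spectra and zero the counters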
oscura/automotive/flexray/fibex.py
CHANGED

@@ -299,7 +299,15 @@ class FIBEXImporter:
         if not fibex_path.exists():
             raise FileNotFoundError(f"FIBEX file not found: {fibex_path}")
 
-
+        # SEC-004: Protect against XXE attacks by disabling entity expansion
+        parser = ET.XMLParser()
+        try:
+            # Python < 3.12: entity attribute is writable
+            parser.entity = {}  # type: ignore[misc]
+        except AttributeError:
+            # Python >= 3.12: entity attribute is read-only, default behavior is safe
+            pass
+        tree = ET.parse(fibex_path, parser=parser)
         root = tree.getroot()
 
         # Extract cluster configuration
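The same hardening applies anywhere stdlib ElementTree parses untrusted XML. A standalone sketch of the pattern used above, outside any oscura class (the version-dependent behavior of the entity attribute is as described in the diff's own comments):

import xml.etree.ElementTree as ET

def parse_xml_hardened(path: str) -> ET.ElementTree:
    """Parse XML with entity expansion disabled to mitigate XXE-style attacks."""
    parser = ET.XMLParser()
    try:
        parser.entity = {}  # Python < 3.12: clear the writable entity map
    except AttributeError:
        pass  # Python >= 3.12: attribute is read-only; default behavior is safe
    return ET.parse(path, parser=parser)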
oscura/loaders/__init__.py
CHANGED

@@ -41,6 +41,7 @@ _LOADER_REGISTRY: dict[str, tuple[str, str]] = {
     "tdms": ("oscura.loaders.tdms", "load_tdms"),
     "touchstone": ("oscura.loaders.touchstone", "load_touchstone"),
     "chipwhisperer": ("oscura.loaders.chipwhisperer", "load_chipwhisperer"),
+    "binary": ("oscura.loaders.binary", "load_binary_auto"),
 }
 
 
@@ -96,7 +97,7 @@ from oscura.loaders import (
     csv,
     hdf5,
 )
-from oscura.loaders.binary import load_binary
+from oscura.loaders.binary import load_binary, load_binary_auto
 
 # Import configurable binary loading functionality
 from oscura.loaders.configurable import (
@@ -182,6 +183,7 @@ logger = logging.getLogger(__name__)
 SUPPORTED_FORMATS: dict[str, str] = {
     ".wfm": "auto_wfm",  # Auto-detect Tektronix vs Rigol
     ".tss": "tss",  # Tektronix session files
+    ".bin": "binary",  # Raw binary with auto-detection
     ".npz": "numpy",
     ".csv": "csv",
     ".h5": "hdf5",
@@ -655,6 +657,7 @@ __all__ = [
     "load_all_channels",
     "load_auto",
     "load_binary",
+    "load_binary_auto",
     "load_binary_packets",
     "load_lazy",
     "load_packets_streaming",
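The registry maps a format key to a (module, attribute) pair so that loaders can be imported lazily on first use rather than at package import time. The dispatch code itself is not part of this diff; a plausible sketch of how such a registry is resolved:

import importlib
from typing import Any, Callable

_LOADER_REGISTRY: dict[str, tuple[str, str]] = {
    "binary": ("oscura.loaders.binary", "load_binary_auto"),
}

def resolve_loader(fmt: str) -> Callable[..., Any]:
    """Import the loader module on first use and return the named callable."""
    module_name, func_name = _LOADER_REGISTRY[fmt]
    module = importlib.import_module(module_name)
    return getattr(module, func_name)

# trace = resolve_loader("binary")("capture.bin")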
oscura/loaders/binary.py
CHANGED

@@ -153,4 +153,287 @@ def _load_binary_mmap(
         mm.close()
 
 
-
+def detect_binary_dtype(
+    path: str | PathLike[str], sample_size: int = 8192
+) -> tuple[str, dict[str, Any]]:
+    """Auto-detect most likely dtype for binary file using intelligent multi-heuristic analysis.
+
+    Performs comprehensive analysis including:
+    - Multi-location sampling (beginning, middle, end)
+    - Byte entropy and distribution analysis
+    - IEEE 754 floating point pattern detection
+    - Value range validation for each dtype
+    - Alignment and padding pattern detection
+    - Statistical confidence scoring
+
+    Designed to handle completely unknown binary formats with no prior knowledge.
+
+    Args:
+        path: Path to binary file.
+        sample_size: Bytes to sample per location (default: 8KB).
+
+    Returns:
+        Tuple of (detected_dtype, confidence_scores).
+        confidence_scores maps each dtype to its normalized confidence (0-1).
+
+    Example:
+        >>> dtype, confidence = detect_binary_dtype("unknown.bin")
+        >>> print(f"Detected: {dtype} (confidence: {confidence[dtype]:.1%})")
+        Detected: uint16 (confidence: 85.3%)
+    """
+    path = Path(path)
+    file_size = path.stat().st_size
+
+    # Sample from multiple locations for robust detection
+    samples_to_check = []
+    with open(path, "rb") as f:
+        # Beginning
+        samples_to_check.append(f.read(min(sample_size, file_size)))
+
+        # Middle (if large enough)
+        if file_size > sample_size * 2:
+            f.seek(file_size // 2)
+            samples_to_check.append(f.read(min(sample_size, file_size - f.tell())))
+
+        # End (if large enough)
+        if file_size > sample_size * 3:
+            f.seek(max(0, file_size - sample_size))
+            samples_to_check.append(f.read())
+
+    sample = b"".join(samples_to_check)
+
+    if len(sample) < 16:
+        return "uint8", {"uint8": 1.0}
+
+    from collections import Counter
+
+    # Byte entropy calculation
+    byte_counts = Counter(sample)
+    total = len(sample)
+    entropy = -sum((count / total) * np.log2(count / total) for count in byte_counts.values())
+    zero_density = sample.count(b"\x00") / len(sample)
+
+    # Score each dtype possibility
+    scores: dict[str, float] = {
+        "uint8": 0.0,
+        "int8": 0.0,
+        "uint16": 0.0,
+        "int16": 0.0,
+        "uint32": 0.0,
+        "int32": 0.0,
+        "float32": 0.0,
+        "float64": 0.0,
+    }
+
+    # Test 1: IEEE 754 floating point validation
+    float32_valid = 0
+    for i in range(0, min(len(sample) - 3, 4096), 4):
+        try:
+            val = np.frombuffer(sample[i : i + 4], dtype=np.float32)[0]
+            if np.isfinite(val) and -1e10 < val < 1e10:
+                float32_valid += 1
+        except Exception:
+            pass
+
+    float64_valid = 0
+    for i in range(0, min(len(sample) - 7, 4096), 8):
+        try:
+            val = np.frombuffer(sample[i : i + 8], dtype=np.float64)[0]
+            if np.isfinite(val) and -1e10 < val < 1e10:
+                float64_valid += 1
+        except Exception:
+            pass
+
+    scores["float32"] = (float32_valid / (min(len(sample), 4096) / 4)) * 3.0
+    scores["float64"] = (float64_valid / (min(len(sample), 4096) / 8)) * 3.0
+
+    # Test 2: Entropy-based scoring
+    if entropy > 7.0:
+        scores["float32"] += 2.0
+        scores["float64"] += 2.0
+    elif entropy > 6.0:
+        scores["int32"] += 1.5
+        scores["uint32"] += 1.5
+    elif entropy > 4.5:
+        scores["int16"] += 2.0
+        scores["uint16"] += 2.0
+    else:
+        scores["int8"] += 2.0
+        scores["uint8"] += 2.0
+
+    # Test 3: Zero density (structured data indicator)
+    if zero_density > 0.6:
+        scores["int16"] += 1.5
+        scores["uint16"] += 1.5
+    elif zero_density > 0.4:
+        scores["int16"] += 1.0
+        scores["uint16"] += 1.0
+
+    # Test 4: Value range reasonableness
+    uint8_reasonable = sum(1 for b in sample[: min(1000, len(sample))] if b < 128) / min(
+        1000, len(sample)
+    )
+    if uint8_reasonable > 0.8:
+        scores["uint8"] += 1.5
+
+    # Find best dtype
+    best_dtype = max(scores.items(), key=lambda x: x[1])[0]
+
+    # Normalize confidence scores
+    max_score = max(scores.values()) if scores.values() else 1.0
+    confidence = {k: v / max_score for k, v in scores.items()} if max_score > 0 else scores
+
+    return best_dtype, confidence
+
+
+def detect_packet_structure(path: str | PathLike[str], sample_size: int = 8192) -> tuple[bool, int]:
+    """Detect if binary file contains structured packet data.
+
+    Looks for repeating header patterns and regular spacing indicating
+    packet boundaries.
+
+    Args:
+        path: Path to binary file.
+        sample_size: Number of bytes to sample for detection.
+
+    Returns:
+        Tuple of (is_packet_data, packet_size_estimate).
+        packet_size_estimate is 0 if not packet data.
+
+    Example:
+        >>> is_packets, size = detect_packet_structure("capture.bin")
+        >>> if is_packets:
+        ...     print(f"Detected packet structure with ~{size} byte packets")
+    """
+    path = Path(path)
+
+    with open(path, "rb") as f:
+        sample = f.read(sample_size)
+
+    if len(sample) < 512:
+        return False, 0
+
+    # Look for sequence numbers (common in packet headers)
+    # Check for patterns like: 00 00, 01 00, 02 00, 03 00 (little-endian sequence)
+    sequence_positions = []
+    for seq_byte in range(10):  # Check first 10 sequence numbers
+        pattern = seq_byte.to_bytes(1, "little") + b"\x00"
+        pos = sample.find(pattern)
+        if pos != -1:
+            sequence_positions.append(pos)
+
+    # If we found multiple sequence numbers at regular intervals = likely packets
+    if len(sequence_positions) >= 3:
+        # Calculate intervals between sequence numbers
+        intervals = [
+            sequence_positions[i + 1] - sequence_positions[i]
+            for i in range(len(sequence_positions) - 1)
+        ]
+
+        # Check if intervals are consistent (within 10% variation)
+        if intervals:
+            avg_interval = sum(intervals) / len(intervals)
+            variation = max(abs(i - avg_interval) for i in intervals) / avg_interval
+
+            if variation < 0.1 and 100 < avg_interval < 10000:
+                # Consistent spacing in reasonable range = packet structure
+                return True, int(avg_interval)
+
+    # Look for repeating byte patterns (common header markers)
+    # Check 4-byte patterns that repeat regularly
+    pattern_positions: dict[bytes, list[int]] = {}
+    for i in range(0, min(1024, len(sample) - 4), 4):
+        pattern = sample[i : i + 4]
+        if pattern not in pattern_positions:
+            pattern_positions[pattern] = []
+        pattern_positions[pattern].append(i)
+
+    # Find patterns that repeat with consistent spacing
+    for pattern, positions in pattern_positions.items():
+        if len(positions) >= 3 and pattern != b"\x00\x00\x00\x00":
+            intervals = [positions[i + 1] - positions[i] for i in range(len(positions) - 1)]
+            if intervals:
+                avg_interval = sum(intervals) / len(intervals)
+                variation = (
+                    max(abs(i - avg_interval) for i in intervals) / avg_interval
+                    if intervals
+                    else 1.0
+                )
+
+                if variation < 0.1 and 100 < avg_interval < 10000:
+                    return True, int(avg_interval)
+
+    return False, 0
+
+
+def load_binary_auto(
+    path: str | PathLike[str],
+    *,
+    sample_rate: float | None = None,
+    max_samples: int = 100_000,
+    channels: int = 1,
+    channel: int = 0,
+) -> WaveformTrace:
+    """Load binary file with automatic dtype detection and intelligent defaults.
+
+    This is a smart wrapper around load_binary() that:
+    - Auto-detects dtype
+    - Limits samples to prevent memory issues
+    - Uses memory-mapped I/O for large files
+    - Detects packet structures
+
+    Designed for use with unknown binary formats where manual
+    configuration is not available.
+
+    Args:
+        path: Path to binary file.
+        sample_rate: Sample rate in Hz. If None, estimates from file.
+        max_samples: Maximum number of samples to load (default: 100K).
+        channels: Number of interleaved channels.
+        channel: Channel index to load.
+
+    Returns:
+        WaveformTrace with loaded data and metadata.
+
+    Example:
+        >>> trace = load_binary_auto("unknown_capture.bin")
+        >>> print(f"Loaded {len(trace.data)} samples, dtype: {trace.metadata.source_file}")
+    """
+    path = Path(path)
+    file_size = path.stat().st_size
+
+    # Detect dtype with confidence scoring
+    dtype, confidence = detect_binary_dtype(path)
+
+    # Detect packet structure (informational)
+    is_packets, packet_size = detect_packet_structure(path)
+
+    # Estimate sample rate if not provided
+    if sample_rate is None:
+        # Common sample rates for oscilloscopes/DAQ
+        sample_rate = 1.0e6  # 1 MS/s default
+
+    # Use mmap for files > 10MB
+    use_mmap = file_size > 10 * 1024 * 1024
+
+    # Calculate potential samples based on detected dtype
+    bytes_per_sample = np.dtype(dtype).itemsize
+    potential_samples = file_size // bytes_per_sample
+
+    # Limit samples for analysis performance
+    count = min(max_samples, potential_samples)
+
+    # Load with detected parameters
+    return load_binary(
+        path,
+        dtype=dtype,
+        sample_rate=sample_rate,
+        channels=channels,
+        channel=channel,
+        offset=0,
+        count=count,
+        mmap_mode=use_mmap,
+    )
+
+
+__all__ = ["detect_binary_dtype", "detect_packet_structure", "load_binary", "load_binary_auto"]
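The entropy heuristic is easy to sanity-check: float32 samples of a smooth signal have high byte entropy and pass the IEEE 754 finite-value test, so detection should favor a float dtype. A small self-contained check (file name and printed values are illustrative, not guaranteed output):

import numpy as np
from oscura.loaders.binary import detect_binary_dtype, load_binary_auto

# Write 100k float32 samples of a noisy sine wave to disk
t = np.linspace(0.0, 1.0, 100_000, dtype=np.float32)
signal = np.sin(2 * np.pi * 1e3 * t) + 0.01 * np.random.randn(t.size)
signal.astype(np.float32).tofile("synthetic.bin")

dtype, confidence = detect_binary_dtype("synthetic.bin")
print(dtype, f"{confidence[dtype]:.1%}")  # expect float32 or float64 to score highest

trace = load_binary_auto("synthetic.bin", sample_rate=1e6)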
oscura/loaders/validation.py
CHANGED

@@ -475,24 +475,31 @@ class PacketValidator:
 
     @staticmethod
     def _crc32(data: bytes, poly: int = 0xEDB88320) -> int:
-        """Compute CRC-32 checksum.
+        """Compute CRC-32 checksum using native implementation.
 
         Args:
             data: Data to checksum.
             poly: CRC polynomial (default: 0xEDB88320 for CRC-32).
+                Note: Only standard CRC-32 polynomial is supported by native implementation.
 
         Returns:
             CRC-32 value.
+
+        Note:
+            Uses zlib.crc32() for performance (~100x faster than pure Python).
+            Custom polynomials are not supported - raises ValueError if non-standard poly provided.
         """
-
-
-
-
-
-
-
-
-
+        import zlib
+
+        # Verify standard CRC-32 polynomial (zlib only supports this)
+        if poly != 0xEDB88320:
+            raise ValueError(
+                f"Non-standard CRC polynomial {poly:#x} not supported by native implementation. "
+                "Only standard CRC-32 (0xEDB88320) is available."
+            )
+
+        # zlib.crc32 returns signed int on some platforms, mask to unsigned
+        return zlib.crc32(data) & 0xFFFFFFFF
 
     def get_statistics(self) -> ValidationStats:
         """Get aggregate validation statistics.
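The removed pure-Python body is not rendered in this diff, but a textbook bit-by-bit reflected CRC-32 (init 0xFFFFFFFF, final XOR 0xFFFFFFFF) computes exactly what zlib.crc32 does, which is why the swap is behavior-preserving for the default polynomial. A quick equivalence check:

import zlib

def crc32_pure(data: bytes, poly: int = 0xEDB88320) -> int:
    """Reference bit-by-bit reflected CRC-32 (init/final XOR 0xFFFFFFFF)."""
    crc = 0xFFFFFFFF
    for byte in data:
        crc ^= byte
        for _ in range(8):
            crc = (crc >> 1) ^ poly if crc & 1 else crc >> 1
    return crc ^ 0xFFFFFFFF

payload = b"oscura packet payload"
assert crc32_pure(payload) == zlib.crc32(payload) & 0xFFFFFFFF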
oscura/sessions/legacy.py
CHANGED

@@ -17,6 +17,8 @@ import gzip
 import hashlib
 import hmac
 import pickle
+import secrets
+import threading
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum
@@ -25,10 +27,108 @@ from typing import Any
 
 from oscura.core.exceptions import SecurityError
 
+# Global lock for security key generation to prevent race conditions (thread-level)
+_KEY_GENERATION_LOCK = threading.Lock()
+
+# Try to import fcntl for process-level file locking (Unix only)
+try:
+    import fcntl
+
+    HAS_FCNTL = True
+except ImportError:
+    HAS_FCNTL = False
+
 # Session file format constants
 _SESSION_MAGIC = b"OSC1"  # Magic bytes for new format with signature
 _SESSION_SIGNATURE_SIZE = 32  # SHA256 hash size in bytes
-
+
+
+def _get_security_key() -> bytes:
+    """Get or generate per-installation session security key.
+
+    The key is generated once per installation and stored in ~/.oscura/session_key
+    with restrictive permissions (0o600). This provides better security than a
+    shared hardcoded key.
+
+    Uses both thread-level and process-level locking to prevent race conditions
+    during parallel key generation.
+
+    Returns:
+        32-byte security key for HMAC signing.
+    """
+    key_file = Path.home() / ".oscura" / "session_key"
+    lock_file = Path.home() / ".oscura" / "session_key.lock"
+
+    # Thread-level lock first
+    with _KEY_GENERATION_LOCK:
+        # Check if key exists
+        if key_file.exists():
+            try:
+                return key_file.read_bytes()
+            except (OSError, PermissionError):
+                # Fall back to generating new key if can't read
+                pass
+
+        # Create parent directory
+        key_file.parent.mkdir(parents=True, exist_ok=True)
+
+        # Process-level file lock for parallel pytest workers
+        if HAS_FCNTL:
+            # Use file locking on Unix systems
+            lock_file.parent.mkdir(parents=True, exist_ok=True)
+            with open(lock_file, "w") as lock_fd:
+                try:
+                    fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX)
+
+                    # Double-check after acquiring file lock
+                    if key_file.exists():
+                        try:
+                            key = key_file.read_bytes()
+                            fcntl.flock(lock_fd.fileno(), fcntl.LOCK_UN)
+                            return key
+                        except (OSError, PermissionError):
+                            pass
+
+                    # Generate new random key
+                    key = secrets.token_bytes(32)
+
+                    # Write with restrictive permissions
+                    try:
+                        key_file.write_bytes(key)
+                        key_file.chmod(0o600)  # Owner read/write only
+                    except (OSError, PermissionError):
+                        # Can't write key file - continue with ephemeral key
+                        pass
+
+                    fcntl.flock(lock_fd.fileno(), fcntl.LOCK_UN)
+                    return key
+                except OSError:
+                    # File locking failed, continue without lock
+                    pass
+
+        # Fallback without file locking (Windows or locking unavailable)
+        # Double-check one more time
+        if key_file.exists():
+            try:
+                return key_file.read_bytes()
+            except (OSError, PermissionError):
+                pass
+
+        # Generate new random key
+        key = secrets.token_bytes(32)
+
+        # Write with restrictive permissions
+        try:
+            key_file.write_bytes(key)
+            key_file.chmod(0o600)  # Owner read/write only
+        except (OSError, PermissionError):
+            # Can't write key file - continue with ephemeral key
+            pass
+
+        return key
+
+
+_SECURITY_KEY = _get_security_key()
 
 
 class AnnotationType(Enum):
@@ -709,6 +809,15 @@ class Session:
 def load_session(path: str | Path) -> Session:
     """Load session from file.
 
+    This function implements HMAC-SHA256 signature verification before deserializing
+    session data to protect against tampering and malicious file modifications.
+
+    Security:
+        Session files are protected with HMAC-SHA256 signatures. Only load session
+        files from trusted sources. While HMAC verification prevents tampering,
+        the shared security key means all installations can verify each other's
+        files. Consider using per-installation keys for sensitive deployments.
+
     Args:
         path: Path to session file (.tks).
 
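The diff shows the key management and the format constants (OSC1 magic, 32-byte signature) but not the serialization itself. A minimal sketch of HMAC-SHA256-signed pickling consistent with those constants; oscura's actual on-disk layout may differ:

import hashlib
import hmac
import pickle

_MAGIC = b"OSC1"
_SIG_SIZE = 32  # SHA-256 digest length

def dump_signed(obj: object, key: bytes) -> bytes:
    blob = pickle.dumps(obj)
    sig = hmac.new(key, blob, hashlib.sha256).digest()
    return _MAGIC + sig + blob

def load_signed(raw: bytes, key: bytes) -> object:
    if raw[:4] != _MAGIC:
        raise ValueError("not a signed session file")
    sig, blob = raw[4 : 4 + _SIG_SIZE], raw[4 + _SIG_SIZE :]
    expected = hmac.new(key, blob, hashlib.sha256).digest()
    if not hmac.compare_digest(sig, expected):
        raise ValueError("signature verification failed")
    return pickle.loads(blob)  # deserialize only after verification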
oscura/workflows/batch/aggregate.py
CHANGED

@@ -339,7 +339,11 @@ def _create_metric_plot(
         plot_file.parent.mkdir(parents=True, exist_ok=True)
         plt.savefig(plot_file)
     else:
-
+        # Try to show, but gracefully handle non-interactive backends
+        try:
+            plt.show()
+        except Exception:
+            pass  # Silently skip if backend doesn't support interactive display
 
 
 def _plot_histogram(
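On non-interactive backends (e.g. Agg in CI) plt.show() may warn or, depending on backend and matplotlib version, raise, which is what the new try/except guards against. The save-or-show branching reduces to a pattern like this sketch:

import matplotlib
matplotlib.use("Agg")  # headless backend, as in CI
import matplotlib.pyplot as plt

def render(fig, plot_file: str | None = None) -> None:
    if plot_file is not None:
        fig.savefig(plot_file)  # always works, even headless
    else:
        try:
            plt.show()  # no-op or warning on Agg; may raise elsewhere
        except Exception:
            pass  # never let display failures break a batch run

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1])
render(fig, plot_file="metric.png")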
|