oscura-0.10.0-py3-none-any.whl → oscura-0.12.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oscura/__init__.py +1 -1
- oscura/__main__.py +4 -0
- oscura/analyzers/binary/__init__.py +36 -0
- oscura/analyzers/binary/core/__init__.py +29 -0
- oscura/analyzers/binary/core/file_access.py +193 -0
- oscura/analyzers/binary/core/pipeline.py +161 -0
- oscura/analyzers/binary/core/results.py +217 -0
- oscura/analyzers/binary/detection/__init__.py +10 -0
- oscura/analyzers/binary/detection/encoding.py +624 -0
- oscura/analyzers/binary/detection/patterns.py +320 -0
- oscura/analyzers/binary/detection/structure.py +630 -0
- oscura/analyzers/binary/export/__init__.py +9 -0
- oscura/analyzers/binary/export/dissector.py +174 -0
- oscura/analyzers/binary/inference/__init__.py +15 -0
- oscura/analyzers/binary/inference/checksums.py +214 -0
- oscura/analyzers/binary/inference/fields.py +150 -0
- oscura/analyzers/binary/inference/sequences.py +232 -0
- oscura/analyzers/binary/inference/timestamps.py +210 -0
- oscura/analyzers/binary/visualization/__init__.py +9 -0
- oscura/analyzers/binary/visualization/structure_view.py +182 -0
- oscura/analyzers/ml/signal_classifier.py +6 -0
- oscura/analyzers/waveform/spectral.py +18 -11
- oscura/automotive/__init__.py +1 -1
- oscura/automotive/flexray/fibex.py +9 -1
- oscura/loaders/__init__.py +4 -1
- oscura/loaders/binary.py +284 -1
- oscura/loaders/validation.py +17 -10
- oscura/sessions/legacy.py +110 -1
- oscura/workflows/batch/aggregate.py +5 -1
- oscura-0.12.0.dist-info/METADATA +460 -0
- {oscura-0.10.0.dist-info → oscura-0.12.0.dist-info}/RECORD +34 -16
- oscura-0.10.0.dist-info/METADATA +0 -641
- {oscura-0.10.0.dist-info → oscura-0.12.0.dist-info}/WHEEL +0 -0
- {oscura-0.10.0.dist-info → oscura-0.12.0.dist-info}/entry_points.txt +0 -0
- {oscura-0.10.0.dist-info → oscura-0.12.0.dist-info}/licenses/LICENSE +0 -0

oscura/analyzers/waveform/spectral.py
CHANGED

@@ -17,6 +17,7 @@ References:
 
 from __future__ import annotations
 
+import threading
 from functools import lru_cache
 from typing import TYPE_CHECKING, Any, Literal
 
@@ -32,8 +33,9 @@ if TYPE_CHECKING:
 
     from oscura.core.types import MeasurementResult, WaveformTrace
 
-# Global FFT cache statistics
+# Global FFT cache statistics (thread-safe)
 _fft_cache_stats = {"hits": 0, "misses": 0, "size": 128}
+_fft_cache_lock = threading.Lock()
 
 
 def get_fft_cache_stats() -> dict[str, int]:
@@ -46,7 +48,8 @@ def get_fft_cache_stats() -> dict[str, int]:
         >>> stats = get_fft_cache_stats()
         >>> print(f"Cache hit rate: {stats['hits'] / (stats['hits'] + stats['misses']):.1%}")
     """
-
+    with _fft_cache_lock:
+        return _fft_cache_stats.copy()
 
 
 def clear_fft_cache() -> None:
@@ -58,8 +61,9 @@ def clear_fft_cache() -> None:
         >>> clear_fft_cache()  # Clear cached FFT results
     """
     _compute_fft_cached.cache_clear()
-
-
+    with _fft_cache_lock:
+        _fft_cache_stats["hits"] = 0
+        _fft_cache_stats["misses"] = 0
 
 
 def configure_fft_cache(size: int) -> None:
@@ -72,11 +76,12 @@ def configure_fft_cache(size: int) -> None:
         >>> configure_fft_cache(256)  # Increase cache size for better hit rate
     """
     global _compute_fft_cached
-
-
-
-
-
+    with _fft_cache_lock:
+        _fft_cache_stats["size"] = size
+        # Recreate cache with new size
+        _compute_fft_cached = lru_cache(maxsize=size)(_compute_fft_impl)
+        _fft_cache_stats["hits"] = 0
+        _fft_cache_stats["misses"] = 0
 
 
 def _compute_fft_impl(
@@ -270,7 +275,8 @@ def _fft_cached_path(
     freq, magnitude_db, phase = _compute_fft_cached(
         data_bytes, n, window, nfft_computed, detrend, sample_rate
     )
-
+    with _fft_cache_lock:
+        _fft_cache_stats["hits"] += 1
 
     if return_phase:
         return freq, magnitude_db, phase
@@ -302,7 +308,8 @@ def _fft_direct_path(
     Returns:
         FFT results (with or without phase).
     """
-
+    with _fft_cache_lock:
+        _fft_cache_stats["misses"] += 1
 
     w = get_window(window, n)
     data_windowed = data_processed * w
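The net effect is that every read and update of the module-level statistics dict now happens under _fft_cache_lock. A minimal usage sketch of the cache API shown above (import path inferred from the file layout; whether these names are also re-exported at package level is not shown in this diff):

from oscura.analyzers.waveform.spectral import (
    clear_fft_cache,
    configure_fft_cache,
    get_fft_cache_stats,
)

configure_fft_cache(256)  # resize the LRU cache; also resets hit/miss counters

# ... run FFT-based measurements, possibly from several threads ...

stats = get_fft_cache_stats()  # returns a copy taken under the lock
total = stats["hits"] + stats["misses"]
if total:
    print(f"Cache hit rate: {stats['hits'] / total:.1%}")

clear_fft_cache()  # drop cached spectra and zero the counters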
oscura/automotive/flexray/fibex.py
CHANGED

@@ -299,7 +299,15 @@ class FIBEXImporter:
         if not fibex_path.exists():
             raise FileNotFoundError(f"FIBEX file not found: {fibex_path}")
 
-
+        # SEC-004: Protect against XXE attacks by disabling entity expansion
+        parser = ET.XMLParser()
+        try:
+            # Python < 3.12: entity attribute is writable
+            parser.entity = {}  # type: ignore[misc]
+        except AttributeError:
+            # Python >= 3.12: entity attribute is read-only, default behavior is safe
+            pass
+        tree = ET.parse(fibex_path, parser=parser)
         root = tree.getroot()
 
         # Extract cluster configuration
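The same hardening applies anywhere stdlib ElementTree parses untrusted XML. A standalone sketch of the pattern used above, outside any oscura class (the version-dependent behavior of the entity attribute is as described in the diff's own comments):

import xml.etree.ElementTree as ET

def parse_xml_hardened(path: str) -> ET.ElementTree:
    """Parse XML with entity expansion disabled to mitigate XXE-style attacks."""
    parser = ET.XMLParser()
    try:
        parser.entity = {}  # Python < 3.12: clear the writable entity map
    except AttributeError:
        pass  # Python >= 3.12: attribute is read-only; default behavior is safe
    return ET.parse(path, parser=parser)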
oscura/loaders/__init__.py
CHANGED

@@ -41,6 +41,7 @@ _LOADER_REGISTRY: dict[str, tuple[str, str]] = {
     "tdms": ("oscura.loaders.tdms", "load_tdms"),
     "touchstone": ("oscura.loaders.touchstone", "load_touchstone"),
     "chipwhisperer": ("oscura.loaders.chipwhisperer", "load_chipwhisperer"),
+    "binary": ("oscura.loaders.binary", "load_binary_auto"),
 }
 
 
@@ -96,7 +97,7 @@ from oscura.loaders import (
     csv,
     hdf5,
 )
-from oscura.loaders.binary import load_binary
+from oscura.loaders.binary import load_binary, load_binary_auto
 
 # Import configurable binary loading functionality
 from oscura.loaders.configurable import (
@@ -182,6 +183,7 @@ logger = logging.getLogger(__name__)
 SUPPORTED_FORMATS: dict[str, str] = {
     ".wfm": "auto_wfm",  # Auto-detect Tektronix vs Rigol
     ".tss": "tss",  # Tektronix session files
+    ".bin": "binary",  # Raw binary with auto-detection
     ".npz": "numpy",
     ".csv": "csv",
     ".h5": "hdf5",
@@ -655,6 +657,7 @@ __all__ = [
     "load_all_channels",
     "load_auto",
     "load_binary",
+    "load_binary_auto",
     "load_binary_packets",
     "load_lazy",
     "load_packets_streaming",
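The registry maps a format key to a (module, attribute) pair so that loaders can be imported lazily on first use rather than at package import time. The dispatch code itself is not part of this diff; a plausible sketch of how such a registry is resolved:

import importlib
from typing import Any, Callable

_LOADER_REGISTRY: dict[str, tuple[str, str]] = {
    "binary": ("oscura.loaders.binary", "load_binary_auto"),
}

def resolve_loader(fmt: str) -> Callable[..., Any]:
    """Import the loader module on first use and return the named callable."""
    module_name, func_name = _LOADER_REGISTRY[fmt]
    module = importlib.import_module(module_name)
    return getattr(module, func_name)

# trace = resolve_loader("binary")("capture.bin")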
oscura/loaders/binary.py
CHANGED

@@ -153,4 +153,287 @@ def _load_binary_mmap(
         mm.close()
 
 
-
+def detect_binary_dtype(
+    path: str | PathLike[str], sample_size: int = 8192
+) -> tuple[str, dict[str, Any]]:
+    """Auto-detect most likely dtype for binary file using intelligent multi-heuristic analysis.
+
+    Performs comprehensive analysis including:
+    - Multi-location sampling (beginning, middle, end)
+    - Byte entropy and distribution analysis
+    - IEEE 754 floating point pattern detection
+    - Value range validation for each dtype
+    - Alignment and padding pattern detection
+    - Statistical confidence scoring
+
+    Designed to handle completely unknown binary formats with no prior knowledge.
+
+    Args:
+        path: Path to binary file.
+        sample_size: Bytes to sample per location (default: 8KB).
+
+    Returns:
+        Tuple of (detected_dtype, confidence_scores).
+        confidence_scores maps each dtype to its normalized confidence (0-1).
+
+    Example:
+        >>> dtype, confidence = detect_binary_dtype("unknown.bin")
+        >>> print(f"Detected: {dtype} (confidence: {confidence[dtype]:.1%})")
+        Detected: uint16 (confidence: 85.3%)
+    """
+    path = Path(path)
+    file_size = path.stat().st_size
+
+    # Sample from multiple locations for robust detection
+    samples_to_check = []
+    with open(path, "rb") as f:
+        # Beginning
+        samples_to_check.append(f.read(min(sample_size, file_size)))
+
+        # Middle (if large enough)
+        if file_size > sample_size * 2:
+            f.seek(file_size // 2)
+            samples_to_check.append(f.read(min(sample_size, file_size - f.tell())))
+
+        # End (if large enough)
+        if file_size > sample_size * 3:
+            f.seek(max(0, file_size - sample_size))
+            samples_to_check.append(f.read())
+
+    sample = b"".join(samples_to_check)
+
+    if len(sample) < 16:
+        return "uint8", {"uint8": 1.0}
+
+    from collections import Counter
+
+    # Byte entropy calculation
+    byte_counts = Counter(sample)
+    total = len(sample)
+    entropy = -sum((count / total) * np.log2(count / total) for count in byte_counts.values())
+    zero_density = sample.count(b"\x00") / len(sample)
+
+    # Score each dtype possibility
+    scores: dict[str, float] = {
+        "uint8": 0.0,
+        "int8": 0.0,
+        "uint16": 0.0,
+        "int16": 0.0,
+        "uint32": 0.0,
+        "int32": 0.0,
+        "float32": 0.0,
+        "float64": 0.0,
+    }
+
+    # Test 1: IEEE 754 floating point validation
+    float32_valid = 0
+    for i in range(0, min(len(sample) - 3, 4096), 4):
+        try:
+            val = np.frombuffer(sample[i : i + 4], dtype=np.float32)[0]
+            if np.isfinite(val) and -1e10 < val < 1e10:
+                float32_valid += 1
+        except Exception:
+            pass
+
+    float64_valid = 0
+    for i in range(0, min(len(sample) - 7, 4096), 8):
+        try:
+            val = np.frombuffer(sample[i : i + 8], dtype=np.float64)[0]
+            if np.isfinite(val) and -1e10 < val < 1e10:
+                float64_valid += 1
+        except Exception:
+            pass
+
+    scores["float32"] = (float32_valid / (min(len(sample), 4096) / 4)) * 3.0
+    scores["float64"] = (float64_valid / (min(len(sample), 4096) / 8)) * 3.0
+
+    # Test 2: Entropy-based scoring
+    if entropy > 7.0:
+        scores["float32"] += 2.0
+        scores["float64"] += 2.0
+    elif entropy > 6.0:
+        scores["int32"] += 1.5
+        scores["uint32"] += 1.5
+    elif entropy > 4.5:
+        scores["int16"] += 2.0
+        scores["uint16"] += 2.0
+    else:
+        scores["int8"] += 2.0
+        scores["uint8"] += 2.0
+
+    # Test 3: Zero density (structured data indicator)
+    if zero_density > 0.6:
+        scores["int16"] += 1.5
+        scores["uint16"] += 1.5
+    elif zero_density > 0.4:
+        scores["int16"] += 1.0
+        scores["uint16"] += 1.0
+
+    # Test 4: Value range reasonableness
+    uint8_reasonable = sum(1 for b in sample[: min(1000, len(sample))] if b < 128) / min(
+        1000, len(sample)
+    )
+    if uint8_reasonable > 0.8:
+        scores["uint8"] += 1.5
+
+    # Find best dtype
+    best_dtype = max(scores.items(), key=lambda x: x[1])[0]
+
+    # Normalize confidence scores
+    max_score = max(scores.values()) if scores.values() else 1.0
+    confidence = {k: v / max_score for k, v in scores.items()} if max_score > 0 else scores
+
+    return best_dtype, confidence
+
+
+def detect_packet_structure(path: str | PathLike[str], sample_size: int = 8192) -> tuple[bool, int]:
+    """Detect if binary file contains structured packet data.
+
+    Looks for repeating header patterns and regular spacing indicating
+    packet boundaries.
+
+    Args:
+        path: Path to binary file.
+        sample_size: Number of bytes to sample for detection.
+
+    Returns:
+        Tuple of (is_packet_data, packet_size_estimate).
+        packet_size_estimate is 0 if not packet data.
+
+    Example:
+        >>> is_packets, size = detect_packet_structure("capture.bin")
+        >>> if is_packets:
+        ...     print(f"Detected packet structure with ~{size} byte packets")
+    """
+    path = Path(path)
+
+    with open(path, "rb") as f:
+        sample = f.read(sample_size)
+
+    if len(sample) < 512:
+        return False, 0
+
+    # Look for sequence numbers (common in packet headers)
+    # Check for patterns like: 00 00, 01 00, 02 00, 03 00 (little-endian sequence)
+    sequence_positions = []
+    for seq_byte in range(10):  # Check first 10 sequence numbers
+        pattern = seq_byte.to_bytes(1, "little") + b"\x00"
+        pos = sample.find(pattern)
+        if pos != -1:
+            sequence_positions.append(pos)
+
+    # If we found multiple sequence numbers at regular intervals = likely packets
+    if len(sequence_positions) >= 3:
+        # Calculate intervals between sequence numbers
+        intervals = [
+            sequence_positions[i + 1] - sequence_positions[i]
+            for i in range(len(sequence_positions) - 1)
+        ]
+
+        # Check if intervals are consistent (within 10% variation)
+        if intervals:
+            avg_interval = sum(intervals) / len(intervals)
+            variation = max(abs(i - avg_interval) for i in intervals) / avg_interval
+
+            if variation < 0.1 and 100 < avg_interval < 10000:
+                # Consistent spacing in reasonable range = packet structure
+                return True, int(avg_interval)
+
+    # Look for repeating byte patterns (common header markers)
+    # Check 4-byte patterns that repeat regularly
+    pattern_positions: dict[bytes, list[int]] = {}
+    for i in range(0, min(1024, len(sample) - 4), 4):
+        pattern = sample[i : i + 4]
+        if pattern not in pattern_positions:
+            pattern_positions[pattern] = []
+        pattern_positions[pattern].append(i)
+
+    # Find patterns that repeat with consistent spacing
+    for pattern, positions in pattern_positions.items():
+        if len(positions) >= 3 and pattern != b"\x00\x00\x00\x00":
+            intervals = [positions[i + 1] - positions[i] for i in range(len(positions) - 1)]
+            if intervals:
+                avg_interval = sum(intervals) / len(intervals)
+                variation = (
+                    max(abs(i - avg_interval) for i in intervals) / avg_interval
+                    if intervals
+                    else 1.0
+                )
+
+                if variation < 0.1 and 100 < avg_interval < 10000:
+                    return True, int(avg_interval)
+
+    return False, 0
+
+
+def load_binary_auto(
+    path: str | PathLike[str],
+    *,
+    sample_rate: float | None = None,
+    max_samples: int = 100_000,
+    channels: int = 1,
+    channel: int = 0,
+) -> WaveformTrace:
+    """Load binary file with automatic dtype detection and intelligent defaults.
+
+    This is a smart wrapper around load_binary() that:
+    - Auto-detects dtype
+    - Limits samples to prevent memory issues
+    - Uses memory-mapped I/O for large files
+    - Detects packet structures
+
+    Designed for use with unknown binary formats where manual
+    configuration is not available.
+
+    Args:
+        path: Path to binary file.
+        sample_rate: Sample rate in Hz. If None, estimates from file.
+        max_samples: Maximum number of samples to load (default: 100K).
+        channels: Number of interleaved channels.
+        channel: Channel index to load.
+
+    Returns:
+        WaveformTrace with loaded data and metadata.
+
+    Example:
+        >>> trace = load_binary_auto("unknown_capture.bin")
+        >>> print(f"Loaded {len(trace.data)} samples, dtype: {trace.metadata.source_file}")
+    """
+    path = Path(path)
+    file_size = path.stat().st_size
+
+    # Detect dtype with confidence scoring
+    dtype, confidence = detect_binary_dtype(path)
+
+    # Detect packet structure (informational)
+    is_packets, packet_size = detect_packet_structure(path)
+
+    # Estimate sample rate if not provided
+    if sample_rate is None:
+        # Common sample rates for oscilloscopes/DAQ
+        sample_rate = 1.0e6  # 1 MS/s default
+
+    # Use mmap for files > 10MB
+    use_mmap = file_size > 10 * 1024 * 1024
+
+    # Calculate potential samples based on detected dtype
+    bytes_per_sample = np.dtype(dtype).itemsize
+    potential_samples = file_size // bytes_per_sample
+
+    # Limit samples for analysis performance
+    count = min(max_samples, potential_samples)
+
+    # Load with detected parameters
+    return load_binary(
+        path,
+        dtype=dtype,
+        sample_rate=sample_rate,
+        channels=channels,
+        channel=channel,
+        offset=0,
+        count=count,
+        mmap_mode=use_mmap,
+    )
+
+
+__all__ = ["detect_binary_dtype", "detect_packet_structure", "load_binary", "load_binary_auto"]
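The entropy heuristic is easy to sanity-check: float32 samples of a smooth signal have high byte entropy and pass the IEEE 754 finite-value test, so detection should favor a float dtype. A small self-contained check (file name and printed values are illustrative, not guaranteed output):

import numpy as np
from oscura.loaders.binary import detect_binary_dtype, load_binary_auto

# Write 100k float32 samples of a noisy sine wave to disk
t = np.linspace(0.0, 1.0, 100_000, dtype=np.float32)
signal = np.sin(2 * np.pi * 1e3 * t) + 0.01 * np.random.randn(t.size)
signal.astype(np.float32).tofile("synthetic.bin")

dtype, confidence = detect_binary_dtype("synthetic.bin")
print(dtype, f"{confidence[dtype]:.1%}")  # expect float32 or float64 to score highest

trace = load_binary_auto("synthetic.bin", sample_rate=1e6)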
oscura/loaders/validation.py
CHANGED

@@ -475,24 +475,31 @@ class PacketValidator:
 
     @staticmethod
     def _crc32(data: bytes, poly: int = 0xEDB88320) -> int:
-        """Compute CRC-32 checksum.
+        """Compute CRC-32 checksum using native implementation.
 
         Args:
             data: Data to checksum.
             poly: CRC polynomial (default: 0xEDB88320 for CRC-32).
+                Note: Only standard CRC-32 polynomial is supported by native implementation.
 
         Returns:
             CRC-32 value.
+
+        Note:
+            Uses zlib.crc32() for performance (~100x faster than pure Python).
+            Custom polynomials are not supported - raises ValueError if non-standard poly provided.
         """
-
-
-
-
-
-
-
-
-
+        import zlib
+
+        # Verify standard CRC-32 polynomial (zlib only supports this)
+        if poly != 0xEDB88320:
+            raise ValueError(
+                f"Non-standard CRC polynomial {poly:#x} not supported by native implementation. "
+                "Only standard CRC-32 (0xEDB88320) is available."
+            )
+
+        # zlib.crc32 returns signed int on some platforms, mask to unsigned
+        return zlib.crc32(data) & 0xFFFFFFFF
 
     def get_statistics(self) -> ValidationStats:
         """Get aggregate validation statistics.
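The removed pure-Python body is not rendered in this diff, but a textbook bit-by-bit reflected CRC-32 (init 0xFFFFFFFF, final XOR 0xFFFFFFFF) computes exactly what zlib.crc32 does, which is why the swap is behavior-preserving for the default polynomial. A quick equivalence check:

import zlib

def crc32_pure(data: bytes, poly: int = 0xEDB88320) -> int:
    """Reference bit-by-bit reflected CRC-32 (init/final XOR 0xFFFFFFFF)."""
    crc = 0xFFFFFFFF
    for byte in data:
        crc ^= byte
        for _ in range(8):
            crc = (crc >> 1) ^ poly if crc & 1 else crc >> 1
    return crc ^ 0xFFFFFFFF

payload = b"oscura packet payload"
assert crc32_pure(payload) == zlib.crc32(payload) & 0xFFFFFFFF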
oscura/sessions/legacy.py
CHANGED

@@ -17,6 +17,8 @@ import gzip
 import hashlib
 import hmac
 import pickle
+import secrets
+import threading
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum
@@ -25,10 +27,108 @@ from typing import Any
 
 from oscura.core.exceptions import SecurityError
 
+# Global lock for security key generation to prevent race conditions (thread-level)
+_KEY_GENERATION_LOCK = threading.Lock()
+
+# Try to import fcntl for process-level file locking (Unix only)
+try:
+    import fcntl
+
+    HAS_FCNTL = True
+except ImportError:
+    HAS_FCNTL = False
+
 # Session file format constants
 _SESSION_MAGIC = b"OSC1"  # Magic bytes for new format with signature
 _SESSION_SIGNATURE_SIZE = 32  # SHA256 hash size in bytes
-
+
+
+def _get_security_key() -> bytes:
+    """Get or generate per-installation session security key.
+
+    The key is generated once per installation and stored in ~/.oscura/session_key
+    with restrictive permissions (0o600). This provides better security than a
+    shared hardcoded key.
+
+    Uses both thread-level and process-level locking to prevent race conditions
+    during parallel key generation.
+
+    Returns:
+        32-byte security key for HMAC signing.
+    """
+    key_file = Path.home() / ".oscura" / "session_key"
+    lock_file = Path.home() / ".oscura" / "session_key.lock"
+
+    # Thread-level lock first
+    with _KEY_GENERATION_LOCK:
+        # Check if key exists
+        if key_file.exists():
+            try:
+                return key_file.read_bytes()
+            except (OSError, PermissionError):
+                # Fall back to generating new key if can't read
+                pass
+
+        # Create parent directory
+        key_file.parent.mkdir(parents=True, exist_ok=True)
+
+        # Process-level file lock for parallel pytest workers
+        if HAS_FCNTL:
+            # Use file locking on Unix systems
+            lock_file.parent.mkdir(parents=True, exist_ok=True)
+            with open(lock_file, "w") as lock_fd:
+                try:
+                    fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX)
+
+                    # Double-check after acquiring file lock
+                    if key_file.exists():
+                        try:
+                            key = key_file.read_bytes()
+                            fcntl.flock(lock_fd.fileno(), fcntl.LOCK_UN)
+                            return key
+                        except (OSError, PermissionError):
+                            pass
+
+                    # Generate new random key
+                    key = secrets.token_bytes(32)
+
+                    # Write with restrictive permissions
+                    try:
+                        key_file.write_bytes(key)
+                        key_file.chmod(0o600)  # Owner read/write only
+                    except (OSError, PermissionError):
+                        # Can't write key file - continue with ephemeral key
+                        pass
+
+                    fcntl.flock(lock_fd.fileno(), fcntl.LOCK_UN)
+                    return key
+                except OSError:
+                    # File locking failed, continue without lock
+                    pass
+
+        # Fallback without file locking (Windows or locking unavailable)
+        # Double-check one more time
+        if key_file.exists():
+            try:
+                return key_file.read_bytes()
+            except (OSError, PermissionError):
+                pass
+
+        # Generate new random key
+        key = secrets.token_bytes(32)
+
+        # Write with restrictive permissions
+        try:
+            key_file.write_bytes(key)
+            key_file.chmod(0o600)  # Owner read/write only
+        except (OSError, PermissionError):
+            # Can't write key file - continue with ephemeral key
+            pass
+
+        return key
+
+
+_SECURITY_KEY = _get_security_key()
 
 
 class AnnotationType(Enum):
@@ -709,6 +809,15 @@ class Session:
 def load_session(path: str | Path) -> Session:
     """Load session from file.
 
+    This function implements HMAC-SHA256 signature verification before deserializing
+    session data to protect against tampering and malicious file modifications.
+
+    Security:
+        Session files are protected with HMAC-SHA256 signatures. Only load session
+        files from trusted sources. While HMAC verification prevents tampering,
+        the shared security key means all installations can verify each other's
+        files. Consider using per-installation keys for sensitive deployments.
+
     Args:
         path: Path to session file (.tks).
 
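The diff shows the key management and the format constants (OSC1 magic, 32-byte signature) but not the serialization itself. A minimal sketch of HMAC-SHA256-signed pickling consistent with those constants; oscura's actual on-disk layout may differ:

import hashlib
import hmac
import pickle

_MAGIC = b"OSC1"
_SIG_SIZE = 32  # SHA-256 digest length

def dump_signed(obj: object, key: bytes) -> bytes:
    blob = pickle.dumps(obj)
    sig = hmac.new(key, blob, hashlib.sha256).digest()
    return _MAGIC + sig + blob

def load_signed(raw: bytes, key: bytes) -> object:
    if raw[:4] != _MAGIC:
        raise ValueError("not a signed session file")
    sig, blob = raw[4 : 4 + _SIG_SIZE], raw[4 + _SIG_SIZE :]
    expected = hmac.new(key, blob, hashlib.sha256).digest()
    if not hmac.compare_digest(sig, expected):
        raise ValueError("signature verification failed")
    return pickle.loads(blob)  # deserialize only after verification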
oscura/workflows/batch/aggregate.py
CHANGED

@@ -339,7 +339,11 @@ def _create_metric_plot(
         plot_file.parent.mkdir(parents=True, exist_ok=True)
         plt.savefig(plot_file)
     else:
-
+        # Try to show, but gracefully handle non-interactive backends
+        try:
+            plt.show()
+        except Exception:
+            pass  # Silently skip if backend doesn't support interactive display
 
 
 def _plot_histogram(
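On non-interactive backends (e.g. Agg in CI) plt.show() may warn or, depending on backend and matplotlib version, raise, which is what the new try/except guards against. The save-or-show branching reduces to a pattern like this sketch:

import matplotlib
matplotlib.use("Agg")  # headless backend, as in CI
import matplotlib.pyplot as plt

def render(fig, plot_file: str | None = None) -> None:
    if plot_file is not None:
        fig.savefig(plot_file)  # always works, even headless
    else:
        try:
            plt.show()  # no-op or warning on Agg; may raise elsewhere
        except Exception:
            pass  # never let display failures break a batch run

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1])
render(fig, plot_file="metric.png")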
|