oscura-0.8.0-py3-none-any.whl → oscura-0.11.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. oscura/__init__.py +19 -19
  2. oscura/__main__.py +4 -0
  3. oscura/analyzers/__init__.py +2 -0
  4. oscura/analyzers/digital/extraction.py +2 -3
  5. oscura/analyzers/digital/quality.py +1 -1
  6. oscura/analyzers/digital/timing.py +1 -1
  7. oscura/analyzers/ml/signal_classifier.py +6 -0
  8. oscura/analyzers/patterns/__init__.py +66 -0
  9. oscura/analyzers/power/basic.py +3 -3
  10. oscura/analyzers/power/soa.py +1 -1
  11. oscura/analyzers/power/switching.py +3 -3
  12. oscura/analyzers/signal_classification.py +529 -0
  13. oscura/analyzers/signal_integrity/sparams.py +3 -3
  14. oscura/analyzers/statistics/basic.py +10 -7
  15. oscura/analyzers/validation.py +1 -1
  16. oscura/analyzers/waveform/measurements.py +200 -156
  17. oscura/analyzers/waveform/measurements_with_uncertainty.py +91 -35
  18. oscura/analyzers/waveform/spectral.py +182 -84
  19. oscura/api/dsl/commands.py +15 -6
  20. oscura/api/server/templates/base.html +137 -146
  21. oscura/api/server/templates/export.html +84 -110
  22. oscura/api/server/templates/home.html +248 -267
  23. oscura/api/server/templates/protocols.html +44 -48
  24. oscura/api/server/templates/reports.html +27 -35
  25. oscura/api/server/templates/session_detail.html +68 -78
  26. oscura/api/server/templates/sessions.html +62 -72
  27. oscura/api/server/templates/waveforms.html +54 -64
  28. oscura/automotive/__init__.py +1 -1
  29. oscura/automotive/can/session.py +1 -1
  30. oscura/automotive/dbc/generator.py +638 -23
  31. oscura/automotive/dtc/data.json +17 -102
  32. oscura/automotive/flexray/fibex.py +9 -1
  33. oscura/automotive/uds/decoder.py +99 -6
  34. oscura/cli/analyze.py +8 -2
  35. oscura/cli/batch.py +36 -5
  36. oscura/cli/characterize.py +18 -4
  37. oscura/cli/export.py +47 -5
  38. oscura/cli/main.py +2 -0
  39. oscura/cli/onboarding/wizard.py +10 -6
  40. oscura/cli/pipeline.py +585 -0
  41. oscura/cli/visualize.py +6 -4
  42. oscura/convenience.py +400 -32
  43. oscura/core/measurement_result.py +286 -0
  44. oscura/core/progress.py +1 -1
  45. oscura/core/schemas/device_mapping.json +2 -8
  46. oscura/core/schemas/packet_format.json +4 -24
  47. oscura/core/schemas/protocol_definition.json +2 -12
  48. oscura/core/types.py +232 -239
  49. oscura/correlation/multi_protocol.py +1 -1
  50. oscura/export/legacy/__init__.py +11 -0
  51. oscura/export/legacy/wav.py +75 -0
  52. oscura/exporters/__init__.py +19 -0
  53. oscura/exporters/wireshark.py +809 -0
  54. oscura/hardware/acquisition/file.py +5 -19
  55. oscura/hardware/acquisition/saleae.py +10 -10
  56. oscura/hardware/acquisition/socketcan.py +4 -6
  57. oscura/hardware/acquisition/synthetic.py +1 -5
  58. oscura/hardware/acquisition/visa.py +6 -6
  59. oscura/hardware/security/side_channel_detector.py +5 -508
  60. oscura/inference/message_format.py +686 -1
  61. oscura/jupyter/display.py +2 -2
  62. oscura/jupyter/magic.py +3 -3
  63. oscura/loaders/__init__.py +17 -12
  64. oscura/loaders/binary.py +1 -1
  65. oscura/loaders/chipwhisperer.py +1 -2
  66. oscura/loaders/configurable.py +1 -1
  67. oscura/loaders/csv_loader.py +2 -2
  68. oscura/loaders/hdf5_loader.py +1 -1
  69. oscura/loaders/lazy.py +6 -1
  70. oscura/loaders/mmap_loader.py +0 -1
  71. oscura/loaders/numpy_loader.py +8 -7
  72. oscura/loaders/preprocessing.py +3 -5
  73. oscura/loaders/rigol.py +21 -7
  74. oscura/loaders/sigrok.py +2 -5
  75. oscura/loaders/tdms.py +3 -2
  76. oscura/loaders/tektronix.py +38 -32
  77. oscura/loaders/tss.py +20 -27
  78. oscura/loaders/validation.py +17 -10
  79. oscura/loaders/vcd.py +13 -8
  80. oscura/loaders/wav.py +1 -6
  81. oscura/pipeline/__init__.py +76 -0
  82. oscura/pipeline/handlers/__init__.py +165 -0
  83. oscura/pipeline/handlers/analyzers.py +1045 -0
  84. oscura/pipeline/handlers/decoders.py +899 -0
  85. oscura/pipeline/handlers/exporters.py +1103 -0
  86. oscura/pipeline/handlers/filters.py +891 -0
  87. oscura/pipeline/handlers/loaders.py +640 -0
  88. oscura/pipeline/handlers/transforms.py +768 -0
  89. oscura/reporting/formatting/measurements.py +55 -14
  90. oscura/reporting/templates/enhanced/protocol_re.html +504 -503
  91. oscura/sessions/legacy.py +49 -1
  92. oscura/side_channel/__init__.py +38 -57
  93. oscura/utils/builders/signal_builder.py +5 -5
  94. oscura/utils/comparison/compare.py +7 -9
  95. oscura/utils/comparison/golden.py +1 -1
  96. oscura/utils/filtering/convenience.py +2 -2
  97. oscura/utils/math/arithmetic.py +38 -62
  98. oscura/utils/math/interpolation.py +20 -20
  99. oscura/utils/pipeline/__init__.py +4 -17
  100. oscura/utils/progressive.py +1 -4
  101. oscura/utils/triggering/edge.py +1 -1
  102. oscura/utils/triggering/pattern.py +2 -2
  103. oscura/utils/triggering/pulse.py +2 -2
  104. oscura/utils/triggering/window.py +3 -3
  105. oscura/validation/hil_testing.py +11 -11
  106. oscura/visualization/__init__.py +46 -284
  107. oscura/visualization/batch.py +72 -433
  108. oscura/visualization/plot.py +542 -53
  109. oscura/visualization/styles.py +184 -318
  110. oscura/workflows/batch/advanced.py +1 -1
  111. oscura/workflows/batch/aggregate.py +12 -9
  112. oscura/workflows/complete_re.py +251 -23
  113. oscura/workflows/digital.py +27 -4
  114. oscura/workflows/multi_trace.py +136 -17
  115. oscura/workflows/waveform.py +11 -6
  116. oscura-0.11.0.dist-info/METADATA +460 -0
  117. {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/RECORD +120 -145
  118. oscura/side_channel/dpa.py +0 -1025
  119. oscura/utils/optimization/__init__.py +0 -19
  120. oscura/utils/optimization/parallel.py +0 -443
  121. oscura/utils/optimization/search.py +0 -532
  122. oscura/utils/pipeline/base.py +0 -338
  123. oscura/utils/pipeline/composition.py +0 -248
  124. oscura/utils/pipeline/parallel.py +0 -449
  125. oscura/utils/pipeline/pipeline.py +0 -375
  126. oscura/utils/search/__init__.py +0 -16
  127. oscura/utils/search/anomaly.py +0 -424
  128. oscura/utils/search/context.py +0 -294
  129. oscura/utils/search/pattern.py +0 -288
  130. oscura/utils/storage/__init__.py +0 -61
  131. oscura/utils/storage/database.py +0 -1166
  132. oscura/visualization/accessibility.py +0 -526
  133. oscura/visualization/annotations.py +0 -371
  134. oscura/visualization/axis_scaling.py +0 -305
  135. oscura/visualization/colors.py +0 -451
  136. oscura/visualization/digital.py +0 -436
  137. oscura/visualization/eye.py +0 -571
  138. oscura/visualization/histogram.py +0 -281
  139. oscura/visualization/interactive.py +0 -1035
  140. oscura/visualization/jitter.py +0 -1042
  141. oscura/visualization/keyboard.py +0 -394
  142. oscura/visualization/layout.py +0 -400
  143. oscura/visualization/optimization.py +0 -1079
  144. oscura/visualization/palettes.py +0 -446
  145. oscura/visualization/power.py +0 -508
  146. oscura/visualization/power_extended.py +0 -955
  147. oscura/visualization/presets.py +0 -469
  148. oscura/visualization/protocols.py +0 -1246
  149. oscura/visualization/render.py +0 -223
  150. oscura/visualization/rendering.py +0 -444
  151. oscura/visualization/reverse_engineering.py +0 -838
  152. oscura/visualization/signal_integrity.py +0 -989
  153. oscura/visualization/specialized.py +0 -643
  154. oscura/visualization/spectral.py +0 -1226
  155. oscura/visualization/thumbnails.py +0 -340
  156. oscura/visualization/time_axis.py +0 -351
  157. oscura/visualization/waveform.py +0 -454
  158. oscura-0.8.0.dist-info/METADATA +0 -661
  159. {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/WHEEL +0 -0
  160. {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/entry_points.txt +0 -0
  161. {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/licenses/LICENSE +0 -0
@@ -30,6 +30,23 @@ from numpy.typing import NDArray
 from oscura.inference.alignment import align_local


+@dataclass
+class FieldDetectionResult:
+    """Result from a field type detector.
+
+    Attributes:
+        confidence: Detection confidence score (0.0-1.0)
+        field_offset: Field offset in bytes
+        field_length: Field length in bytes
+        evidence: Supporting data for detection
+    """
+
+    confidence: float
+    field_offset: int
+    field_length: int
+    evidence: dict[str, Any]
+
+
 @dataclass
 class InferredField:
     """An inferred message field.
@@ -51,7 +68,18 @@ class InferredField:
     name: str
     offset: int
     size: int
-    field_type: Literal["constant", "counter", "timestamp", "length", "checksum", "data", "unknown"]
+    field_type: Literal[
+        "constant",
+        "counter",
+        "timestamp",
+        "length",
+        "checksum",
+        "data",
+        "float",
+        "enum",
+        "reserved",
+        "unknown",
+    ]
     entropy: float
     variance: float
     confidence: float
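
The two hunks above introduce the `FieldDetectionResult` container and widen `InferredField.field_type` with `"float"`, `"enum"`, and `"reserved"`. The following is a minimal sketch (not part of the diff) of how these pieces fit together; it assumes oscura ≥ 0.11.0 is installed and that the import path matches the file being diffed here, `oscura/inference/message_format.py`.

```python
# Sketch only: constructing the new result type and mapping it onto the
# widened field_type literals. The import path is taken from this diff;
# the confidence threshold of 0.8 is an illustrative choice, not an API.
from oscura.inference.message_format import FieldDetectionResult

# A detector reporting a likely 4-byte timestamp at offset 0.
result = FieldDetectionResult(
    confidence=0.87,
    field_offset=0,
    field_length=4,
    evidence={"monotonic_ratio": 1.0, "is_unix_timestamp": True},
)

# InferredField.field_type now also accepts "float", "enum", and "reserved"
# in addition to the values available in 0.8.0.
field_type = "timestamp" if result.confidence >= 0.8 else "unknown"
print(field_type, result.evidence)
```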
@@ -1369,6 +1397,663 @@ class MessageFormatInferrer:
         # Return first byte for larger fields
         return int(msg[field.offset])

+    # =============================================================================
+    # Advanced Field Type Detectors
+    # =============================================================================
+
+    def detect_timestamp_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int
+    ) -> FieldDetectionResult:
+        """Detect timestamp fields in message data.
+
+        : Timestamp field detection with wraparound handling.
+
+        Analyzes field values for characteristics of timestamp fields:
+        - Monotonically increasing values (with wraparound support)
+        - Regular intervals (milliseconds, seconds, microseconds)
+        - Unix timestamp patterns (seconds since epoch)
+        - 32-bit or 64-bit timestamp detection
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes (typically 4 or 8)
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([0, 0, 0, 100], dtype=np.uint8),
+            ...     np.array([0, 0, 0, 200], dtype=np.uint8)]
+            >>> result = inferrer.detect_timestamp_field(messages, 0, 4)
+            >>> result.confidence > 0.8  # High confidence for regular intervals
+            True
+        """
+        if size not in [4, 8]:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Invalid size for timestamp (must be 4 or 8 bytes)"},
+            )
+
+        # Extract field values (big-endian)
+        values: list[int] = []
+        for msg in messages:
+            if size == 4:
+                val = (
+                    int(msg[offset]) << 24
+                    | int(msg[offset + 1]) << 16
+                    | int(msg[offset + 2]) << 8
+                    | int(msg[offset + 3])
+                )
+            else:  # size == 8
+                val = (
+                    int(msg[offset]) << 56
+                    | int(msg[offset + 1]) << 48
+                    | int(msg[offset + 2]) << 40
+                    | int(msg[offset + 3]) << 32
+                    | int(msg[offset + 4]) << 24
+                    | int(msg[offset + 5]) << 16
+                    | int(msg[offset + 6]) << 8
+                    | int(msg[offset + 7])
+                )
+            values.append(val)
+
+        if len(values) < 3:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Insufficient samples for timestamp detection"},
+            )
+
+        # Calculate differences between consecutive values
+        diffs = [values[i + 1] - values[i] for i in range(len(values) - 1)]
+
+        # Check for monotonically increasing (handle wraparound for 32-bit)
+        max_val = (1 << (size * 8)) - 1
+        increasing_count = 0
+        wraparound_count = 0
+
+        for diff in diffs:
+            if diff > 0:
+                increasing_count += 1
+            elif diff < -(max_val // 2):  # Likely wraparound
+                wraparound_count += 1
+                increasing_count += 1
+
+        monotonic_ratio = increasing_count / len(diffs)
+
+        if monotonic_ratio < 0.7:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={
+                    "reason": "Values not monotonically increasing",
+                    "monotonic_ratio": monotonic_ratio,
+                },
+            )
+
+        # Analyze intervals for regularity
+        positive_diffs = [d for d in diffs if d > 0]
+        if not positive_diffs:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "No positive increments"},
+            )
+
+        avg_interval = sum(positive_diffs) / len(positive_diffs)
+        interval_variance = np.var(positive_diffs)
+        interval_std = np.sqrt(interval_variance)
+
+        # Regular intervals have low coefficient of variation
+        cv = interval_std / avg_interval if avg_interval > 0 else float("inf")
+
+        # Check for Unix timestamp patterns (seconds since 1970-01-01)
+        unix_epoch_2020 = 1577836800  # 2020-01-01 00:00:00 UTC
+        unix_epoch_2030 = 1893456000  # 2030-01-01 00:00:00 UTC
+
+        is_unix_timestamp = False
+        if size == 4 and all(unix_epoch_2020 <= v <= unix_epoch_2030 for v in values[:5]):
+            is_unix_timestamp = True
+
+        # Calculate confidence based on multiple factors
+        confidence = 0.0
+
+        # Factor 1: Monotonic increase (0-0.4)
+        confidence += min(monotonic_ratio, 1.0) * 0.4
+
+        # Factor 2: Regular intervals (0-0.4)
+        if cv < 0.1:  # Very regular
+            confidence += 0.4
+        elif cv < 0.5:  # Somewhat regular
+            confidence += 0.2
+        elif cv < 1.0:  # Loosely regular
+            confidence += 0.1
+
+        # Factor 3: Unix timestamp detection (0-0.2)
+        if is_unix_timestamp:
+            confidence += 0.2
+
+        evidence = {
+            "monotonic_ratio": float(monotonic_ratio),
+            "avg_interval": float(avg_interval),
+            "interval_std": float(interval_std),
+            "coefficient_of_variation": float(cv),
+            "wraparound_detected": wraparound_count > 0,
+            "is_unix_timestamp": is_unix_timestamp,
+            "sample_values": values[:5],
+        }
+
+        return FieldDetectionResult(
+            confidence=min(confidence, 1.0),
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+
+    def detect_float_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int
+    ) -> FieldDetectionResult:
+        """Detect IEEE 754 floating-point fields.
+
+        : Floating-point field detection.
+
+        Identifies floating-point encoded fields by:
+        - Checking for valid IEEE 754 patterns (sign, exponent, mantissa)
+        - Detecting 32-bit (single) and 64-bit (double) precision floats
+        - Validating reasonable ranges (not NaN, not Inf)
+        - Checking value distribution consistency
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes (4 for float32, 8 for float64)
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([0x40, 0x49, 0x0F, 0xDB], dtype=np.uint8)]  # 3.14159
+            >>> result = inferrer.detect_float_field(messages, 0, 4)
+            >>> result.confidence > 0.5
+            True
+        """
+        if size not in [4, 8]:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Invalid size for float (must be 4 or 8 bytes)"},
+            )
+
+        # Extract raw bytes and attempt IEEE 754 interpretation
+        float_values: list[float] = []
+        valid_count = 0
+
+        for msg in messages:
+            raw_bytes = bytes(msg[offset : offset + size])
+
+            try:
+                if size == 4:
+                    # 32-bit float (big-endian)
+                    val = np.frombuffer(raw_bytes, dtype=">f4")[0]
+                else:  # size == 8
+                    # 64-bit double (big-endian)
+                    val = np.frombuffer(raw_bytes, dtype=">f8")[0]
+
+                # Check for valid float (not NaN, not Inf)
+                if np.isfinite(val):
+                    float_values.append(float(val))
+                    valid_count += 1
+                else:
+                    float_values.append(0.0)
+
+            except (ValueError, IndexError):
+                float_values.append(0.0)
+
+        if len(messages) == 0:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "No messages to analyze"},
+            )
+
+        valid_ratio = valid_count / len(messages)
+
+        # Need majority valid floats
+        if valid_ratio < 0.5:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Too many invalid floats", "valid_ratio": valid_ratio},
+            )
+
+        # Analyze value distribution
+        valid_floats = [v for v in float_values if v != 0.0 or valid_count == len(messages)]
+
+        if not valid_floats:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "No valid float values"},
+            )
+
+        # Check for reasonable range (not all zeros, some variance)
+        float_variance = float(np.var(valid_floats))
+        float_mean = float(np.mean(valid_floats))
+        float_range = float(np.max(valid_floats) - np.min(valid_floats))
+
+        # Calculate confidence
+        confidence = 0.0
+
+        # Factor 1: Valid float ratio (0-0.5)
+        confidence += valid_ratio * 0.5
+
+        # Factor 2: Non-zero variance (0-0.3)
+        if float_variance > 1e-6:
+            confidence += 0.3
+        elif float_variance > 1e-12:
+            confidence += 0.15
+
+        # Factor 3: Reasonable range (0-0.2)
+        if float_range > 0:
+            confidence += 0.2
+
+        evidence = {
+            "valid_float_ratio": float(valid_ratio),
+            "float_mean": float(float_mean),
+            "float_variance": float(float_variance),
+            "float_range": float(float_range),
+            "sample_values": valid_floats[:5],
+            "size_bits": size * 8,
+        }
+
+        return FieldDetectionResult(
+            confidence=min(confidence, 1.0),
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+
+    def detect_length_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int, msg_len: int
+    ) -> FieldDetectionResult:
+        """Detect length fields with endianness detection.
+
+        : Length field detection with byte order analysis.
+
+        Identifies fields that encode message or payload length by:
+        - Correlating field value with message/payload size
+        - Checking if value matches subsequent data length
+        - Detecting big-endian vs little-endian encoding
+        - Validating length values are within reasonable bounds
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes (typically 1, 2, or 4)
+            msg_len: Total message length for validation
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([0, 10, *range(10)], dtype=np.uint8)]
+            >>> result = inferrer.detect_length_field(messages, 0, 2, 12)
+            >>> result.confidence > 0.7
+            True
+        """
+        if size > 4:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Invalid size for length field (max 4 bytes)"},
+            )
+
+        # Extract field values (try both endianness)
+        values_be: list[int] = []  # Big-endian
+        values_le: list[int] = []  # Little-endian
+
+        for msg in messages:
+            if size == 1:
+                val = int(msg[offset])
+                values_be.append(val)
+                values_le.append(val)
+            elif size == 2:
+                val_be = int(msg[offset]) << 8 | int(msg[offset + 1])
+                val_le = int(msg[offset + 1]) << 8 | int(msg[offset])
+                values_be.append(val_be)
+                values_le.append(val_le)
+            elif size == 4:
+                val_be = (
+                    int(msg[offset]) << 24
+                    | int(msg[offset + 1]) << 16
+                    | int(msg[offset + 2]) << 8
+                    | int(msg[offset + 3])
+                )
+                val_le = (
+                    int(msg[offset + 3]) << 24
+                    | int(msg[offset + 2]) << 16
+                    | int(msg[offset + 1]) << 8
+                    | int(msg[offset])
+                )
+                values_be.append(val_be)
+                values_le.append(val_le)
+            else:  # size == 3
+                val_be = int(msg[offset]) << 16 | int(msg[offset + 1]) << 8 | int(msg[offset + 2])
+                val_le = int(msg[offset + 2]) << 16 | int(msg[offset + 1]) << 8 | int(msg[offset])
+                values_be.append(val_be)
+                values_le.append(val_le)
+
+        # Check correlation with message length
+        def check_correlation(values: list[int]) -> tuple[float, str]:
+            """Check correlation between field values and message structure."""
+            # Pattern 1: Total message length
+            total_len_matches = sum(1 for v in values if v == msg_len)
+
+            # Pattern 2: Remaining message length (after this field)
+            remaining_len = msg_len - offset - size
+            remaining_matches = sum(1 for v in values if v == remaining_len)
+
+            # Pattern 3: Payload length (common header size assumptions)
+            for header_size in [4, 8, 12, 16]:
+                if offset + size <= header_size:
+                    payload_len = msg_len - header_size
+                    payload_matches = sum(1 for v in values if v == payload_len)
+                    if payload_matches > 0:
+                        match_ratio = payload_matches / len(values)
+                        if match_ratio > 0.7:
+                            return (match_ratio, f"payload_length_after_{header_size}B_header")
+
+            # Pattern 4: Values within reasonable bounds
+            reasonable = sum(1 for v in values if 0 < v < msg_len * 2)
+            reasonable_ratio = reasonable / len(values) if values else 0
+
+            # Best match
+            best_ratio = max(total_len_matches, remaining_matches) / len(values) if values else 0
+
+            if total_len_matches > remaining_matches:
+                return (best_ratio, "total_message_length")
+            elif remaining_matches > 0:
+                return (best_ratio, "remaining_bytes_after_field")
+            elif reasonable_ratio > 0.5:
+                return (reasonable_ratio * 0.5, "reasonable_length_values")
+            else:
+                return (0.0, "no_correlation")
+
+        # Check both endianness
+        corr_be, pattern_be = check_correlation(values_be)
+        corr_le, pattern_le = check_correlation(values_le)
+
+        # Select best endianness
+        if corr_be >= corr_le:
+            confidence = corr_be
+            endianness = "big"
+            pattern = pattern_be
+            values = values_be
+        else:
+            confidence = corr_le
+            endianness = "little"
+            pattern = pattern_le
+            values = values_le
+
+        # Boost confidence if field is early in message (typical for length fields)
+        if offset < 8:
+            confidence = min(confidence * 1.2, 1.0)
+
+        evidence = {
+            "endianness": endianness,
+            "correlation_pattern": pattern,
+            "correlation_ratio": float(confidence),
+            "sample_values": values[:5],
+            "message_length": msg_len,
+            "field_offset": offset,
+        }
+
+        return FieldDetectionResult(
+            confidence=min(confidence, 1.0),
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+
+    def detect_enum_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int
+    ) -> FieldDetectionResult:
+        """Detect enumeration fields with value frequency analysis.
+
+        : Enum field detection with value distribution.
+
+        Identifies fields with limited discrete value sets by:
+        - Finding fields with <20 unique values
+        - Building value frequency distribution
+        - Checking for reasonable enum characteristics
+        - Suggesting common enum patterns (states, commands, types)
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes (typically 1 or 2)
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([1], dtype=np.uint8), np.array([2], dtype=np.uint8)]
+            >>> result = inferrer.detect_enum_field(messages, 0, 1)
+            >>> result.confidence > 0.8
+            True
+        """
+        if size > 4:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Field too large for enum (max 4 bytes)"},
+            )
+
+        # Extract field values
+        values: list[int] = []
+        for msg in messages:
+            if size == 1:
+                val = int(msg[offset])
+            elif size == 2:
+                val = int(msg[offset]) << 8 | int(msg[offset + 1])
+            elif size == 4:
+                val = (
+                    int(msg[offset]) << 24
+                    | int(msg[offset + 1]) << 16
+                    | int(msg[offset + 2]) << 8
+                    | int(msg[offset + 3])
+                )
+            else:  # size == 3
+                val = int(msg[offset]) << 16 | int(msg[offset + 1]) << 8 | int(msg[offset + 2])
+            values.append(val)
+
+        # Count unique values
+        unique_values = set(values)
+        unique_count = len(unique_values)
+
+        # Not an enum if only 1 value (that's a constant)
+        if unique_count <= 1:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Only one unique value (constant field)"},
+            )
+
+        # Not an enum if too many values (>20 threshold)
+        if unique_count > 20:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": f"Too many unique values ({unique_count} > 20)"},
+            )
+
+        # Build frequency distribution
+        value_counts: dict[int, int] = {}
+        for v in values:
+            value_counts[v] = value_counts.get(v, 0) + 1
+
+        # Sort by frequency
+        sorted_values = sorted(value_counts.items(), key=lambda x: x[1], reverse=True)
+
+        # Calculate entropy
+        total = len(values)
+        probabilities = [count / total for _, count in sorted_values]
+        entropy = -sum(p * np.log2(p) for p in probabilities if p > 0)
+
+        # Calculate confidence based on characteristics
+        confidence = 0.0
+
+        # Factor 1: Few unique values (0-0.4)
+        if unique_count <= 5:
+            confidence += 0.4
+        elif unique_count <= 10:
+            confidence += 0.3
+        else:
+            confidence += 0.2
+
+        # Factor 2: Good value distribution (0-0.3)
+        # Enums typically have reasonable distribution (not too skewed)
+        max_freq = max(value_counts.values())
+        max_freq_ratio = max_freq / total
+        if 0.1 < max_freq_ratio < 0.9:  # Balanced distribution
+            confidence += 0.3
+        elif max_freq_ratio <= 0.95:  # Somewhat balanced
+            confidence += 0.15
+
+        # Factor 3: Low entropy relative to max possible (0-0.3)
+        max_entropy = np.log2(unique_count)
+        if max_entropy > 0:
+            entropy_ratio = entropy / max_entropy
+            if entropy_ratio > 0.5:  # Well-distributed
+                confidence += 0.3
+            else:  # Skewed distribution
+                confidence += 0.15
+
+        # Suggest enum type based on characteristics
+        enum_type = "unknown"
+        if unique_count <= 3:
+            enum_type = "boolean_or_state"
+        elif unique_count <= 8:
+            enum_type = "command_or_type"
+        else:
+            enum_type = "extended_enum"
+
+        evidence = {
+            "unique_count": unique_count,
+            "value_distribution": dict(sorted_values[:10]),  # Top 10 most frequent
+            "entropy": float(entropy),
+            "max_entropy": float(max_entropy),
+            "suggested_enum_type": enum_type,
+            "most_common_value": sorted_values[0][0],
+            "most_common_frequency": sorted_values[0][1],
+        }
+
+        return FieldDetectionResult(
+            confidence=min(confidence, 1.0),
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+
+    def detect_reserved_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int
+    ) -> FieldDetectionResult:
+        """Detect reserved or padding fields.
+
+        : Reserved field detection.
+
+        Identifies fields that are always zero or constant by:
+        - Checking for always-zero fields (padding)
+        - Detecting always-constant fields (reserved)
+        - Tracking consistency across captures
+        - Distinguishing from intentional constant fields
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([0, 0, 0, 0], dtype=np.uint8) for _ in range(10)]
+            >>> result = inferrer.detect_reserved_field(messages, 0, 4)
+            >>> result.confidence == 1.0
+            True
+        """
+        # Extract all bytes in field across all messages
+        all_bytes: list[int] = []
+        for msg in messages:
+            for i in range(size):
+                all_bytes.append(int(msg[offset + i]))
+
+        # Check if all bytes are identical
+        unique_bytes = set(all_bytes)
+
+        if len(unique_bytes) != 1:
+            # Not reserved - has variation
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": f"Field has variation ({len(unique_bytes)} unique values)"},
+            )
+
+        constant_value = next(iter(unique_bytes))
+
+        # Determine field type
+        if constant_value == 0:
+            field_subtype = "padding_zeros"
+            confidence = 1.0
+        else:
+            field_subtype = "reserved_constant"
+            # Slightly lower confidence - could be intentional constant
+            confidence = 0.9
+
+        # Additional checks for reserved field characteristics
+        # Reserved fields are often:
+        # 1. At specific alignment boundaries
+        # 2. Between other fields
+        # 3. At end of structures
+
+        alignment_bonus = 0.0
+        if offset % 4 == 0 and size % 4 == 0:
+            alignment_bonus = 0.05  # Aligned to 4-byte boundary
+        elif offset % 2 == 0 and size % 2 == 0:
+            alignment_bonus = 0.03  # Aligned to 2-byte boundary
+
+        confidence = min(confidence + alignment_bonus, 1.0)
+
+        evidence = {
+            "constant_value": constant_value,
+            "field_subtype": field_subtype,
+            "total_bytes_checked": len(all_bytes),
+            "is_aligned": alignment_bonus > 0,
+            "is_zero_padding": constant_value == 0,
+        }
+
+        return FieldDetectionResult(
+            confidence=confidence,
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+

 def infer_format(messages: list[bytes | NDArray[np.uint8]], min_samples: int = 10) -> MessageSchema:
     """Convenience function for format inference.