oscura-0.11.0-py3-none-any.whl → oscura-0.12.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. oscura/__init__.py +1 -1
  2. oscura/analyzers/binary/__init__.py +36 -0
  3. oscura/analyzers/binary/core/__init__.py +29 -0
  4. oscura/analyzers/binary/core/file_access.py +193 -0
  5. oscura/analyzers/binary/core/pipeline.py +161 -0
  6. oscura/analyzers/binary/core/results.py +217 -0
  7. oscura/analyzers/binary/detection/__init__.py +10 -0
  8. oscura/analyzers/binary/detection/encoding.py +624 -0
  9. oscura/analyzers/binary/detection/patterns.py +320 -0
  10. oscura/analyzers/binary/detection/structure.py +630 -0
  11. oscura/analyzers/binary/export/__init__.py +9 -0
  12. oscura/analyzers/binary/export/dissector.py +174 -0
  13. oscura/analyzers/binary/inference/__init__.py +15 -0
  14. oscura/analyzers/binary/inference/checksums.py +214 -0
  15. oscura/analyzers/binary/inference/fields.py +150 -0
  16. oscura/analyzers/binary/inference/sequences.py +232 -0
  17. oscura/analyzers/binary/inference/timestamps.py +210 -0
  18. oscura/analyzers/binary/visualization/__init__.py +9 -0
  19. oscura/analyzers/binary/visualization/structure_view.py +182 -0
  20. oscura/automotive/__init__.py +1 -1
  21. oscura/automotive/dtc/data.json +102 -17
  22. oscura/core/schemas/device_mapping.json +8 -2
  23. oscura/core/schemas/packet_format.json +24 -4
  24. oscura/core/schemas/protocol_definition.json +12 -2
  25. oscura/loaders/__init__.py +4 -1
  26. oscura/loaders/binary.py +284 -1
  27. oscura/sessions/legacy.py +80 -19
  28. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/METADATA +3 -3
  29. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/RECORD +32 -14
  30. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/WHEEL +0 -0
  31. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/entry_points.txt +0 -0
  32. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/licenses/LICENSE +0 -0
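For orientation, the new binary-analysis classes introduced below can be imported by their module paths. A minimal sketch, assuming only the paths and class names visible in this diff (the aggregate exports of oscura/analyzers/binary/__init__.py are not shown here):

# Module paths match the new files listed above; class names appear in the hunks below.
from oscura.analyzers.binary.inference.sequences import SequenceAnalyzer
from oscura.analyzers.binary.inference.timestamps import TimestampAnalyzer
from oscura.analyzers.binary.visualization import StructureVisualizer  # re-exported by visualization/__init__.py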
oscura/analyzers/binary/inference/sequences.py
@@ -0,0 +1,232 @@
+"""Sequence number field analysis."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from oscura.analyzers.binary.core.results import Field, Message
+
+
+class SequenceAnalyzer:
+    """Analyze sequence number fields.
+
+    Detects monotonic counters, wrapping behavior, gaps, and duplicates.
+
+    Example:
+        >>> analyzer = SequenceAnalyzer()
+        >>> metadata = analyzer.analyze(field, messages)
+        >>> if metadata["is_sequence"]:
+        ...     print(f"Sequence: {metadata['start_value']} + {metadata['increment']}")
+    """
+
+    def analyze(self, field: Field, messages: list[Message]) -> dict[str, Any]:
+        """Analyze sequence number field.
+
+        Args:
+            field: Field to analyze.
+            messages: List of messages containing field.
+
+        Returns:
+            Metadata dict with sequence information.
+
+        Example:
+            >>> result = analyzer.analyze(field, messages)
+            >>> result["is_sequence"]  # True if monotonic
+            >>> result["gaps"]  # List of missing values
+        """
+        if len(messages) < 3:
+            return {
+                "is_sequence": False,
+                "reason": "insufficient_data",
+            }
+
+        # Extract field values from all messages
+        values = self._extract_values(field, messages)
+
+        if len(values) < 3:
+            return {
+                "is_sequence": False,
+                "reason": "could_not_extract_values",
+            }
+
+        # Check if monotonic increasing
+        is_monotonic = self._is_monotonic_increasing(values)
+
+        if not is_monotonic:
+            return {
+                "is_sequence": False,
+                "reason": "not_monotonic",
+            }
+
+        # Analyze sequence characteristics
+        start_value = int(values[0])
+        increment = self._detect_increment(values)
+        gaps = self._detect_gaps(values, increment)
+        duplicates = self._detect_duplicates(values)
+        wraps = self._detect_wrapping(values, field.length)
+
+        return {
+            "is_sequence": True,
+            "start_value": start_value,
+            "end_value": int(values[-1]),
+            "increment": increment,
+            "total_values": len(values),
+            "unique_values": len(set(values)),
+            "gaps": gaps,
+            "duplicates": duplicates,
+            "wrapping_detected": wraps > 0,
+            "wrap_count": wraps,
+        }
+
+    def _extract_values(self, field: Field, messages: list[Message]) -> list[int]:
+        """Extract field values from messages.
+
+        Args:
+            field: Field specification.
+            messages: List of messages.
+
+        Returns:
+            List of integer values.
+        """
+        values = []
+
+        for msg in messages:
+            if field.offset + field.length <= len(msg.data):
+                field_bytes = msg.data[field.offset : field.offset + field.length]
+
+                try:
+                    # Try interpreting as different integer types
+                    if field.length == 1:
+                        value = int(field_bytes[0])
+                    elif field.length == 2 or field.length == 4 or field.length == 8:
+                        value = int.from_bytes(field_bytes, byteorder="little", signed=False)
+                    else:
+                        continue
+
+                    values.append(value)
+                except Exception:
+                    continue
+
+        return values
+
+    def _is_monotonic_increasing(self, values: list[int]) -> bool:
+        """Check if values are monotonically increasing (allowing wraps).
+
+        Args:
+            values: List of values.
+
+        Returns:
+            True if mostly increasing.
+        """
+        if len(values) < 2:
+            return False
+
+        increasing_count = 0
+        total_pairs = 0
+
+        for i in range(len(values) - 1):
+            diff = values[i + 1] - values[i]
+
+            # Allow small decreases for wrapping
+            if diff >= 0 or diff < -1000:  # Likely wrapped
+                increasing_count += 1
+
+            total_pairs += 1
+
+        # Require 95% increasing
+        return increasing_count / total_pairs > 0.95
+
+    def _detect_increment(self, values: list[int]) -> int:
+        """Detect common increment value.
+
+        Args:
+            values: List of values.
+
+        Returns:
+            Most common increment.
+        """
+        if len(values) < 2:
+            return 1
+
+        diffs = [values[i + 1] - values[i] for i in range(len(values) - 1)]
+
+        # Filter out negative diffs (wraps)
+        positive_diffs = [d for d in diffs if d > 0 and d < 1000]
+
+        if not positive_diffs:
+            return 1
+
+        # Find most common diff
+        from collections import Counter
+
+        counter = Counter(positive_diffs)
+        return counter.most_common(1)[0][0]
+
+    def _detect_gaps(self, values: list[int], increment: int) -> list[int]:
+        """Detect missing values in sequence.
+
+        Args:
+            values: List of values.
+            increment: Expected increment.
+
+        Returns:
+            List of missing values.
+        """
+        gaps = []
+
+        for i in range(len(values) - 1):
+            expected_next = values[i] + increment
+            actual_next = values[i + 1]
+
+            if actual_next != expected_next and actual_next > expected_next:
+                # There's a gap
+                for missing in range(expected_next, actual_next, increment):
+                    gaps.append(missing)
+
+        return gaps[:100]  # Limit to first 100 gaps
+
+    def _detect_duplicates(self, values: list[int]) -> list[int]:
+        """Detect duplicate values.
+
+        Args:
+            values: List of values.
+
+        Returns:
+            List of duplicate values.
+        """
+        from collections import Counter
+
+        counter = Counter(values)
+        duplicates = [val for val, count in counter.items() if count > 1]
+
+        return duplicates[:100]  # Limit to first 100
+
+    def _detect_wrapping(self, values: list[int], field_length: int) -> int:
+        """Detect counter wrapping.
+
+        Args:
+            values: List of values.
+            field_length: Field length in bytes.
+
+        Returns:
+            Number of detected wraps.
+        """
+        # Determine max value for field length
+        if field_length == 1:
+            max_val = 255
+        elif field_length == 2:
+            max_val = 65535
+        elif field_length == 4:
+            max_val = 4294967295
+        else:
+            return 0
+
+        wrap_count = 0
+
+        for i in range(len(values) - 1):
+            # Detect wrap: value decreases significantly
+            if values[i] > values[i + 1] and values[i] > max_val * 0.9:
+                wrap_count += 1
+
+        return wrap_count
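The analyzer above only touches field.offset, field.length, and msg.data, so its behavior can be smoke-tested without the rest of the pipeline. A minimal sketch, assuming hypothetical FakeField/FakeMessage dataclasses in place of the real Field/Message from oscura.analyzers.binary.core.results:

from dataclasses import dataclass

from oscura.analyzers.binary.inference.sequences import SequenceAnalyzer


@dataclass
class FakeField:  # hypothetical stand-in for results.Field
    offset: int
    length: int


@dataclass
class FakeMessage:  # hypothetical stand-in for results.Message
    data: bytes


# Ten messages carrying a little-endian u16 counter at offset 2 that skips 5.
field = FakeField(offset=2, length=2)
messages = [
    FakeMessage(data=b"\xaa\xbb" + n.to_bytes(2, "little") + b"\x00")
    for n in (0, 1, 2, 3, 4, 6, 7, 8, 9, 10)
]

meta = SequenceAnalyzer().analyze(field, messages)  # type: ignore[arg-type]
assert meta["is_sequence"] and meta["increment"] == 1 and meta["gaps"] == [5]

With this data the analyzer reports start_value 0, end_value 10, one gap at 5, no duplicates, and no wraps.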
oscura/analyzers/binary/inference/timestamps.py
@@ -0,0 +1,210 @@
+"""Timestamp field analysis."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from oscura.analyzers.binary.core.results import Field, Message
+
+
+class TimestampAnalyzer:
+    """Analyze timestamp fields.
+
+    Detects various timestamp formats and validates temporal consistency.
+
+    Example:
+        >>> analyzer = TimestampAnalyzer()
+        >>> metadata = analyzer.analyze(field, messages)
+        >>> if metadata["is_timestamp"]:
+        ...     print(f"Format: {metadata['format']}")
+        ...     print(f"Sample: {metadata['sample_decoded']}")
+    """
+
+    def analyze(self, field: Field, messages: list[Message]) -> dict[str, Any]:
+        """Analyze timestamp field.
+
+        Args:
+            field: Field to analyze.
+            messages: List of messages containing field.
+
+        Returns:
+            Metadata dict with timestamp information.
+
+        Example:
+            >>> result = analyzer.analyze(field, messages)
+            >>> result["format"]  # e.g., "unix_microseconds"
+            >>> result["time_range"]  # (start, end) datetimes
+        """
+        if len(messages) < 3:
+            return {
+                "is_timestamp": False,
+                "reason": "insufficient_data",
+            }
+
+        # Extract field values
+        values = self._extract_values(field, messages)
+
+        if len(values) < 3:
+            return {
+                "is_timestamp": False,
+                "reason": "could_not_extract_values",
+            }
+
+        # Test various timestamp formats
+        formats_to_test = [
+            ("unix_seconds", 1, 1),
+            ("unix_milliseconds", 1000, 1),
+            ("unix_microseconds", 1_000_000, 1),
+            ("unix_nanoseconds", 1_000_000_000, 1),
+            ("gps_time", 1, 315964800),  # GPS epoch offset
+            ("ntp_time", 1, 2208988800),  # NTP epoch offset
+        ]
+
+        best_format = None
+        best_confidence = 0.0
+        best_decoded = []
+
+        for format_name, divisor, epoch_offset in formats_to_test:
+            confidence, decoded = self._test_format(values, format_name, divisor, epoch_offset)
+
+            if confidence > best_confidence:
+                best_confidence = confidence
+                best_format = format_name
+                best_decoded = decoded
+
+        if best_confidence < 0.6:
+            return {
+                "is_timestamp": False,
+                "reason": "low_confidence",
+                "tested_formats": [f[0] for f in formats_to_test],
+            }
+
+        # Extract time range
+        time_range = self._get_time_range(best_decoded)
+
+        return {
+            "is_timestamp": True,
+            "format": best_format,
+            "confidence": best_confidence,
+            "sample_decoded": str(best_decoded[0]) if best_decoded else None,
+            "time_range": {
+                "start": str(time_range[0]) if time_range[0] else None,
+                "end": str(time_range[1]) if time_range[1] else None,
+                "duration_seconds": (
+                    (time_range[1] - time_range[0]).total_seconds()
+                    if time_range[0] and time_range[1]
+                    else 0
+                ),
+            },
+            "monotonic": self._is_monotonic(values),
+        }
+
+    def _extract_values(self, field: Field, messages: list[Message]) -> list[int]:
+        """Extract field values as integers.
+
+        Args:
+            field: Field specification.
+            messages: List of messages.
+
+        Returns:
+            List of integer values.
+        """
+        values = []
+
+        for msg in messages:
+            if field.offset + field.length <= len(msg.data):
+                field_bytes = msg.data[field.offset : field.offset + field.length]
+
+                try:
+                    if field.length in [4, 8]:
+                        value = int.from_bytes(field_bytes, byteorder="little", signed=False)
+                        values.append(value)
+                except Exception:
+                    continue
+
+        return values
+
+    def _test_format(
+        self,
+        values: list[int],
+        format_name: str,
+        divisor: int,
+        epoch_offset: int,
+    ) -> tuple[float, list[datetime]]:
+        """Test if values match a timestamp format.
+
+        Args:
+            values: Raw timestamp values.
+            format_name: Format name being tested.
+            divisor: Divisor to convert to seconds.
+            epoch_offset: Epoch offset in seconds.
+
+        Returns:
+            (confidence, decoded_timestamps).
+        """
+        decoded = []
+        valid_count = 0
+
+        for val in values[:100]:  # Test first 100
+            try:
+                # Convert to Unix seconds
+                unix_seconds = (val / divisor) - epoch_offset
+
+                # Create datetime
+                dt = datetime.fromtimestamp(unix_seconds, tz=UTC)
+
+                # Validate: reasonable date range (2000-2030)
+                if datetime(2000, 1, 1, tzinfo=UTC) <= dt <= datetime(2030, 12, 31, tzinfo=UTC):
+                    decoded.append(dt)
+                    valid_count += 1
+            except Exception:
+                pass
+
+        # Calculate confidence
+        if len(values) == 0:
+            return 0.0, []
+
+        confidence = valid_count / min(len(values), 100)
+
+        # Check monotonic increase (timestamps should increase)
+        if len(decoded) > 1:
+            monotonic_count = sum(
+                1 for i in range(len(decoded) - 1) if decoded[i] <= decoded[i + 1]
+            )
+            monotonic_ratio = monotonic_count / (len(decoded) - 1)
+
+            confidence *= monotonic_ratio
+
+        return confidence, decoded
+
+    def _get_time_range(self, decoded: list[datetime]) -> tuple[datetime | None, datetime | None]:
+        """Get time range from decoded timestamps.
+
+        Args:
+            decoded: List of datetime objects.
+
+        Returns:
+            (start, end) tuple.
+        """
+        if not decoded:
+            return None, None
+
+        return min(decoded), max(decoded)
+
+    def _is_monotonic(self, values: list[int]) -> bool:
+        """Check if values are monotonically increasing.
+
+        Args:
+            values: List of values.
+
+        Returns:
+            True if monotonic.
+        """
+        if len(values) < 2:
+            return True
+
+        increasing = sum(1 for i in range(len(values) - 1) if values[i] <= values[i + 1])
+
+        return increasing / (len(values) - 1) > 0.95
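Each candidate format reduces to the same arithmetic: divide the raw value into seconds, subtract the epoch offset, and check that the result lands in the 2000-2030 window. A worked example for the "unix_microseconds" row of formats_to_test (divisor 1_000_000, offset 1), with the value round-tripped through the little-endian u64 read that _extract_values performs:

from datetime import UTC, datetime

raw = int.from_bytes(
    (1_700_000_000_000_000).to_bytes(8, "little"), byteorder="little", signed=False
)
unix_seconds = (raw / 1_000_000) - 1  # divisor and epoch offset from formats_to_test
dt = datetime.fromtimestamp(unix_seconds, tz=UTC)
print(dt)  # 2023-11-14 22:13:19+00:00, inside the 2000-2030 sanity window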
oscura/analyzers/binary/visualization/__init__.py
@@ -0,0 +1,9 @@
+"""Visualization tools for binary analysis."""
+
+from __future__ import annotations
+
+from oscura.analyzers.binary.visualization.structure_view import StructureVisualizer
+
+__all__ = [
+    "StructureVisualizer",
+]
oscura/analyzers/binary/visualization/structure_view.py
@@ -0,0 +1,182 @@
+"""Structure visualization for binary analysis results."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING, ClassVar
+
+import matplotlib.patches as mpatches
+import matplotlib.pyplot as plt
+
+if TYPE_CHECKING:
+    from matplotlib.axes import Axes
+
+    from oscura.analyzers.binary.core.results import BinaryAnalysisResult
+
+
+class StructureVisualizer:
+    """Create visual representations of binary file structure.
+
+    Generates diagrams showing message layout, field boundaries,
+    and field types with color coding.
+
+    Example:
+        >>> visualizer = StructureVisualizer()
+        >>> visualizer.visualize_structure(analysis_result, "structure.png")
+    """
+
+    # Field type color map
+    FIELD_COLORS: ClassVar[dict[str, str]] = {
+        "CONSTANT": "#ff6b6b",  # Red
+        "SEQUENCE": "#4ecdc4",  # Cyan
+        "TIMESTAMP": "#45b7d1",  # Blue
+        "CHECKSUM": "#96ceb4",  # Green
+        "LENGTH": "#ffeaa7",  # Yellow
+        "PAYLOAD": "#f0f0f0",  # Light gray
+        "UNKNOWN": "#dfe6e9",  # Gray
+    }
+
+    def visualize_structure(self, result: BinaryAnalysisResult, output_path: str | Path) -> None:
+        """Create structure visualization.
+
+        Args:
+            result: Binary analysis result.
+            output_path: Path to save visualization.
+
+        Example:
+            >>> visualizer = StructureVisualizer()
+            >>> visualizer.visualize_structure(results, "output.png")
+        """
+        if not result.structure or not result.structure.has_messages:
+            # Create simple "No Structure" message
+            fig, ax = plt.subplots(figsize=(10, 2))
+            ax.text(
+                0.5,
+                0.5,
+                "No Message Structure Detected",
+                ha="center",
+                va="center",
+                fontsize=16,
+            )
+            ax.axis("off")
+            plt.savefig(output_path, dpi=150, bbox_inches="tight")
+            plt.close()
+            return
+
+        # Create figure with message layout diagram
+        fig, ax = plt.subplots(figsize=(14, 8))
+
+        # Draw message structure
+        self._draw_message_layout(ax, result)
+
+        msg_len = result.structure.message_length or 100
+        ax.set_xlim(-0.5, msg_len + 0.5)
+        ax.set_ylim(-0.5, 3.5)
+        ax.axis("off")
+
+        plt.title(
+            f"Binary File Structure - Message Layout ({msg_len} bytes)",
+            fontsize=14,
+            fontweight="bold",
+        )
+
+        plt.tight_layout()
+        plt.savefig(output_path, dpi=150, bbox_inches="tight")
+        plt.close()
+
+    def _draw_message_layout(self, ax: Axes, result: BinaryAnalysisResult) -> None:
+        """Draw message layout with colored fields.
+
+        Args:
+            ax: Matplotlib axes.
+            result: Analysis result.
+        """
+        if not result.structure or not result.structure.fields:
+            return
+
+        message_length = result.structure.message_length or 0
+        fields = result.structure.fields
+
+        # Draw each field as a colored rectangle
+        for field in fields:
+            color = self.FIELD_COLORS.get(field.field_type.value.upper(), "#dfe6e9")
+
+            # Draw rectangle for field
+            rect = mpatches.Rectangle(
+                (field.offset, 1),
+                field.length,
+                1,
+                linewidth=1,
+                edgecolor="black",
+                facecolor=color,
+            )
+            ax.add_patch(rect)
+
+            # Add field label
+            field_center = field.offset + field.length / 2
+            ax.text(
+                field_center,
+                1.5,
+                field.name,
+                ha="center",
+                va="center",
+                fontsize=8,
+                fontweight="bold",
+            )
+
+            # Add field type
+            ax.text(
+                field_center,
+                1.2,
+                field.field_type.value,
+                ha="center",
+                va="center",
+                fontsize=6,
+                style="italic",
+            )
+
+            # Add offset labels
+            ax.text(
+                field.offset,
+                0.8,
+                f"{field.offset}",
+                ha="center",
+                va="top",
+                fontsize=6,
+                color="gray",
+            )
+
+        # Add end offset
+        ax.text(
+            message_length,
+            0.8,
+            f"{message_length}",
+            ha="center",
+            va="top",
+            fontsize=6,
+            color="gray",
+        )
+
+        # Add legend
+        legend_elements = [
+            mpatches.Patch(facecolor=color, edgecolor="black", label=field_type)
+            for field_type, color in self.FIELD_COLORS.items()
+        ]
+        ax.legend(
+            handles=legend_elements,
+            loc="upper center",
+            bbox_to_anchor=(0.5, -0.05),
+            ncol=4,
+            frameon=False,
+        )
+
+        # Add byte scale
+        ax.text(
+            -0.3,
+            1.5,
+            "Bytes:",
+            ha="right",
+            va="center",
+            fontsize=8,
+            fontweight="bold",
+        )
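Because _draw_message_layout only reads result.structure.has_messages, .message_length, and .fields (and each field's offset, length, name, and field_type.value), a layout can be rendered without running the full pipeline. A minimal sketch, assuming SimpleNamespace stand-ins for the real BinaryAnalysisResult from oscura.analyzers.binary.core.results:

from types import SimpleNamespace

from oscura.analyzers.binary.visualization import StructureVisualizer


def make_field(offset: int, length: int, name: str, ftype: str) -> SimpleNamespace:
    """Hypothetical stand-in for a results.Field with an enum-like field_type."""
    return SimpleNamespace(
        offset=offset, length=length, name=name,
        field_type=SimpleNamespace(value=ftype),
    )


result = SimpleNamespace(
    structure=SimpleNamespace(
        has_messages=True,
        message_length=8,
        fields=[
            make_field(0, 2, "magic", "CONSTANT"),
            make_field(2, 2, "seq", "SEQUENCE"),
            make_field(4, 4, "payload", "PAYLOAD"),
        ],
    )
)

StructureVisualizer().visualize_structure(result, "layout.png")  # type: ignore[arg-type]

Field type strings are uppercased before the FIELD_COLORS lookup, so unrecognized types fall back to the gray "#dfe6e9" entry.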
@@ -49,7 +49,7 @@ try:
     __version__ = version("oscura")
 except Exception:
     # Fallback for development/testing when package not installed
-    __version__ = "0.11.0"
+    __version__ = "0.12.0"
 
 __all__ = [
     "CANMessage",