oscura-0.11.0-py3-none-any.whl → oscura-0.12.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. oscura/__init__.py +1 -1
  2. oscura/analyzers/binary/__init__.py +36 -0
  3. oscura/analyzers/binary/core/__init__.py +29 -0
  4. oscura/analyzers/binary/core/file_access.py +193 -0
  5. oscura/analyzers/binary/core/pipeline.py +161 -0
  6. oscura/analyzers/binary/core/results.py +217 -0
  7. oscura/analyzers/binary/detection/__init__.py +10 -0
  8. oscura/analyzers/binary/detection/encoding.py +624 -0
  9. oscura/analyzers/binary/detection/patterns.py +320 -0
  10. oscura/analyzers/binary/detection/structure.py +630 -0
  11. oscura/analyzers/binary/export/__init__.py +9 -0
  12. oscura/analyzers/binary/export/dissector.py +174 -0
  13. oscura/analyzers/binary/inference/__init__.py +15 -0
  14. oscura/analyzers/binary/inference/checksums.py +214 -0
  15. oscura/analyzers/binary/inference/fields.py +150 -0
  16. oscura/analyzers/binary/inference/sequences.py +232 -0
  17. oscura/analyzers/binary/inference/timestamps.py +210 -0
  18. oscura/analyzers/binary/visualization/__init__.py +9 -0
  19. oscura/analyzers/binary/visualization/structure_view.py +182 -0
  20. oscura/automotive/__init__.py +1 -1
  21. oscura/automotive/dtc/data.json +102 -17
  22. oscura/core/schemas/device_mapping.json +8 -2
  23. oscura/core/schemas/packet_format.json +24 -4
  24. oscura/core/schemas/protocol_definition.json +12 -2
  25. oscura/loaders/__init__.py +4 -1
  26. oscura/loaders/binary.py +284 -1
  27. oscura/sessions/legacy.py +80 -19
  28. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/METADATA +3 -3
  29. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/RECORD +32 -14
  30. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/WHEEL +0 -0
  31. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/entry_points.txt +0 -0
  32. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/licenses/LICENSE +0 -0
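For orientation, the new binary-analysis classes introduced below can be imported by their module paths. A minimal sketch, assuming only the paths and class names visible in this diff (the aggregate exports of oscura/analyzers/binary/__init__.py are not shown here):

# Module paths match the new files listed above; class names appear in the hunks below.
from oscura.analyzers.binary.inference.sequences import SequenceAnalyzer
from oscura.analyzers.binary.inference.timestamps import TimestampAnalyzer
from oscura.analyzers.binary.visualization import StructureVisualizer  # re-exported by visualization/__init__.py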
oscura/analyzers/binary/inference/sequences.py
@@ -0,0 +1,232 @@
+"""Sequence number field analysis."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from oscura.analyzers.binary.core.results import Field, Message
+
+
+class SequenceAnalyzer:
+    """Analyze sequence number fields.
+
+    Detects monotonic counters, wrapping behavior, gaps, and duplicates.
+
+    Example:
+        >>> analyzer = SequenceAnalyzer()
+        >>> metadata = analyzer.analyze(field, messages)
+        >>> if metadata["is_sequence"]:
+        ...     print(f"Sequence: {metadata['start_value']} + {metadata['increment']}")
+    """
+
+    def analyze(self, field: Field, messages: list[Message]) -> dict[str, Any]:
+        """Analyze sequence number field.
+
+        Args:
+            field: Field to analyze.
+            messages: List of messages containing field.
+
+        Returns:
+            Metadata dict with sequence information.
+
+        Example:
+            >>> result = analyzer.analyze(field, messages)
+            >>> result["is_sequence"]  # True if monotonic
+            >>> result["gaps"]  # List of missing values
+        """
+        if len(messages) < 3:
+            return {
+                "is_sequence": False,
+                "reason": "insufficient_data",
+            }
+
+        # Extract field values from all messages
+        values = self._extract_values(field, messages)
+
+        if len(values) < 3:
+            return {
+                "is_sequence": False,
+                "reason": "could_not_extract_values",
+            }
+
+        # Check if monotonic increasing
+        is_monotonic = self._is_monotonic_increasing(values)
+
+        if not is_monotonic:
+            return {
+                "is_sequence": False,
+                "reason": "not_monotonic",
+            }
+
+        # Analyze sequence characteristics
+        start_value = int(values[0])
+        increment = self._detect_increment(values)
+        gaps = self._detect_gaps(values, increment)
+        duplicates = self._detect_duplicates(values)
+        wraps = self._detect_wrapping(values, field.length)
+
+        return {
+            "is_sequence": True,
+            "start_value": start_value,
+            "end_value": int(values[-1]),
+            "increment": increment,
+            "total_values": len(values),
+            "unique_values": len(set(values)),
+            "gaps": gaps,
+            "duplicates": duplicates,
+            "wrapping_detected": wraps > 0,
+            "wrap_count": wraps,
+        }
+
+    def _extract_values(self, field: Field, messages: list[Message]) -> list[int]:
+        """Extract field values from messages.
+
+        Args:
+            field: Field specification.
+            messages: List of messages.
+
+        Returns:
+            List of integer values.
+        """
+        values = []
+
+        for msg in messages:
+            if field.offset + field.length <= len(msg.data):
+                field_bytes = msg.data[field.offset : field.offset + field.length]
+
+                try:
+                    # Try interpreting as different integer types
+                    if field.length == 1:
+                        value = int(field_bytes[0])
+                    elif field.length == 2 or field.length == 4 or field.length == 8:
+                        value = int.from_bytes(field_bytes, byteorder="little", signed=False)
+                    else:
+                        continue
+
+                    values.append(value)
+                except Exception:
+                    continue
+
+        return values
+
+    def _is_monotonic_increasing(self, values: list[int]) -> bool:
+        """Check if values are monotonically increasing (allowing wraps).
+
+        Args:
+            values: List of values.
+
+        Returns:
+            True if mostly increasing.
+        """
+        if len(values) < 2:
+            return False
+
+        increasing_count = 0
+        total_pairs = 0
+
+        for i in range(len(values) - 1):
+            diff = values[i + 1] - values[i]
+
+            # Allow small decreases for wrapping
+            if diff >= 0 or diff < -1000:  # Likely wrapped
+                increasing_count += 1
+
+            total_pairs += 1
+
+        # Require 95% increasing
+        return increasing_count / total_pairs > 0.95
+
+    def _detect_increment(self, values: list[int]) -> int:
+        """Detect common increment value.
+
+        Args:
+            values: List of values.
+
+        Returns:
+            Most common increment.
+        """
+        if len(values) < 2:
+            return 1
+
+        diffs = [values[i + 1] - values[i] for i in range(len(values) - 1)]
+
+        # Filter out negative diffs (wraps)
+        positive_diffs = [d for d in diffs if d > 0 and d < 1000]
+
+        if not positive_diffs:
+            return 1
+
+        # Find most common diff
+        from collections import Counter
+
+        counter = Counter(positive_diffs)
+        return counter.most_common(1)[0][0]
+
+    def _detect_gaps(self, values: list[int], increment: int) -> list[int]:
+        """Detect missing values in sequence.
+
+        Args:
+            values: List of values.
+            increment: Expected increment.
+
+        Returns:
+            List of missing values.
+        """
+        gaps = []
+
+        for i in range(len(values) - 1):
+            expected_next = values[i] + increment
+            actual_next = values[i + 1]
+
+            if actual_next != expected_next and actual_next > expected_next:
+                # There's a gap
+                for missing in range(expected_next, actual_next, increment):
+                    gaps.append(missing)
+
+        return gaps[:100]  # Limit to first 100 gaps
+
+    def _detect_duplicates(self, values: list[int]) -> list[int]:
+        """Detect duplicate values.
+
+        Args:
+            values: List of values.
+
+        Returns:
+            List of duplicate values.
+        """
+        from collections import Counter
+
+        counter = Counter(values)
+        duplicates = [val for val, count in counter.items() if count > 1]
+
+        return duplicates[:100]  # Limit to first 100
+
+    def _detect_wrapping(self, values: list[int], field_length: int) -> int:
+        """Detect counter wrapping.
+
+        Args:
+            values: List of values.
+            field_length: Field length in bytes.
+
+        Returns:
+            Number of detected wraps.
+        """
+        # Determine max value for field length
+        if field_length == 1:
+            max_val = 255
+        elif field_length == 2:
+            max_val = 65535
+        elif field_length == 4:
+            max_val = 4294967295
+        else:
+            return 0
+
+        wrap_count = 0
+
+        for i in range(len(values) - 1):
+            # Detect wrap: value decreases significantly
+            if values[i] > values[i + 1] and values[i] > max_val * 0.9:
+                wrap_count += 1
+
+        return wrap_count
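The analyzer above only touches field.offset, field.length, and msg.data, so its behavior can be smoke-tested without the rest of the pipeline. A minimal sketch, assuming hypothetical FakeField/FakeMessage dataclasses in place of the real Field/Message from oscura.analyzers.binary.core.results:

from dataclasses import dataclass

from oscura.analyzers.binary.inference.sequences import SequenceAnalyzer


@dataclass
class FakeField:  # hypothetical stand-in for results.Field
    offset: int
    length: int


@dataclass
class FakeMessage:  # hypothetical stand-in for results.Message
    data: bytes


# Ten messages carrying a little-endian u16 counter at offset 2 that skips 5.
field = FakeField(offset=2, length=2)
messages = [
    FakeMessage(data=b"\xaa\xbb" + n.to_bytes(2, "little") + b"\x00")
    for n in (0, 1, 2, 3, 4, 6, 7, 8, 9, 10)
]

meta = SequenceAnalyzer().analyze(field, messages)  # type: ignore[arg-type]
assert meta["is_sequence"] and meta["increment"] == 1 and meta["gaps"] == [5]

With this data the analyzer reports start_value 0, end_value 10, one gap at 5, no duplicates, and no wraps.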
oscura/analyzers/binary/inference/timestamps.py
@@ -0,0 +1,210 @@
+"""Timestamp field analysis."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from oscura.analyzers.binary.core.results import Field, Message
+
+
+class TimestampAnalyzer:
+    """Analyze timestamp fields.
+
+    Detects various timestamp formats and validates temporal consistency.
+
+    Example:
+        >>> analyzer = TimestampAnalyzer()
+        >>> metadata = analyzer.analyze(field, messages)
+        >>> if metadata["is_timestamp"]:
+        ...     print(f"Format: {metadata['format']}")
+        ...     print(f"Sample: {metadata['sample_decoded']}")
+    """
+
+    def analyze(self, field: Field, messages: list[Message]) -> dict[str, Any]:
+        """Analyze timestamp field.
+
+        Args:
+            field: Field to analyze.
+            messages: List of messages containing field.
+
+        Returns:
+            Metadata dict with timestamp information.
+
+        Example:
+            >>> result = analyzer.analyze(field, messages)
+            >>> result["format"]  # e.g., "unix_microseconds"
+            >>> result["time_range"]  # (start, end) datetimes
+        """
+        if len(messages) < 3:
+            return {
+                "is_timestamp": False,
+                "reason": "insufficient_data",
+            }
+
+        # Extract field values
+        values = self._extract_values(field, messages)
+
+        if len(values) < 3:
+            return {
+                "is_timestamp": False,
+                "reason": "could_not_extract_values",
+            }
+
+        # Test various timestamp formats
+        formats_to_test = [
+            ("unix_seconds", 1, 1),
+            ("unix_milliseconds", 1000, 1),
+            ("unix_microseconds", 1_000_000, 1),
+            ("unix_nanoseconds", 1_000_000_000, 1),
+            ("gps_time", 1, 315964800),  # GPS epoch offset
+            ("ntp_time", 1, 2208988800),  # NTP epoch offset
+        ]
+
+        best_format = None
+        best_confidence = 0.0
+        best_decoded = []
+
+        for format_name, divisor, epoch_offset in formats_to_test:
+            confidence, decoded = self._test_format(values, format_name, divisor, epoch_offset)
+
+            if confidence > best_confidence:
+                best_confidence = confidence
+                best_format = format_name
+                best_decoded = decoded
+
+        if best_confidence < 0.6:
+            return {
+                "is_timestamp": False,
+                "reason": "low_confidence",
+                "tested_formats": [f[0] for f in formats_to_test],
+            }
+
+        # Extract time range
+        time_range = self._get_time_range(best_decoded)
+
+        return {
+            "is_timestamp": True,
+            "format": best_format,
+            "confidence": best_confidence,
+            "sample_decoded": str(best_decoded[0]) if best_decoded else None,
+            "time_range": {
+                "start": str(time_range[0]) if time_range[0] else None,
+                "end": str(time_range[1]) if time_range[1] else None,
+                "duration_seconds": (
+                    (time_range[1] - time_range[0]).total_seconds()
+                    if time_range[0] and time_range[1]
+                    else 0
+                ),
+            },
+            "monotonic": self._is_monotonic(values),
+        }
+
+    def _extract_values(self, field: Field, messages: list[Message]) -> list[int]:
+        """Extract field values as integers.
+
+        Args:
+            field: Field specification.
+            messages: List of messages.
+
+        Returns:
+            List of integer values.
+        """
+        values = []
+
+        for msg in messages:
+            if field.offset + field.length <= len(msg.data):
+                field_bytes = msg.data[field.offset : field.offset + field.length]
+
+                try:
+                    if field.length in [4, 8]:
+                        value = int.from_bytes(field_bytes, byteorder="little", signed=False)
+                        values.append(value)
+                except Exception:
+                    continue
+
+        return values
+
+    def _test_format(
+        self,
+        values: list[int],
+        format_name: str,
+        divisor: int,
+        epoch_offset: int,
+    ) -> tuple[float, list[datetime]]:
+        """Test if values match a timestamp format.
+
+        Args:
+            values: Raw timestamp values.
+            format_name: Format name being tested.
+            divisor: Divisor to convert to seconds.
+            epoch_offset: Epoch offset in seconds.
+
+        Returns:
+            (confidence, decoded_timestamps).
+        """
+        decoded = []
+        valid_count = 0
+
+        for val in values[:100]:  # Test first 100
+            try:
+                # Convert to Unix seconds
+                unix_seconds = (val / divisor) - epoch_offset
+
+                # Create datetime
+                dt = datetime.fromtimestamp(unix_seconds, tz=UTC)
+
+                # Validate: reasonable date range (2000-2030)
+                if datetime(2000, 1, 1, tzinfo=UTC) <= dt <= datetime(2030, 12, 31, tzinfo=UTC):
+                    decoded.append(dt)
+                    valid_count += 1
+            except Exception:
+                pass
+
+        # Calculate confidence
+        if len(values) == 0:
+            return 0.0, []
+
+        confidence = valid_count / min(len(values), 100)
+
+        # Check monotonic increase (timestamps should increase)
+        if len(decoded) > 1:
+            monotonic_count = sum(
+                1 for i in range(len(decoded) - 1) if decoded[i] <= decoded[i + 1]
+            )
+            monotonic_ratio = monotonic_count / (len(decoded) - 1)
+
+            confidence *= monotonic_ratio
+
+        return confidence, decoded
+
+    def _get_time_range(self, decoded: list[datetime]) -> tuple[datetime | None, datetime | None]:
+        """Get time range from decoded timestamps.
+
+        Args:
+            decoded: List of datetime objects.
+
+        Returns:
+            (start, end) tuple.
+        """
+        if not decoded:
+            return None, None
+
+        return min(decoded), max(decoded)
+
+    def _is_monotonic(self, values: list[int]) -> bool:
+        """Check if values are monotonically increasing.
+
+        Args:
+            values: List of values.
+
+        Returns:
+            True if monotonic.
+        """
+        if len(values) < 2:
+            return True
+
+        increasing = sum(1 for i in range(len(values) - 1) if values[i] <= values[i + 1])
+
+        return increasing / (len(values) - 1) > 0.95
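Each candidate format reduces to the same arithmetic: divide the raw value into seconds, subtract the epoch offset, and check that the result lands in the 2000-2030 window. A worked example for the "unix_microseconds" row of formats_to_test (divisor 1_000_000, offset 1), with the value round-tripped through the little-endian u64 read that _extract_values performs:

from datetime import UTC, datetime

raw = int.from_bytes(
    (1_700_000_000_000_000).to_bytes(8, "little"), byteorder="little", signed=False
)
unix_seconds = (raw / 1_000_000) - 1  # divisor and epoch offset from formats_to_test
dt = datetime.fromtimestamp(unix_seconds, tz=UTC)
print(dt)  # 2023-11-14 22:13:19+00:00, inside the 2000-2030 sanity window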
oscura/analyzers/binary/visualization/__init__.py
@@ -0,0 +1,9 @@
+"""Visualization tools for binary analysis."""
+
+from __future__ import annotations
+
+from oscura.analyzers.binary.visualization.structure_view import StructureVisualizer
+
+__all__ = [
+    "StructureVisualizer",
+]
oscura/analyzers/binary/visualization/structure_view.py
@@ -0,0 +1,182 @@
+"""Structure visualization for binary analysis results."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING, ClassVar
+
+import matplotlib.patches as mpatches
+import matplotlib.pyplot as plt
+
+if TYPE_CHECKING:
+    from matplotlib.axes import Axes
+
+    from oscura.analyzers.binary.core.results import BinaryAnalysisResult
+
+
+class StructureVisualizer:
+    """Create visual representations of binary file structure.
+
+    Generates diagrams showing message layout, field boundaries,
+    and field types with color coding.
+
+    Example:
+        >>> visualizer = StructureVisualizer()
+        >>> visualizer.visualize_structure(analysis_result, "structure.png")
+    """
+
+    # Field type color map
+    FIELD_COLORS: ClassVar[dict[str, str]] = {
+        "CONSTANT": "#ff6b6b",  # Red
+        "SEQUENCE": "#4ecdc4",  # Cyan
+        "TIMESTAMP": "#45b7d1",  # Blue
+        "CHECKSUM": "#96ceb4",  # Green
+        "LENGTH": "#ffeaa7",  # Yellow
+        "PAYLOAD": "#f0f0f0",  # Light gray
+        "UNKNOWN": "#dfe6e9",  # Gray
+    }
+
+    def visualize_structure(self, result: BinaryAnalysisResult, output_path: str | Path) -> None:
+        """Create structure visualization.
+
+        Args:
+            result: Binary analysis result.
+            output_path: Path to save visualization.
+
+        Example:
+            >>> visualizer = StructureVisualizer()
+            >>> visualizer.visualize_structure(results, "output.png")
+        """
+        if not result.structure or not result.structure.has_messages:
+            # Create simple "No Structure" message
+            fig, ax = plt.subplots(figsize=(10, 2))
+            ax.text(
+                0.5,
+                0.5,
+                "No Message Structure Detected",
+                ha="center",
+                va="center",
+                fontsize=16,
+            )
+            ax.axis("off")
+            plt.savefig(output_path, dpi=150, bbox_inches="tight")
+            plt.close()
+            return
+
+        # Create figure with message layout diagram
+        fig, ax = plt.subplots(figsize=(14, 8))
+
+        # Draw message structure
+        self._draw_message_layout(ax, result)
+
+        msg_len = result.structure.message_length or 100
+        ax.set_xlim(-0.5, msg_len + 0.5)
+        ax.set_ylim(-0.5, 3.5)
+        ax.axis("off")
+
+        plt.title(
+            f"Binary File Structure - Message Layout ({msg_len} bytes)",
+            fontsize=14,
+            fontweight="bold",
+        )
+
+        plt.tight_layout()
+        plt.savefig(output_path, dpi=150, bbox_inches="tight")
+        plt.close()
+
+    def _draw_message_layout(self, ax: Axes, result: BinaryAnalysisResult) -> None:
+        """Draw message layout with colored fields.
+
+        Args:
+            ax: Matplotlib axes.
+            result: Analysis result.
+        """
+        if not result.structure or not result.structure.fields:
+            return
+
+        message_length = result.structure.message_length or 0
+        fields = result.structure.fields
+
+        # Draw each field as a colored rectangle
+        for field in fields:
+            color = self.FIELD_COLORS.get(field.field_type.value.upper(), "#dfe6e9")
+
+            # Draw rectangle for field
+            rect = mpatches.Rectangle(
+                (field.offset, 1),
+                field.length,
+                1,
+                linewidth=1,
+                edgecolor="black",
+                facecolor=color,
+            )
+            ax.add_patch(rect)
+
+            # Add field label
+            field_center = field.offset + field.length / 2
+            ax.text(
+                field_center,
+                1.5,
+                field.name,
+                ha="center",
+                va="center",
+                fontsize=8,
+                fontweight="bold",
+            )
+
+            # Add field type
+            ax.text(
+                field_center,
+                1.2,
+                field.field_type.value,
+                ha="center",
+                va="center",
+                fontsize=6,
+                style="italic",
+            )
+
+            # Add offset labels
+            ax.text(
+                field.offset,
+                0.8,
+                f"{field.offset}",
+                ha="center",
+                va="top",
+                fontsize=6,
+                color="gray",
+            )
+
+        # Add end offset
+        ax.text(
+            message_length,
+            0.8,
+            f"{message_length}",
+            ha="center",
+            va="top",
+            fontsize=6,
+            color="gray",
+        )
+
+        # Add legend
+        legend_elements = [
+            mpatches.Patch(facecolor=color, edgecolor="black", label=field_type)
+            for field_type, color in self.FIELD_COLORS.items()
+        ]
+        ax.legend(
+            handles=legend_elements,
+            loc="upper center",
+            bbox_to_anchor=(0.5, -0.05),
+            ncol=4,
+            frameon=False,
+        )
+
+        # Add byte scale
+        ax.text(
+            -0.3,
+            1.5,
+            "Bytes:",
+            ha="right",
+            va="center",
+            fontsize=8,
+            fontweight="bold",
+        )
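Because _draw_message_layout only reads result.structure.has_messages, .message_length, and .fields (and each field's offset, length, name, and field_type.value), a layout can be rendered without running the full pipeline. A minimal sketch, assuming SimpleNamespace stand-ins for the real BinaryAnalysisResult from oscura.analyzers.binary.core.results:

from types import SimpleNamespace

from oscura.analyzers.binary.visualization import StructureVisualizer


def make_field(offset: int, length: int, name: str, ftype: str) -> SimpleNamespace:
    """Hypothetical stand-in for a results.Field with an enum-like field_type."""
    return SimpleNamespace(
        offset=offset, length=length, name=name,
        field_type=SimpleNamespace(value=ftype),
    )


result = SimpleNamespace(
    structure=SimpleNamespace(
        has_messages=True,
        message_length=8,
        fields=[
            make_field(0, 2, "magic", "CONSTANT"),
            make_field(2, 2, "seq", "SEQUENCE"),
            make_field(4, 4, "payload", "PAYLOAD"),
        ],
    )
)

StructureVisualizer().visualize_structure(result, "layout.png")  # type: ignore[arg-type]

Field type strings are uppercased before the FIELD_COLORS lookup, so unrecognized types fall back to the gray "#dfe6e9" entry.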
@@ -49,7 +49,7 @@ try:
     __version__ = version("oscura")
 except Exception:
     # Fallback for development/testing when package not installed
-    __version__ = "0.11.0"
+    __version__ = "0.12.0"
 
 __all__ = [
     "CANMessage",