oscura-0.11.0-py3-none-any.whl → oscura-0.12.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. oscura/__init__.py +1 -1
  2. oscura/analyzers/binary/__init__.py +36 -0
  3. oscura/analyzers/binary/core/__init__.py +29 -0
  4. oscura/analyzers/binary/core/file_access.py +193 -0
  5. oscura/analyzers/binary/core/pipeline.py +161 -0
  6. oscura/analyzers/binary/core/results.py +217 -0
  7. oscura/analyzers/binary/detection/__init__.py +10 -0
  8. oscura/analyzers/binary/detection/encoding.py +624 -0
  9. oscura/analyzers/binary/detection/patterns.py +320 -0
  10. oscura/analyzers/binary/detection/structure.py +630 -0
  11. oscura/analyzers/binary/export/__init__.py +9 -0
  12. oscura/analyzers/binary/export/dissector.py +174 -0
  13. oscura/analyzers/binary/inference/__init__.py +15 -0
  14. oscura/analyzers/binary/inference/checksums.py +214 -0
  15. oscura/analyzers/binary/inference/fields.py +150 -0
  16. oscura/analyzers/binary/inference/sequences.py +232 -0
  17. oscura/analyzers/binary/inference/timestamps.py +210 -0
  18. oscura/analyzers/binary/visualization/__init__.py +9 -0
  19. oscura/analyzers/binary/visualization/structure_view.py +182 -0
  20. oscura/automotive/__init__.py +1 -1
  21. oscura/automotive/dtc/data.json +102 -17
  22. oscura/core/schemas/device_mapping.json +8 -2
  23. oscura/core/schemas/packet_format.json +24 -4
  24. oscura/core/schemas/protocol_definition.json +12 -2
  25. oscura/loaders/__init__.py +4 -1
  26. oscura/loaders/binary.py +284 -1
  27. oscura/sessions/legacy.py +80 -19
  28. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/METADATA +3 -3
  29. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/RECORD +32 -14
  30. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/WHEEL +0 -0
  31. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/entry_points.txt +0 -0
  32. {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/licenses/LICENSE +0 -0
oscura/analyzers/binary/export/dissector.py
@@ -0,0 +1,174 @@
+"""Dissector generation for binary protocols."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from oscura.analyzers.binary.core.results import BinaryAnalysisResult, Field
+
+
+class DissectorGenerator:
+    """Generate protocol dissectors from binary analysis results.
+
+    Supports Wireshark Lua and Kaitai Struct formats.
+
+    Example:
+        >>> generator = DissectorGenerator()
+        >>> generator.generate_wireshark_lua(results, "protocol.lua")
+        >>> generator.generate_kaitai_struct(results, "protocol.ksy")
+    """
+
+    def generate_wireshark_lua(self, result: BinaryAnalysisResult, output_path: str | Path) -> None:
+        """Generate Wireshark Lua dissector.
+
+        Args:
+            result: Binary analysis result.
+            output_path: Path to save Lua dissector.
+
+        Example:
+            >>> generator = DissectorGenerator()
+            >>> generator.generate_wireshark_lua(results, "custom.lua")
+        """
+        if not result.structure or not result.structure.has_messages:
+            # Create minimal dissector
+            lua_code = """-- No structure detected
+-- This is a placeholder dissector
+
+local proto = Proto("custom", "Custom Protocol")
+
+function proto.dissector(buffer, pinfo, tree)
+    pinfo.cols.protocol = "CUSTOM"
+    local subtree = tree:add(proto, buffer(), "Custom Protocol (No structure detected)")
+end
+
+DissectorTable.get("wtap_encap"):add(wtap.USER0, proto)
+"""
+            Path(output_path).write_text(lua_code)
+            return
+
+        # Generate full dissector
+        fields = result.structure.fields
+        message_length = result.structure.message_length
+
+        lua_code = f"""-- Auto-generated Wireshark dissector
+-- Generated by Oscura Binary Analysis
+-- Message length: {message_length} bytes
+
+local proto = Proto("custom", "Custom Binary Protocol")
+
+-- Field definitions
+"""
+
+        # Add field definitions
+        for field in fields:
+            field_name = field.name.replace(" ", "_").lower()
+            lua_code += f'proto.fields.{field_name} = ProtoField.bytes("custom.{field_name}", "{field.name}", base.HEX)\n'
+
+        lua_code += """
+-- Dissector function
+function proto.dissector(buffer, pinfo, tree)
+    local length = buffer:len()
+    if length == 0 then return end
+
+    pinfo.cols.protocol = proto.name
+
+    local subtree = tree:add(proto, buffer(), "Custom Binary Protocol")
+
+"""
+
+        # Add field parsing
+        for field in fields:
+            field_name = field.name.replace(" ", "_").lower()
+            lua_code += f"    -- {field.name} ({field.field_type.value})\n"
+            lua_code += f"    if length >= {field.offset + field.length} then\n"
+            lua_code += f"        subtree:add(proto.fields.{field_name}, buffer({field.offset}, {field.length}))\n"
+            lua_code += "    end\n\n"
+
+        lua_code += """end
+
+-- Register dissector
+DissectorTable.get("wtap_encap"):add(wtap.USER0, proto)
+
+-- Also register for UDP port (example)
+-- local udp_table = DissectorTable.get("udp.port")
+-- udp_table:add(12345, proto)
+"""
+
+        Path(output_path).write_text(lua_code)
+
+    def generate_kaitai_struct(self, result: BinaryAnalysisResult, output_path: str | Path) -> None:
+        """Generate Kaitai Struct YAML definition.
+
+        Args:
+            result: Binary analysis result.
+            output_path: Path to save Kaitai Struct definition.
+
+        Example:
+            >>> generator = DissectorGenerator()
+            >>> generator.generate_kaitai_struct(results, "protocol.ksy")
+        """
+        if not result.structure or not result.structure.has_messages:
+            # Create minimal definition
+            ksy_content = """meta:
+  id: custom_protocol
+  title: Custom Binary Protocol
+  endian: le
+
+doc: |
+  No structure detected. This is a placeholder definition.
+
+seq: []
+"""
+            Path(output_path).write_text(ksy_content)
+            return
+
+        # Generate full definition
+        fields = result.structure.fields
+
+        ksy_content = f"""meta:
+  id: custom_protocol
+  title: Custom Binary Protocol
+  endian: le
+
+doc: |
+  Auto-generated Kaitai Struct definition from Oscura Binary Analysis.
+  Message length: {result.structure.message_length} bytes
+  Messages detected: {result.structure.message_count}
+
+seq:
+"""
+
+        # Add field definitions
+        for field in fields:
+            field_name = field.name.replace(" ", "_").lower()
+            ksy_type = self._map_field_to_kaitai_type(field)
+
+            ksy_content += f"  - id: {field_name}\n"
+            ksy_content += f"    type: {ksy_type}\n"
+            ksy_content += f"    doc: '{field.field_type.value} field at offset {field.offset}'\n"
+
+        Path(output_path).write_text(ksy_content)
+
+    def _map_field_to_kaitai_type(self, field: Field) -> str:
+        """Map field to Kaitai Struct type.
+
+        Args:
+            field: Field to map.
+
+        Returns:
+            Kaitai type string.
+        """
+        # Map by field length
+        if field.length == 1:
+            return "u1"
+        elif field.length == 2:
+            return "u2"
+        elif field.length == 4:
+            return "u4"
+        elif field.length == 8:
+            return "u8"
+        else:
+            # Variable or custom length
+            return f"bytes({field.length})" if field.length < 1024 else "bytes_eos"
oscura/analyzers/binary/inference/__init__.py
@@ -0,0 +1,15 @@
+"""Semantic analysis for binary fields."""
+
+from __future__ import annotations
+
+from oscura.analyzers.binary.inference.checksums import ChecksumAnalyzer
+from oscura.analyzers.binary.inference.fields import SemanticAnalyzer
+from oscura.analyzers.binary.inference.sequences import SequenceAnalyzer
+from oscura.analyzers.binary.inference.timestamps import TimestampAnalyzer
+
+__all__ = [
+    "ChecksumAnalyzer",
+    "SemanticAnalyzer",
+    "SequenceAnalyzer",
+    "TimestampAnalyzer",
+]
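Because the package root re-exports all four analyzers, downstream code can pull them in with a single import; a small sketch:

from oscura.analyzers.binary.inference import (
    ChecksumAnalyzer,
    SemanticAnalyzer,
    SequenceAnalyzer,
    TimestampAnalyzer,
)

# SemanticAnalyzer bundles the other three internally (see the fields.py hunk below).
analyzer = SemanticAnalyzer()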
oscura/analyzers/binary/inference/checksums.py
@@ -0,0 +1,214 @@
+"""Checksum field analysis."""
+
+from __future__ import annotations
+
+import zlib
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from oscura.analyzers.binary.core.results import Field, Message
+
+
+class ChecksumAnalyzer:
+    """Analyze checksum fields.
+
+    Tests various checksum algorithms and validates against message data.
+
+    Example:
+        >>> analyzer = ChecksumAnalyzer()
+        >>> metadata = analyzer.analyze(field, messages)
+        >>> if metadata["algorithm"] != "unknown":
+        ...     print(f"Algorithm: {metadata['algorithm']}")
+        ...     print(f"Match rate: {metadata['match_rate']:.1%}")
+    """
+
+    def analyze(self, field: Field, messages: list[Message]) -> dict[str, Any]:
+        """Analyze checksum field.
+
+        Args:
+            field: Field to analyze.
+            messages: List of messages containing field.
+
+        Returns:
+            Metadata dict with checksum information.
+
+        Example:
+            >>> result = analyzer.analyze(field, messages)
+            >>> result["algorithm"]  # e.g., "crc32"
+            >>> result["match_rate"]  # e.g., 0.997
+        """
+        if len(messages) < 5:
+            return {
+                "algorithm": "unknown",
+                "reason": "insufficient_data",
+                "match_rate": 0.0,
+            }
+
+        # Test various algorithms
+        algorithms = []
+
+        if field.length == 1:
+            algorithms = ["sum8", "xor8"]
+        elif field.length == 2:
+            algorithms = ["crc16", "fletcher16", "sum16"]
+        elif field.length == 4:
+            algorithms = ["crc32", "adler32", "sum32"]
+        else:
+            return {
+                "algorithm": "unknown",
+                "reason": "unsupported_length",
+                "match_rate": 0.0,
+            }
+
+        best_algorithm = "unknown"
+        best_match_rate = 0.0
+        best_matches = 0
+
+        for algorithm in algorithms:
+            match_rate, matches = self._test_algorithm(algorithm, field, messages)
+
+            if match_rate > best_match_rate:
+                best_match_rate = match_rate
+                best_algorithm = algorithm
+                best_matches = matches
+
+        # Require >95% match rate for confirmation
+        if best_match_rate < 0.95:
+            return {
+                "algorithm": "unknown",
+                "reason": "low_match_rate",
+                "match_rate": best_match_rate,
+                "tested_algorithms": algorithms,
+            }
+
+        return {
+            "algorithm": best_algorithm,
+            "match_rate": best_match_rate,
+            "validated_count": best_matches,
+            "total_messages": len(messages),
+            "confidence": min(1.0, best_match_rate),
+        }
+
+    def _test_algorithm(
+        self, algorithm: str, field: Field, messages: list[Message]
+    ) -> tuple[float, int]:
+        """Test a checksum algorithm against messages.
+
+        Args:
+            algorithm: Algorithm name to test.
+            field: Checksum field.
+            messages: List of messages.
+
+        Returns:
+            (match_rate, match_count) tuple.
+        """
+        matches = 0
+        tested = 0
+
+        for msg in messages[:100]:  # Test first 100 messages
+            if field.offset + field.length > len(msg.data):
+                continue
+
+            # Extract expected checksum from field
+            expected_bytes = msg.data[field.offset : field.offset + field.length]
+            expected = int.from_bytes(expected_bytes, byteorder="little", signed=False)
+
+            # Calculate checksum over payload (excluding checksum field)
+            # Try checksumming data before and after the field
+            payload_before = msg.data[: field.offset]
+            payload_after = msg.data[field.offset + field.length :]
+
+            # Most common: checksum covers everything except itself
+            payload = payload_before + payload_after
+
+            # Calculate checksum
+            calculated = self._calculate_checksum(algorithm, payload)
+
+            if calculated == expected:
+                matches += 1
+
+            tested += 1
+
+        match_rate = matches / tested if tested > 0 else 0.0
+        return match_rate, matches
+
+    def _calculate_checksum(self, algorithm: str, data: bytes) -> int:
+        """Calculate checksum using specified algorithm.
+
+        Args:
+            algorithm: Algorithm name.
+            data: Data to checksum.
+
+        Returns:
+            Checksum value.
+        """
+        if algorithm == "crc32":
+            return zlib.crc32(data) & 0xFFFFFFFF
+
+        elif algorithm == "adler32":
+            return zlib.adler32(data) & 0xFFFFFFFF
+
+        elif algorithm == "crc16":
+            return self._crc16(data)
+
+        elif algorithm == "fletcher16":
+            return self._fletcher16(data)
+
+        elif algorithm == "sum8":
+            return sum(data) & 0xFF
+
+        elif algorithm == "xor8":
+            result = 0
+            for b in data:
+                result ^= b
+            return result & 0xFF
+
+        elif algorithm == "sum16":
+            return sum(data) & 0xFFFF
+
+        elif algorithm == "sum32":
+            return sum(data) & 0xFFFFFFFF
+
+        return 0
+
+    def _crc16(self, data: bytes) -> int:
+        """Calculate CRC-16 (CCITT).
+
+        Args:
+            data: Input data.
+
+        Returns:
+            CRC-16 value.
+        """
+        crc = 0xFFFF
+
+        for byte in data:
+            crc ^= byte << 8
+
+            for _ in range(8):
+                if crc & 0x8000:
+                    crc = (crc << 1) ^ 0x1021
+                else:
+                    crc = crc << 1
+
+                crc &= 0xFFFF
+
+        return crc
+
+    def _fletcher16(self, data: bytes) -> int:
+        """Calculate Fletcher-16 checksum.
+
+        Args:
+            data: Input data.
+
+        Returns:
+            Fletcher-16 value.
+        """
+        sum1 = 0
+        sum2 = 0
+
+        for byte in data:
+            sum1 = (sum1 + byte) % 255
+            sum2 = (sum2 + sum1) % 255
+
+        return (sum2 << 8) | sum1
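The hand-rolled _crc16 and _fletcher16 routines follow the CRC-16/CCITT-FALSE and Fletcher-16 definitions, so they can be checked against the published test vectors: CRC-16/CCITT-FALSE of b"123456789" is 0x29B1, and Fletcher-16 of b"abcde" is 0xC8F0. A standalone sketch mirroring the logic above:

def crc16_ccitt(data: bytes) -> int:
    # Poly 0x1021, init 0xFFFF, no reflection, no final XOR.
    crc = 0xFFFF
    for byte in data:
        crc ^= byte << 8
        for _ in range(8):
            crc = ((crc << 1) ^ 0x1021) if crc & 0x8000 else (crc << 1)
            crc &= 0xFFFF
    return crc


def fletcher16(data: bytes) -> int:
    # Two running sums modulo 255; the second sum forms the high byte.
    sum1 = sum2 = 0
    for byte in data:
        sum1 = (sum1 + byte) % 255
        sum2 = (sum2 + sum1) % 255
    return (sum2 << 8) | sum1


assert crc16_ccitt(b"123456789") == 0x29B1
assert fletcher16(b"abcde") == 0xC8F0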
oscura/analyzers/binary/inference/fields.py
@@ -0,0 +1,150 @@
+"""Semantic field analysis coordinator."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from oscura.analyzers.binary.core.results import FieldType
+from oscura.analyzers.binary.inference.checksums import ChecksumAnalyzer
+from oscura.analyzers.binary.inference.sequences import SequenceAnalyzer
+from oscura.analyzers.binary.inference.timestamps import TimestampAnalyzer
+
+if TYPE_CHECKING:
+    from oscura.analyzers.binary.core.file_access import BinaryFile
+    from oscura.analyzers.binary.core.results import Field, StructureResult
+
+
+class SemanticAnalyzer:
+    """Coordinate semantic analysis of binary fields.
+
+    Enhances field information with semantic metadata including:
+    - Sequence number detection and analysis
+    - Timestamp format identification
+    - Checksum algorithm detection and validation
+    - Payload characteristics
+
+    Example:
+        >>> analyzer = SemanticAnalyzer()
+        >>> enhanced_fields = analyzer.analyze_fields(binary_file, structure)
+        >>> for field in enhanced_fields:
+        ...     if field.field_type == FieldType.CHECKSUM:
+        ...         print(f"Checksum: {field.metadata['algorithm']}")
+    """
+
+    def __init__(self) -> None:
+        """Initialize semantic analyzer."""
+        self.sequence_analyzer = SequenceAnalyzer()
+        self.timestamp_analyzer = TimestampAnalyzer()
+        self.checksum_analyzer = ChecksumAnalyzer()
+
+    def analyze_fields(self, file: BinaryFile, structure: StructureResult) -> list[Field]:
+        """Enhance fields with semantic analysis.
+
+        Args:
+            file: Binary file being analyzed.
+            structure: Structure inference result.
+
+        Returns:
+            List of enhanced fields with metadata.
+
+        Example:
+            >>> analyzer = SemanticAnalyzer()
+            >>> fields = analyzer.analyze_fields(binary_file, structure_result)
+        """
+        if not structure.has_messages or not structure.fields:
+            return structure.fields
+
+        enhanced_fields = []
+
+        for field in structure.fields:
+            # Analyze based on field type
+            if field.field_type == FieldType.SEQUENCE:
+                metadata = self.sequence_analyzer.analyze(field, structure.messages)
+                field.metadata.update(metadata)
+
+            elif field.field_type == FieldType.TIMESTAMP:
+                metadata = self.timestamp_analyzer.analyze(field, structure.messages)
+                field.metadata.update(metadata)
+
+            elif field.field_type == FieldType.CHECKSUM:
+                metadata = self.checksum_analyzer.analyze(field, structure.messages)
+                field.metadata.update(metadata)
+
+            elif field.field_type == FieldType.PAYLOAD:
+                metadata = self._analyze_payload(field, structure.messages)
+                field.metadata.update(metadata)
+
+            enhanced_fields.append(field)
+
+        return enhanced_fields
+
+    def _analyze_payload(self, field: Field, messages: list[Any]) -> dict[str, Any]:
+        """Analyze payload field characteristics.
+
+        Args:
+            field: Payload field.
+            messages: List of messages.
+
+        Returns:
+            Metadata dict with payload characteristics.
+        """
+        if len(messages) == 0:
+            return {"payload_type": "unknown"}
+
+        # Sample first few payloads
+        sample_size = min(10, len(messages))
+        entropies = []
+
+        for msg in messages[:sample_size]:
+            if field.offset + field.length <= len(msg.data):
+                payload = msg.data[field.offset : field.offset + field.length]
+                entropy = self._calculate_entropy(payload)
+                entropies.append(entropy)
+
+        if not entropies:
+            return {"payload_type": "unknown"}
+
+        avg_entropy = sum(entropies) / len(entropies)
+
+        # Classify payload type by entropy
+        if avg_entropy > 7.5:
+            payload_type = "compressed_or_encrypted"
+        elif avg_entropy > 5.0:
+            payload_type = "binary_data"
+        elif avg_entropy > 3.0:
+            payload_type = "mixed_content"
+        else:
+            payload_type = "low_entropy"
+
+        return {
+            "payload_type": payload_type,
+            "average_entropy": avg_entropy,
+            "entropy_bits_per_byte": avg_entropy,
+        }
+
+    def _calculate_entropy(self, data: bytes) -> float:
+        """Calculate Shannon entropy of data.
+
+        Args:
+            data: Input bytes.
+
+        Returns:
+            Entropy in bits per byte.
+        """
+        if len(data) == 0:
+            return 0.0
+
+        from collections import Counter
+
+        counts = Counter(data)
+        total = len(data)
+
+        entropy = 0.0
+        for count in counts.values():
+            p = count / total
+            if p > 0:
+                import math
+
+                entropy -= p * math.log2(p)
+
+        return entropy
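The 7.5/5.0/3.0 bits-per-byte thresholds in _analyze_payload are easiest to judge against concrete inputs. A standalone sketch of the same Shannon entropy computation:

import math
import os
from collections import Counter


def shannon_entropy(data: bytes) -> float:
    # Bits per byte, identical in effect to _calculate_entropy above.
    if not data:
        return 0.0
    total = len(data)
    return -sum((c / total) * math.log2(c / total) for c in Counter(data).values())


print(shannon_entropy(b"\x00" * 64))      # 0.0  -> "low_entropy"
print(shannon_entropy(b"ABABABAB" * 8))   # 1.0  -> "low_entropy"
print(shannon_entropy(os.urandom(4096)))  # ~8.0 -> "compressed_or_encrypted"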