oscura-0.11.0-py3-none-any.whl → oscura-0.12.0-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.
- oscura/__init__.py +1 -1
- oscura/analyzers/binary/__init__.py +36 -0
- oscura/analyzers/binary/core/__init__.py +29 -0
- oscura/analyzers/binary/core/file_access.py +193 -0
- oscura/analyzers/binary/core/pipeline.py +161 -0
- oscura/analyzers/binary/core/results.py +217 -0
- oscura/analyzers/binary/detection/__init__.py +10 -0
- oscura/analyzers/binary/detection/encoding.py +624 -0
- oscura/analyzers/binary/detection/patterns.py +320 -0
- oscura/analyzers/binary/detection/structure.py +630 -0
- oscura/analyzers/binary/export/__init__.py +9 -0
- oscura/analyzers/binary/export/dissector.py +174 -0
- oscura/analyzers/binary/inference/__init__.py +15 -0
- oscura/analyzers/binary/inference/checksums.py +214 -0
- oscura/analyzers/binary/inference/fields.py +150 -0
- oscura/analyzers/binary/inference/sequences.py +232 -0
- oscura/analyzers/binary/inference/timestamps.py +210 -0
- oscura/analyzers/binary/visualization/__init__.py +9 -0
- oscura/analyzers/binary/visualization/structure_view.py +182 -0
- oscura/automotive/__init__.py +1 -1
- oscura/automotive/dtc/data.json +102 -17
- oscura/core/schemas/device_mapping.json +8 -2
- oscura/core/schemas/packet_format.json +24 -4
- oscura/core/schemas/protocol_definition.json +12 -2
- oscura/loaders/__init__.py +4 -1
- oscura/loaders/binary.py +284 -1
- oscura/sessions/legacy.py +80 -19
- {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/METADATA +3 -3
- {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/RECORD +32 -14
- {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/WHEEL +0 -0
- {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/entry_points.txt +0 -0
- {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/licenses/LICENSE +0 -0
oscura/analyzers/binary/export/dissector.py
@@ -0,0 +1,174 @@
+"""Dissector generation for binary protocols."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from oscura.analyzers.binary.core.results import BinaryAnalysisResult, Field
+
+
+class DissectorGenerator:
+    """Generate protocol dissectors from binary analysis results.
+
+    Supports Wireshark Lua and Kaitai Struct formats.
+
+    Example:
+        >>> generator = DissectorGenerator()
+        >>> generator.generate_wireshark_lua(results, "protocol.lua")
+        >>> generator.generate_kaitai_struct(results, "protocol.ksy")
+    """
+
+    def generate_wireshark_lua(self, result: BinaryAnalysisResult, output_path: str | Path) -> None:
+        """Generate Wireshark Lua dissector.
+
+        Args:
+            result: Binary analysis result.
+            output_path: Path to save Lua dissector.
+
+        Example:
+            >>> generator = DissectorGenerator()
+            >>> generator.generate_wireshark_lua(results, "custom.lua")
+        """
+        if not result.structure or not result.structure.has_messages:
+            # Create minimal dissector
+            lua_code = """-- No structure detected
+-- This is a placeholder dissector
+
+local proto = Proto("custom", "Custom Protocol")
+
+function proto.dissector(buffer, pinfo, tree)
+    pinfo.cols.protocol = "CUSTOM"
+    local subtree = tree:add(proto, buffer(), "Custom Protocol (No structure detected)")
+end
+
+DissectorTable.get("wtap_encap"):add(wtap.USER0, proto)
+"""
+            Path(output_path).write_text(lua_code)
+            return
+
+        # Generate full dissector
+        fields = result.structure.fields
+        message_length = result.structure.message_length
+
+        lua_code = f"""-- Auto-generated Wireshark dissector
+-- Generated by Oscura Binary Analysis
+-- Message length: {message_length} bytes
+
+local proto = Proto("custom", "Custom Binary Protocol")
+
+-- Field definitions
+"""
+
+        # Add field definitions
+        for field in fields:
+            field_name = field.name.replace(" ", "_").lower()
+            lua_code += f'proto.fields.{field_name} = ProtoField.bytes("custom.{field_name}", "{field.name}", base.HEX)\n'
+
+        lua_code += """
+-- Dissector function
+function proto.dissector(buffer, pinfo, tree)
+    local length = buffer:len()
+    if length == 0 then return end
+
+    pinfo.cols.protocol = proto.name
+
+    local subtree = tree:add(proto, buffer(), "Custom Binary Protocol")
+
+"""
+
+        # Add field parsing
+        for field in fields:
+            field_name = field.name.replace(" ", "_").lower()
+            lua_code += f"    -- {field.name} ({field.field_type.value})\n"
+            lua_code += f"    if length >= {field.offset + field.length} then\n"
+            lua_code += f"        subtree:add(proto.fields.{field_name}, buffer({field.offset}, {field.length}))\n"
+            lua_code += "    end\n\n"
+
+        lua_code += """end
+
+-- Register dissector
+DissectorTable.get("wtap_encap"):add(wtap.USER0, proto)
+
+-- Also register for UDP port (example)
+-- local udp_table = DissectorTable.get("udp.port")
+-- udp_table:add(12345, proto)
+"""
+
+        Path(output_path).write_text(lua_code)
+
+    def generate_kaitai_struct(self, result: BinaryAnalysisResult, output_path: str | Path) -> None:
+        """Generate Kaitai Struct YAML definition.
+
+        Args:
+            result: Binary analysis result.
+            output_path: Path to save Kaitai Struct definition.
+
+        Example:
+            >>> generator = DissectorGenerator()
+            >>> generator.generate_kaitai_struct(results, "protocol.ksy")
+        """
+        if not result.structure or not result.structure.has_messages:
+            # Create minimal definition
+            ksy_content = """meta:
+  id: custom_protocol
+  title: Custom Binary Protocol
+  endian: le
+
+doc: |
+  No structure detected. This is a placeholder definition.
+
+seq: []
+"""
+            Path(output_path).write_text(ksy_content)
+            return
+
+        # Generate full definition
+        fields = result.structure.fields
+
+        ksy_content = f"""meta:
+  id: custom_protocol
+  title: Custom Binary Protocol
+  endian: le
+
+doc: |
+  Auto-generated Kaitai Struct definition from Oscura Binary Analysis.
+  Message length: {result.structure.message_length} bytes
+  Messages detected: {result.structure.message_count}
+
+seq:
+"""
+
+        # Add field definitions
+        for field in fields:
+            field_name = field.name.replace(" ", "_").lower()
+            ksy_type = self._map_field_to_kaitai_type(field)
+
+            ksy_content += f"  - id: {field_name}\n"
+            ksy_content += f"    type: {ksy_type}\n"
+            ksy_content += f"    doc: '{field.field_type.value} field at offset {field.offset}'\n"
+
+        Path(output_path).write_text(ksy_content)
+
+    def _map_field_to_kaitai_type(self, field: Field) -> str:
+        """Map field to Kaitai Struct type.
+
+        Args:
+            field: Field to map.
+
+        Returns:
+            Kaitai type string.
+        """
+        # Map by field length
+        if field.length == 1:
+            return "u1"
+        elif field.length == 2:
+            return "u2"
+        elif field.length == 4:
+            return "u4"
+        elif field.length == 8:
+            return "u8"
+        else:
+            # Variable or custom length
+            return f"bytes({field.length})" if field.length < 1024 else "bytes_eos"
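
Usage sketch (not part of the diff): the docstrings above show the intended call pattern. How `results` is produced is not visible in this hunk, so it is assumed here to be a BinaryAnalysisResult coming out of the new core/pipeline.py module added in this release.

from oscura.analyzers.binary.export.dissector import DissectorGenerator

# `results` is assumed: a BinaryAnalysisResult produced by the new
# binary analysis pipeline (not shown in this hunk).
generator = DissectorGenerator()
generator.generate_wireshark_lua(results, "protocol.lua")   # Wireshark Lua dissector
generator.generate_kaitai_struct(results, "protocol.ksy")   # Kaitai Struct definition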

oscura/analyzers/binary/inference/__init__.py
@@ -0,0 +1,15 @@
+"""Semantic analysis for binary fields."""
+
+from __future__ import annotations
+
+from oscura.analyzers.binary.inference.checksums import ChecksumAnalyzer
+from oscura.analyzers.binary.inference.fields import SemanticAnalyzer
+from oscura.analyzers.binary.inference.sequences import SequenceAnalyzer
+from oscura.analyzers.binary.inference.timestamps import TimestampAnalyzer
+
+__all__ = [
+    "ChecksumAnalyzer",
+    "SemanticAnalyzer",
+    "SequenceAnalyzer",
+    "TimestampAnalyzer",
+]
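
The package root re-exports all four analyzers, so downstream code can pull them in with one statement:

from oscura.analyzers.binary.inference import (
    ChecksumAnalyzer,
    SemanticAnalyzer,
    SequenceAnalyzer,
    TimestampAnalyzer,
)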

oscura/analyzers/binary/inference/checksums.py
@@ -0,0 +1,214 @@
+"""Checksum field analysis."""
+
+from __future__ import annotations
+
+import zlib
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from oscura.analyzers.binary.core.results import Field, Message
+
+
+class ChecksumAnalyzer:
+    """Analyze checksum fields.
+
+    Tests various checksum algorithms and validates against message data.
+
+    Example:
+        >>> analyzer = ChecksumAnalyzer()
+        >>> metadata = analyzer.analyze(field, messages)
+        >>> if metadata["algorithm"] != "unknown":
+        ...     print(f"Algorithm: {metadata['algorithm']}")
+        ...     print(f"Match rate: {metadata['match_rate']:.1%}")
+    """
+
+    def analyze(self, field: Field, messages: list[Message]) -> dict[str, Any]:
+        """Analyze checksum field.
+
+        Args:
+            field: Field to analyze.
+            messages: List of messages containing field.
+
+        Returns:
+            Metadata dict with checksum information.
+
+        Example:
+            >>> result = analyzer.analyze(field, messages)
+            >>> result["algorithm"]  # e.g., "crc32"
+            >>> result["match_rate"]  # e.g., 0.997
+        """
+        if len(messages) < 5:
+            return {
+                "algorithm": "unknown",
+                "reason": "insufficient_data",
+                "match_rate": 0.0,
+            }
+
+        # Test various algorithms
+        algorithms = []
+
+        if field.length == 1:
+            algorithms = ["sum8", "xor8"]
+        elif field.length == 2:
+            algorithms = ["crc16", "fletcher16", "sum16"]
+        elif field.length == 4:
+            algorithms = ["crc32", "adler32", "sum32"]
+        else:
+            return {
+                "algorithm": "unknown",
+                "reason": "unsupported_length",
+                "match_rate": 0.0,
+            }
+
+        best_algorithm = "unknown"
+        best_match_rate = 0.0
+        best_matches = 0
+
+        for algorithm in algorithms:
+            match_rate, matches = self._test_algorithm(algorithm, field, messages)
+
+            if match_rate > best_match_rate:
+                best_match_rate = match_rate
+                best_algorithm = algorithm
+                best_matches = matches
+
+        # Require >95% match rate for confirmation
+        if best_match_rate < 0.95:
+            return {
+                "algorithm": "unknown",
+                "reason": "low_match_rate",
+                "match_rate": best_match_rate,
+                "tested_algorithms": algorithms,
+            }
+
+        return {
+            "algorithm": best_algorithm,
+            "match_rate": best_match_rate,
+            "validated_count": best_matches,
+            "total_messages": len(messages),
+            "confidence": min(1.0, best_match_rate),
+        }
+
+    def _test_algorithm(
+        self, algorithm: str, field: Field, messages: list[Message]
+    ) -> tuple[float, int]:
+        """Test a checksum algorithm against messages.
+
+        Args:
+            algorithm: Algorithm name to test.
+            field: Checksum field.
+            messages: List of messages.
+
+        Returns:
+            (match_rate, match_count) tuple.
+        """
+        matches = 0
+        tested = 0
+
+        for msg in messages[:100]:  # Test first 100 messages
+            if field.offset + field.length > len(msg.data):
+                continue
+
+            # Extract expected checksum from field
+            expected_bytes = msg.data[field.offset : field.offset + field.length]
+            expected = int.from_bytes(expected_bytes, byteorder="little", signed=False)
+
+            # Calculate checksum over payload (excluding checksum field)
+            # Try checksumming data before and after the field
+            payload_before = msg.data[: field.offset]
+            payload_after = msg.data[field.offset + field.length :]
+
+            # Most common: checksum covers everything except itself
+            payload = payload_before + payload_after
+
+            # Calculate checksum
+            calculated = self._calculate_checksum(algorithm, payload)
+
+            if calculated == expected:
+                matches += 1
+
+            tested += 1
+
+        match_rate = matches / tested if tested > 0 else 0.0
+        return match_rate, matches
+
+    def _calculate_checksum(self, algorithm: str, data: bytes) -> int:
+        """Calculate checksum using specified algorithm.
+
+        Args:
+            algorithm: Algorithm name.
+            data: Data to checksum.
+
+        Returns:
+            Checksum value.
+        """
+        if algorithm == "crc32":
+            return zlib.crc32(data) & 0xFFFFFFFF
+
+        elif algorithm == "adler32":
+            return zlib.adler32(data) & 0xFFFFFFFF
+
+        elif algorithm == "crc16":
+            return self._crc16(data)
+
+        elif algorithm == "fletcher16":
+            return self._fletcher16(data)
+
+        elif algorithm == "sum8":
+            return sum(data) & 0xFF
+
+        elif algorithm == "xor8":
+            result = 0
+            for b in data:
+                result ^= b
+            return result & 0xFF
+
+        elif algorithm == "sum16":
+            return sum(data) & 0xFFFF
+
+        elif algorithm == "sum32":
+            return sum(data) & 0xFFFFFFFF
+
+        return 0
+
+    def _crc16(self, data: bytes) -> int:
+        """Calculate CRC-16 (CCITT).
+
+        Args:
+            data: Input data.
+
+        Returns:
+            CRC-16 value.
+        """
+        crc = 0xFFFF
+
+        for byte in data:
+            crc ^= byte << 8
+
+            for _ in range(8):
+                if crc & 0x8000:
+                    crc = (crc << 1) ^ 0x1021
+                else:
+                    crc = crc << 1
+
+                crc &= 0xFFFF
+
+        return crc
+
+    def _fletcher16(self, data: bytes) -> int:
+        """Calculate Fletcher-16 checksum.
+
+        Args:
+            data: Input data.
+
+        Returns:
+            Fletcher-16 value.
+        """
+        sum1 = 0
+        sum2 = 0
+
+        for byte in data:
+            sum1 = (sum1 + byte) % 255
+            sum2 = (sum2 + sum1) % 255
+
+        return (sum2 << 8) | sum1
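
To make the brute-force detection concrete, here is a self-contained sketch of the per-algorithm test that _test_algorithm performs. The frames and their layout are synthetic, invented for illustration; real Field/Message objects come from the structure-inference pass.

from functools import reduce

# Five synthetic 4-byte frames: 3 payload bytes plus a trailing sum8 checksum.
frames = [
    bytes(p) + bytes([sum(p) & 0xFF])
    for p in ([1, 2, 3], [9, 9, 9], [0, 255, 7], [4, 4, 4], [10, 20, 30])
]

def match_rate(algo) -> float:
    """Fraction of frames whose trailing byte equals algo(payload)."""
    hits = sum(1 for f in frames if algo(f[:-1]) == f[-1])
    return hits / len(frames)

print(match_rate(lambda d: sum(d) & 0xFF))                  # 1.0 -> sum8 clears the 0.95 bar
print(match_rate(lambda d: reduce(lambda a, b: a ^ b, d)))  # 0.0 -> xor8 is rejected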

oscura/analyzers/binary/inference/fields.py
@@ -0,0 +1,150 @@
+"""Semantic field analysis coordinator."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from oscura.analyzers.binary.core.results import FieldType
+from oscura.analyzers.binary.inference.checksums import ChecksumAnalyzer
+from oscura.analyzers.binary.inference.sequences import SequenceAnalyzer
+from oscura.analyzers.binary.inference.timestamps import TimestampAnalyzer
+
+if TYPE_CHECKING:
+    from oscura.analyzers.binary.core.file_access import BinaryFile
+    from oscura.analyzers.binary.core.results import Field, StructureResult
+
+
+class SemanticAnalyzer:
+    """Coordinate semantic analysis of binary fields.
+
+    Enhances field information with semantic metadata including:
+    - Sequence number detection and analysis
+    - Timestamp format identification
+    - Checksum algorithm detection and validation
+    - Payload characteristics
+
+    Example:
+        >>> analyzer = SemanticAnalyzer()
+        >>> enhanced_fields = analyzer.analyze_fields(binary_file, structure)
+        >>> for field in enhanced_fields:
+        ...     if field.field_type == FieldType.CHECKSUM:
+        ...         print(f"Checksum: {field.metadata['algorithm']}")
+    """
+
+    def __init__(self) -> None:
+        """Initialize semantic analyzer."""
+        self.sequence_analyzer = SequenceAnalyzer()
+        self.timestamp_analyzer = TimestampAnalyzer()
+        self.checksum_analyzer = ChecksumAnalyzer()
+
+    def analyze_fields(self, file: BinaryFile, structure: StructureResult) -> list[Field]:
+        """Enhance fields with semantic analysis.
+
+        Args:
+            file: Binary file being analyzed.
+            structure: Structure inference result.
+
+        Returns:
+            List of enhanced fields with metadata.
+
+        Example:
+            >>> analyzer = SemanticAnalyzer()
+            >>> fields = analyzer.analyze_fields(binary_file, structure_result)
+        """
+        if not structure.has_messages or not structure.fields:
+            return structure.fields
+
+        enhanced_fields = []
+
+        for field in structure.fields:
+            # Analyze based on field type
+            if field.field_type == FieldType.SEQUENCE:
+                metadata = self.sequence_analyzer.analyze(field, structure.messages)
+                field.metadata.update(metadata)
+
+            elif field.field_type == FieldType.TIMESTAMP:
+                metadata = self.timestamp_analyzer.analyze(field, structure.messages)
+                field.metadata.update(metadata)
+
+            elif field.field_type == FieldType.CHECKSUM:
+                metadata = self.checksum_analyzer.analyze(field, structure.messages)
+                field.metadata.update(metadata)
+
+            elif field.field_type == FieldType.PAYLOAD:
+                metadata = self._analyze_payload(field, structure.messages)
+                field.metadata.update(metadata)
+
+            enhanced_fields.append(field)
+
+        return enhanced_fields
+
+    def _analyze_payload(self, field: Field, messages: list[Any]) -> dict[str, Any]:
+        """Analyze payload field characteristics.
+
+        Args:
+            field: Payload field.
+            messages: List of messages.
+
+        Returns:
+            Metadata dict with payload characteristics.
+        """
+        if len(messages) == 0:
+            return {"payload_type": "unknown"}
+
+        # Sample first few payloads
+        sample_size = min(10, len(messages))
+        entropies = []
+
+        for msg in messages[:sample_size]:
+            if field.offset + field.length <= len(msg.data):
+                payload = msg.data[field.offset : field.offset + field.length]
+                entropy = self._calculate_entropy(payload)
+                entropies.append(entropy)
+
+        if not entropies:
+            return {"payload_type": "unknown"}
+
+        avg_entropy = sum(entropies) / len(entropies)
+
+        # Classify payload type by entropy
+        if avg_entropy > 7.5:
+            payload_type = "compressed_or_encrypted"
+        elif avg_entropy > 5.0:
+            payload_type = "binary_data"
+        elif avg_entropy > 3.0:
+            payload_type = "mixed_content"
+        else:
+            payload_type = "low_entropy"
+
+        return {
+            "payload_type": payload_type,
+            "average_entropy": avg_entropy,
+            "entropy_bits_per_byte": avg_entropy,
+        }
+
+    def _calculate_entropy(self, data: bytes) -> float:
+        """Calculate Shannon entropy of data.
+
+        Args:
+            data: Input bytes.
+
+        Returns:
+            Entropy in bits per byte.
+        """
+        if len(data) == 0:
+            return 0.0
+
+        from collections import Counter
+
+        counts = Counter(data)
+        total = len(data)
+
+        entropy = 0.0
+        for count in counts.values():
+            p = count / total
+            if p > 0:
+                import math
+
+                entropy -= p * math.log2(p)
+
+        return entropy
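
The entropy thresholds in _analyze_payload (7.5, 5.0, and 3.0 bits per byte) can be sanity-checked with a standalone version of the same Shannon-entropy computation:

import math
import os
from collections import Counter

def shannon_entropy(data: bytes) -> float:
    """Shannon entropy in bits per byte, mirroring _calculate_entropy above."""
    if not data:
        return 0.0
    total = len(data)
    return -sum((c / total) * math.log2(c / total) for c in Counter(data).values())

print(shannon_entropy(b"\x00" * 4096))    # 0.0   -> classified "low_entropy"
print(shannon_entropy(os.urandom(4096)))  # ~7.95 -> "compressed_or_encrypted" (> 7.5)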