oscura 0.11.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oscura/__init__.py +1 -1
- oscura/analyzers/binary/__init__.py +36 -0
- oscura/analyzers/binary/core/__init__.py +29 -0
- oscura/analyzers/binary/core/file_access.py +193 -0
- oscura/analyzers/binary/core/pipeline.py +161 -0
- oscura/analyzers/binary/core/results.py +217 -0
- oscura/analyzers/binary/detection/__init__.py +10 -0
- oscura/analyzers/binary/detection/encoding.py +624 -0
- oscura/analyzers/binary/detection/patterns.py +320 -0
- oscura/analyzers/binary/detection/structure.py +630 -0
- oscura/analyzers/binary/export/__init__.py +9 -0
- oscura/analyzers/binary/export/dissector.py +174 -0
- oscura/analyzers/binary/inference/__init__.py +15 -0
- oscura/analyzers/binary/inference/checksums.py +214 -0
- oscura/analyzers/binary/inference/fields.py +150 -0
- oscura/analyzers/binary/inference/sequences.py +232 -0
- oscura/analyzers/binary/inference/timestamps.py +210 -0
- oscura/analyzers/binary/visualization/__init__.py +9 -0
- oscura/analyzers/binary/visualization/structure_view.py +182 -0
- oscura/automotive/__init__.py +1 -1
- oscura/automotive/dtc/data.json +102 -17
- oscura/core/schemas/device_mapping.json +8 -2
- oscura/core/schemas/packet_format.json +24 -4
- oscura/core/schemas/protocol_definition.json +12 -2
- oscura/loaders/__init__.py +4 -1
- oscura/loaders/binary.py +284 -1
- oscura/sessions/legacy.py +80 -19
- {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/METADATA +3 -3
- {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/RECORD +32 -14
- {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/WHEEL +0 -0
- {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/entry_points.txt +0 -0
- {oscura-0.11.0.dist-info → oscura-0.12.0.dist-info}/licenses/LICENSE +0 -0
oscura/automotive/dtc/data.json
CHANGED
|
@@ -266,7 +266,12 @@
|
|
|
266
266
|
"category": "Powertrain",
|
|
267
267
|
"severity": "High",
|
|
268
268
|
"system": "Throttle Control",
|
|
269
|
-
"possible_causes": [
|
|
269
|
+
"possible_causes": [
|
|
270
|
+
"TPS circuit shorted to voltage",
|
|
271
|
+
"Faulty TPS sensor",
|
|
272
|
+
"Wiring harness open",
|
|
273
|
+
"ECM problem"
|
|
274
|
+
]
|
|
270
275
|
},
|
|
271
276
|
"P0125": {
|
|
272
277
|
"code": "P0125",
|
|
@@ -860,7 +865,12 @@
|
|
|
860
865
|
"category": "Powertrain",
|
|
861
866
|
"severity": "Medium",
|
|
862
867
|
"system": "Emissions Control",
|
|
863
|
-
"possible_causes": [
|
|
868
|
+
"possible_causes": [
|
|
869
|
+
"EGR valve stuck closed",
|
|
870
|
+
"EGR passages clogged",
|
|
871
|
+
"Faulty EGR valve",
|
|
872
|
+
"Vacuum leak"
|
|
873
|
+
]
|
|
864
874
|
},
|
|
865
875
|
"P0401": {
|
|
866
876
|
"code": "P0401",
|
|
@@ -881,7 +891,12 @@
|
|
|
881
891
|
"category": "Powertrain",
|
|
882
892
|
"severity": "Medium",
|
|
883
893
|
"system": "Emissions Control",
|
|
884
|
-
"possible_causes": [
|
|
894
|
+
"possible_causes": [
|
|
895
|
+
"EGR valve stuck open",
|
|
896
|
+
"Faulty EGR valve",
|
|
897
|
+
"EGR vacuum solenoid fault",
|
|
898
|
+
"ECM problem"
|
|
899
|
+
]
|
|
885
900
|
},
|
|
886
901
|
"P0403": {
|
|
887
902
|
"code": "P0403",
|
|
@@ -943,7 +958,12 @@
|
|
|
943
958
|
"category": "Powertrain",
|
|
944
959
|
"severity": "Low",
|
|
945
960
|
"system": "Emissions Control",
|
|
946
|
-
"possible_causes": [
|
|
961
|
+
"possible_causes": [
|
|
962
|
+
"Loose or missing fuel cap",
|
|
963
|
+
"EVAP system leak",
|
|
964
|
+
"Faulty purge valve",
|
|
965
|
+
"Faulty vent valve"
|
|
966
|
+
]
|
|
947
967
|
},
|
|
948
968
|
"P0441": {
|
|
949
969
|
"code": "P0441",
|
|
@@ -1055,7 +1075,12 @@
|
|
|
1055
1075
|
"category": "Powertrain",
|
|
1056
1076
|
"severity": "Low",
|
|
1057
1077
|
"system": "Idle Control",
|
|
1058
|
-
"possible_causes": [
|
|
1078
|
+
"possible_causes": [
|
|
1079
|
+
"Vacuum leak",
|
|
1080
|
+
"IAC valve fault",
|
|
1081
|
+
"Dirty throttle body",
|
|
1082
|
+
"PCV valve problem"
|
|
1083
|
+
]
|
|
1059
1084
|
},
|
|
1060
1085
|
"P0507": {
|
|
1061
1086
|
"code": "P0507",
|
|
@@ -1063,7 +1088,12 @@
|
|
|
1063
1088
|
"category": "Powertrain",
|
|
1064
1089
|
"severity": "Low",
|
|
1065
1090
|
"system": "Idle Control",
|
|
1066
|
-
"possible_causes": [
|
|
1091
|
+
"possible_causes": [
|
|
1092
|
+
"Vacuum leak",
|
|
1093
|
+
"IAC valve stuck open",
|
|
1094
|
+
"PCV valve stuck open",
|
|
1095
|
+
"EVAP purge valve leaking"
|
|
1096
|
+
]
|
|
1067
1097
|
},
|
|
1068
1098
|
"P0600": {
|
|
1069
1099
|
"code": "P0600",
|
|
@@ -1097,7 +1127,12 @@
|
|
|
1097
1127
|
"category": "Powertrain",
|
|
1098
1128
|
"severity": "Critical",
|
|
1099
1129
|
"system": "Engine Control Module",
|
|
1100
|
-
"possible_causes": [
|
|
1130
|
+
"possible_causes": [
|
|
1131
|
+
"ECM not programmed",
|
|
1132
|
+
"ECM programming incomplete",
|
|
1133
|
+
"Wrong software version",
|
|
1134
|
+
"ECM fault"
|
|
1135
|
+
]
|
|
1101
1136
|
},
|
|
1102
1137
|
"P0603": {
|
|
1103
1138
|
"code": "P0603",
|
|
@@ -1170,7 +1205,12 @@
|
|
|
1170
1205
|
"category": "Powertrain",
|
|
1171
1206
|
"severity": "Medium",
|
|
1172
1207
|
"system": "Charging System",
|
|
1173
|
-
"possible_causes": [
|
|
1208
|
+
"possible_causes": [
|
|
1209
|
+
"Faulty alternator",
|
|
1210
|
+
"Wiring harness problem",
|
|
1211
|
+
"Poor electrical connection",
|
|
1212
|
+
"ECM fault"
|
|
1213
|
+
]
|
|
1174
1214
|
},
|
|
1175
1215
|
"P0625": {
|
|
1176
1216
|
"code": "P0625",
|
|
@@ -1243,7 +1283,12 @@
|
|
|
1243
1283
|
"category": "Powertrain",
|
|
1244
1284
|
"severity": "High",
|
|
1245
1285
|
"system": "Transmission",
|
|
1246
|
-
"possible_causes": [
|
|
1286
|
+
"possible_causes": [
|
|
1287
|
+
"Faulty input speed sensor",
|
|
1288
|
+
"Wiring harness problem",
|
|
1289
|
+
"Sensor reluctor damaged",
|
|
1290
|
+
"TCM fault"
|
|
1291
|
+
]
|
|
1247
1292
|
},
|
|
1248
1293
|
"P0720": {
|
|
1249
1294
|
"code": "P0720",
|
|
@@ -1446,7 +1491,12 @@
|
|
|
1446
1491
|
"category": "Chassis",
|
|
1447
1492
|
"severity": "High",
|
|
1448
1493
|
"system": "ABS",
|
|
1449
|
-
"possible_causes": [
|
|
1494
|
+
"possible_causes": [
|
|
1495
|
+
"Faulty valve relay",
|
|
1496
|
+
"Relay circuit problem",
|
|
1497
|
+
"ABS module fault",
|
|
1498
|
+
"Wiring harness issue"
|
|
1499
|
+
]
|
|
1450
1500
|
},
|
|
1451
1501
|
"C0161": {
|
|
1452
1502
|
"code": "C0161",
|
|
@@ -2156,7 +2206,12 @@
|
|
|
2156
2206
|
"category": "Body",
|
|
2157
2207
|
"severity": "Low",
|
|
2158
2208
|
"system": "Lighting System",
|
|
2159
|
-
"possible_causes": [
|
|
2209
|
+
"possible_causes": [
|
|
2210
|
+
"Burned out bulb",
|
|
2211
|
+
"Wiring harness problem",
|
|
2212
|
+
"Lamp socket corrosion",
|
|
2213
|
+
"BCM fault"
|
|
2214
|
+
]
|
|
2160
2215
|
},
|
|
2161
2216
|
"B0601": {
|
|
2162
2217
|
"code": "B0601",
|
|
@@ -2177,7 +2232,12 @@
|
|
|
2177
2232
|
"category": "Body",
|
|
2178
2233
|
"severity": "Low",
|
|
2179
2234
|
"system": "Lighting System",
|
|
2180
|
-
"possible_causes": [
|
|
2235
|
+
"possible_causes": [
|
|
2236
|
+
"Burned out turn signal bulb",
|
|
2237
|
+
"Wiring harness problem",
|
|
2238
|
+
"Flasher relay fault",
|
|
2239
|
+
"BCM fault"
|
|
2240
|
+
]
|
|
2181
2241
|
},
|
|
2182
2242
|
"B0603": {
|
|
2183
2243
|
"code": "B0603",
|
|
@@ -2185,7 +2245,12 @@
|
|
|
2185
2245
|
"category": "Body",
|
|
2186
2246
|
"severity": "Low",
|
|
2187
2247
|
"system": "Lighting System",
|
|
2188
|
-
"possible_causes": [
|
|
2248
|
+
"possible_causes": [
|
|
2249
|
+
"Burned out turn signal bulb",
|
|
2250
|
+
"Wiring harness problem",
|
|
2251
|
+
"Flasher relay fault",
|
|
2252
|
+
"BCM fault"
|
|
2253
|
+
]
|
|
2189
2254
|
},
|
|
2190
2255
|
"B0604": {
|
|
2191
2256
|
"code": "B0604",
|
|
@@ -2297,7 +2362,12 @@
|
|
|
2297
2362
|
"category": "Body",
|
|
2298
2363
|
"severity": "Low",
|
|
2299
2364
|
"system": "Keyless Entry",
|
|
2300
|
-
"possible_causes": [
|
|
2365
|
+
"possible_causes": [
|
|
2366
|
+
"Key fob battery weak",
|
|
2367
|
+
"Key fob not synchronized",
|
|
2368
|
+
"BCM fault",
|
|
2369
|
+
"Receiver antenna fault"
|
|
2370
|
+
]
|
|
2301
2371
|
},
|
|
2302
2372
|
"B1300": {
|
|
2303
2373
|
"code": "B1300",
|
|
@@ -2396,7 +2466,12 @@
|
|
|
2396
2466
|
"category": "Network",
|
|
2397
2467
|
"severity": "Critical",
|
|
2398
2468
|
"system": "CAN Bus",
|
|
2399
|
-
"possible_causes": [
|
|
2469
|
+
"possible_causes": [
|
|
2470
|
+
"TCM not powered",
|
|
2471
|
+
"CAN bus wiring problem",
|
|
2472
|
+
"TCM internal fault",
|
|
2473
|
+
"CAN bus short circuit"
|
|
2474
|
+
]
|
|
2400
2475
|
},
|
|
2401
2476
|
"U0102": {
|
|
2402
2477
|
"code": "U0102",
|
|
@@ -2469,7 +2544,12 @@
|
|
|
2469
2544
|
"category": "Network",
|
|
2470
2545
|
"severity": "High",
|
|
2471
2546
|
"system": "CAN Bus",
|
|
2472
|
-
"possible_causes": [
|
|
2547
|
+
"possible_causes": [
|
|
2548
|
+
"BCM not powered",
|
|
2549
|
+
"CAN bus wiring problem",
|
|
2550
|
+
"BCM internal fault",
|
|
2551
|
+
"Ground connection issue"
|
|
2552
|
+
]
|
|
2473
2553
|
},
|
|
2474
2554
|
"U0141": {
|
|
2475
2555
|
"code": "U0141",
|
|
@@ -2477,7 +2557,12 @@
|
|
|
2477
2557
|
"category": "Network",
|
|
2478
2558
|
"severity": "High",
|
|
2479
2559
|
"system": "CAN Bus",
|
|
2480
|
-
"possible_causes": [
|
|
2560
|
+
"possible_causes": [
|
|
2561
|
+
"BCM not powered",
|
|
2562
|
+
"CAN bus wiring problem",
|
|
2563
|
+
"Module internal fault",
|
|
2564
|
+
"Connector problem"
|
|
2565
|
+
]
|
|
2481
2566
|
},
|
|
2482
2567
|
"U0151": {
|
|
2483
2568
|
"code": "U0151",
|
|
@@ -149,14 +149,20 @@
|
|
|
149
149
|
"type": "array",
|
|
150
150
|
"description": "Device IDs to include (whitelist)",
|
|
151
151
|
"items": {
|
|
152
|
-
"oneOf": [
|
|
152
|
+
"oneOf": [
|
|
153
|
+
{ "type": "integer" },
|
|
154
|
+
{ "type": "string", "pattern": "^0[xX][0-9A-Fa-f]+$" }
|
|
155
|
+
]
|
|
153
156
|
}
|
|
154
157
|
},
|
|
155
158
|
"exclude_devices": {
|
|
156
159
|
"type": "array",
|
|
157
160
|
"description": "Device IDs to exclude (blacklist)",
|
|
158
161
|
"items": {
|
|
159
|
-
"oneOf": [
|
|
162
|
+
"oneOf": [
|
|
163
|
+
{ "type": "integer" },
|
|
164
|
+
{ "type": "string", "pattern": "^0[xX][0-9A-Fa-f]+$" }
|
|
165
|
+
]
|
|
160
166
|
}
|
|
161
167
|
},
|
|
162
168
|
"include_categories": {
|
|
@@ -118,7 +118,10 @@
|
|
|
118
118
|
},
|
|
119
119
|
"value": {
|
|
120
120
|
"description": "Expected constant value for validation",
|
|
121
|
-
"oneOf": [
|
|
121
|
+
"oneOf": [
|
|
122
|
+
{ "type": "integer" },
|
|
123
|
+
{ "type": "array", "items": { "type": "integer" } }
|
|
124
|
+
]
|
|
122
125
|
},
|
|
123
126
|
"description": {
|
|
124
127
|
"type": "string",
|
|
@@ -185,7 +188,18 @@
|
|
|
185
188
|
},
|
|
186
189
|
"type": {
|
|
187
190
|
"type": "string",
|
|
188
|
-
"enum": [
|
|
191
|
+
"enum": [
|
|
192
|
+
"uint8",
|
|
193
|
+
"uint16",
|
|
194
|
+
"uint32",
|
|
195
|
+
"uint64",
|
|
196
|
+
"int8",
|
|
197
|
+
"int16",
|
|
198
|
+
"int32",
|
|
199
|
+
"int64",
|
|
200
|
+
"float32",
|
|
201
|
+
"float64"
|
|
202
|
+
],
|
|
189
203
|
"description": "Sample data type"
|
|
190
204
|
},
|
|
191
205
|
"endian": {
|
|
@@ -289,7 +303,10 @@
|
|
|
289
303
|
},
|
|
290
304
|
"expected": {
|
|
291
305
|
"description": "Expected value",
|
|
292
|
-
"oneOf": [
|
|
306
|
+
"oneOf": [
|
|
307
|
+
{ "type": "integer" },
|
|
308
|
+
{ "type": "array", "items": { "type": "integer" } }
|
|
309
|
+
]
|
|
293
310
|
},
|
|
294
311
|
"on_failure": {
|
|
295
312
|
"type": "string",
|
|
@@ -362,7 +379,10 @@
|
|
|
362
379
|
},
|
|
363
380
|
"pattern": {
|
|
364
381
|
"description": "Idle pattern to detect",
|
|
365
|
-
"oneOf": [
|
|
382
|
+
"oneOf": [
|
|
383
|
+
{ "type": "string", "enum": ["auto", "zeros", "ones"] },
|
|
384
|
+
{ "type": "integer" }
|
|
385
|
+
]
|
|
366
386
|
},
|
|
367
387
|
"min_duration": {
|
|
368
388
|
"type": "integer",
|
|
@@ -241,7 +241,12 @@
|
|
|
241
241
|
},
|
|
242
242
|
"value": {
|
|
243
243
|
"description": "Expected constant value for validation",
|
|
244
|
-
"oneOf": [
|
|
244
|
+
"oneOf": [
|
|
245
|
+
{ "type": "integer" },
|
|
246
|
+
{ "type": "number" },
|
|
247
|
+
{ "type": "string" },
|
|
248
|
+
{ "type": "array" }
|
|
249
|
+
]
|
|
245
250
|
},
|
|
246
251
|
"condition": {
|
|
247
252
|
"type": "string",
|
|
@@ -326,7 +331,12 @@
|
|
|
326
331
|
},
|
|
327
332
|
"expected": {
|
|
328
333
|
"description": "Expected value",
|
|
329
|
-
"oneOf": [
|
|
334
|
+
"oneOf": [
|
|
335
|
+
{ "type": "integer" },
|
|
336
|
+
{ "type": "number" },
|
|
337
|
+
{ "type": "string" },
|
|
338
|
+
{ "type": "array" }
|
|
339
|
+
]
|
|
330
340
|
},
|
|
331
341
|
"on_mismatch": {
|
|
332
342
|
"type": "string",
|
oscura/loaders/__init__.py
CHANGED
|
@@ -41,6 +41,7 @@ _LOADER_REGISTRY: dict[str, tuple[str, str]] = {
|
|
|
41
41
|
"tdms": ("oscura.loaders.tdms", "load_tdms"),
|
|
42
42
|
"touchstone": ("oscura.loaders.touchstone", "load_touchstone"),
|
|
43
43
|
"chipwhisperer": ("oscura.loaders.chipwhisperer", "load_chipwhisperer"),
|
|
44
|
+
"binary": ("oscura.loaders.binary", "load_binary_auto"),
|
|
44
45
|
}
|
|
45
46
|
|
|
46
47
|
|
|
@@ -96,7 +97,7 @@ from oscura.loaders import (
|
|
|
96
97
|
csv,
|
|
97
98
|
hdf5,
|
|
98
99
|
)
|
|
99
|
-
from oscura.loaders.binary import load_binary
|
|
100
|
+
from oscura.loaders.binary import load_binary, load_binary_auto
|
|
100
101
|
|
|
101
102
|
# Import configurable binary loading functionality
|
|
102
103
|
from oscura.loaders.configurable import (
|
|
@@ -182,6 +183,7 @@ logger = logging.getLogger(__name__)
|
|
|
182
183
|
SUPPORTED_FORMATS: dict[str, str] = {
|
|
183
184
|
".wfm": "auto_wfm", # Auto-detect Tektronix vs Rigol
|
|
184
185
|
".tss": "tss", # Tektronix session files
|
|
186
|
+
".bin": "binary", # Raw binary with auto-detection
|
|
185
187
|
".npz": "numpy",
|
|
186
188
|
".csv": "csv",
|
|
187
189
|
".h5": "hdf5",
|
|
@@ -655,6 +657,7 @@ __all__ = [
|
|
|
655
657
|
"load_all_channels",
|
|
656
658
|
"load_auto",
|
|
657
659
|
"load_binary",
|
|
660
|
+
"load_binary_auto",
|
|
658
661
|
"load_binary_packets",
|
|
659
662
|
"load_lazy",
|
|
660
663
|
"load_packets_streaming",
|
oscura/loaders/binary.py
CHANGED
|
@@ -153,4 +153,287 @@ def _load_binary_mmap(
|
|
|
153
153
|
mm.close()
|
|
154
154
|
|
|
155
155
|
|
|
156
|
-
|
|
156
|
+
def detect_binary_dtype(
|
|
157
|
+
path: str | PathLike[str], sample_size: int = 8192
|
|
158
|
+
) -> tuple[str, dict[str, Any]]:
|
|
159
|
+
"""Auto-detect most likely dtype for binary file using intelligent multi-heuristic analysis.
|
|
160
|
+
|
|
161
|
+
Performs comprehensive analysis including:
|
|
162
|
+
- Multi-location sampling (beginning, middle, end)
|
|
163
|
+
- Byte entropy and distribution analysis
|
|
164
|
+
- IEEE 754 floating point pattern detection
|
|
165
|
+
- Value range validation for each dtype
|
|
166
|
+
- Alignment and padding pattern detection
|
|
167
|
+
- Statistical confidence scoring
|
|
168
|
+
|
|
169
|
+
Designed to handle completely unknown binary formats with no prior knowledge.
|
|
170
|
+
|
|
171
|
+
Args:
|
|
172
|
+
path: Path to binary file.
|
|
173
|
+
sample_size: Bytes to sample per location (default: 8KB).
|
|
174
|
+
|
|
175
|
+
Returns:
|
|
176
|
+
Tuple of (detected_dtype, confidence_scores).
|
|
177
|
+
confidence_scores maps each dtype to its normalized confidence (0-1).
|
|
178
|
+
|
|
179
|
+
Example:
|
|
180
|
+
>>> dtype, confidence = detect_binary_dtype("unknown.bin")
|
|
181
|
+
>>> print(f"Detected: {dtype} (confidence: {confidence[dtype]:.1%})")
|
|
182
|
+
Detected: uint16 (confidence: 85.3%)
|
|
183
|
+
"""
|
|
184
|
+
path = Path(path)
|
|
185
|
+
file_size = path.stat().st_size
|
|
186
|
+
|
|
187
|
+
# Sample from multiple locations for robust detection
|
|
188
|
+
samples_to_check = []
|
|
189
|
+
with open(path, "rb") as f:
|
|
190
|
+
# Beginning
|
|
191
|
+
samples_to_check.append(f.read(min(sample_size, file_size)))
|
|
192
|
+
|
|
193
|
+
# Middle (if large enough)
|
|
194
|
+
if file_size > sample_size * 2:
|
|
195
|
+
f.seek(file_size // 2)
|
|
196
|
+
samples_to_check.append(f.read(min(sample_size, file_size - f.tell())))
|
|
197
|
+
|
|
198
|
+
# End (if large enough)
|
|
199
|
+
if file_size > sample_size * 3:
|
|
200
|
+
f.seek(max(0, file_size - sample_size))
|
|
201
|
+
samples_to_check.append(f.read())
|
|
202
|
+
|
|
203
|
+
sample = b"".join(samples_to_check)
|
|
204
|
+
|
|
205
|
+
if len(sample) < 16:
|
|
206
|
+
return "uint8", {"uint8": 1.0}
|
|
207
|
+
|
|
208
|
+
from collections import Counter
|
|
209
|
+
|
|
210
|
+
# Byte entropy calculation
|
|
211
|
+
byte_counts = Counter(sample)
|
|
212
|
+
total = len(sample)
|
|
213
|
+
entropy = -sum((count / total) * np.log2(count / total) for count in byte_counts.values())
|
|
214
|
+
zero_density = sample.count(b"\x00") / len(sample)
|
|
215
|
+
|
|
216
|
+
# Score each dtype possibility
|
|
217
|
+
scores: dict[str, float] = {
|
|
218
|
+
"uint8": 0.0,
|
|
219
|
+
"int8": 0.0,
|
|
220
|
+
"uint16": 0.0,
|
|
221
|
+
"int16": 0.0,
|
|
222
|
+
"uint32": 0.0,
|
|
223
|
+
"int32": 0.0,
|
|
224
|
+
"float32": 0.0,
|
|
225
|
+
"float64": 0.0,
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
# Test 1: IEEE 754 floating point validation
|
|
229
|
+
float32_valid = 0
|
|
230
|
+
for i in range(0, min(len(sample) - 3, 4096), 4):
|
|
231
|
+
try:
|
|
232
|
+
val = np.frombuffer(sample[i : i + 4], dtype=np.float32)[0]
|
|
233
|
+
if np.isfinite(val) and -1e10 < val < 1e10:
|
|
234
|
+
float32_valid += 1
|
|
235
|
+
except Exception:
|
|
236
|
+
pass
|
|
237
|
+
|
|
238
|
+
float64_valid = 0
|
|
239
|
+
for i in range(0, min(len(sample) - 7, 4096), 8):
|
|
240
|
+
try:
|
|
241
|
+
val = np.frombuffer(sample[i : i + 8], dtype=np.float64)[0]
|
|
242
|
+
if np.isfinite(val) and -1e10 < val < 1e10:
|
|
243
|
+
float64_valid += 1
|
|
244
|
+
except Exception:
|
|
245
|
+
pass
|
|
246
|
+
|
|
247
|
+
scores["float32"] = (float32_valid / (min(len(sample), 4096) / 4)) * 3.0
|
|
248
|
+
scores["float64"] = (float64_valid / (min(len(sample), 4096) / 8)) * 3.0
|
|
249
|
+
|
|
250
|
+
# Test 2: Entropy-based scoring
|
|
251
|
+
if entropy > 7.0:
|
|
252
|
+
scores["float32"] += 2.0
|
|
253
|
+
scores["float64"] += 2.0
|
|
254
|
+
elif entropy > 6.0:
|
|
255
|
+
scores["int32"] += 1.5
|
|
256
|
+
scores["uint32"] += 1.5
|
|
257
|
+
elif entropy > 4.5:
|
|
258
|
+
scores["int16"] += 2.0
|
|
259
|
+
scores["uint16"] += 2.0
|
|
260
|
+
else:
|
|
261
|
+
scores["int8"] += 2.0
|
|
262
|
+
scores["uint8"] += 2.0
|
|
263
|
+
|
|
264
|
+
# Test 3: Zero density (structured data indicator)
|
|
265
|
+
if zero_density > 0.6:
|
|
266
|
+
scores["int16"] += 1.5
|
|
267
|
+
scores["uint16"] += 1.5
|
|
268
|
+
elif zero_density > 0.4:
|
|
269
|
+
scores["int16"] += 1.0
|
|
270
|
+
scores["uint16"] += 1.0
|
|
271
|
+
|
|
272
|
+
# Test 4: Value range reasonableness
|
|
273
|
+
uint8_reasonable = sum(1 for b in sample[: min(1000, len(sample))] if b < 128) / min(
|
|
274
|
+
1000, len(sample)
|
|
275
|
+
)
|
|
276
|
+
if uint8_reasonable > 0.8:
|
|
277
|
+
scores["uint8"] += 1.5
|
|
278
|
+
|
|
279
|
+
# Find best dtype
|
|
280
|
+
best_dtype = max(scores.items(), key=lambda x: x[1])[0]
|
|
281
|
+
|
|
282
|
+
# Normalize confidence scores
|
|
283
|
+
max_score = max(scores.values()) if scores.values() else 1.0
|
|
284
|
+
confidence = {k: v / max_score for k, v in scores.items()} if max_score > 0 else scores
|
|
285
|
+
|
|
286
|
+
return best_dtype, confidence
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def detect_packet_structure(path: str | PathLike[str], sample_size: int = 8192) -> tuple[bool, int]:
|
|
290
|
+
"""Detect if binary file contains structured packet data.
|
|
291
|
+
|
|
292
|
+
Looks for repeating header patterns and regular spacing indicating
|
|
293
|
+
packet boundaries.
|
|
294
|
+
|
|
295
|
+
Args:
|
|
296
|
+
path: Path to binary file.
|
|
297
|
+
sample_size: Number of bytes to sample for detection.
|
|
298
|
+
|
|
299
|
+
Returns:
|
|
300
|
+
Tuple of (is_packet_data, packet_size_estimate).
|
|
301
|
+
packet_size_estimate is 0 if not packet data.
|
|
302
|
+
|
|
303
|
+
Example:
|
|
304
|
+
>>> is_packets, size = detect_packet_structure("capture.bin")
|
|
305
|
+
>>> if is_packets:
|
|
306
|
+
... print(f"Detected packet structure with ~{size} byte packets")
|
|
307
|
+
"""
|
|
308
|
+
path = Path(path)
|
|
309
|
+
|
|
310
|
+
with open(path, "rb") as f:
|
|
311
|
+
sample = f.read(sample_size)
|
|
312
|
+
|
|
313
|
+
if len(sample) < 512:
|
|
314
|
+
return False, 0
|
|
315
|
+
|
|
316
|
+
# Look for sequence numbers (common in packet headers)
|
|
317
|
+
# Check for patterns like: 00 00, 01 00, 02 00, 03 00 (little-endian sequence)
|
|
318
|
+
sequence_positions = []
|
|
319
|
+
for seq_byte in range(10): # Check first 10 sequence numbers
|
|
320
|
+
pattern = seq_byte.to_bytes(1, "little") + b"\x00"
|
|
321
|
+
pos = sample.find(pattern)
|
|
322
|
+
if pos != -1:
|
|
323
|
+
sequence_positions.append(pos)
|
|
324
|
+
|
|
325
|
+
# If we found multiple sequence numbers at regular intervals = likely packets
|
|
326
|
+
if len(sequence_positions) >= 3:
|
|
327
|
+
# Calculate intervals between sequence numbers
|
|
328
|
+
intervals = [
|
|
329
|
+
sequence_positions[i + 1] - sequence_positions[i]
|
|
330
|
+
for i in range(len(sequence_positions) - 1)
|
|
331
|
+
]
|
|
332
|
+
|
|
333
|
+
# Check if intervals are consistent (within 10% variation)
|
|
334
|
+
if intervals:
|
|
335
|
+
avg_interval = sum(intervals) / len(intervals)
|
|
336
|
+
variation = max(abs(i - avg_interval) for i in intervals) / avg_interval
|
|
337
|
+
|
|
338
|
+
if variation < 0.1 and 100 < avg_interval < 10000:
|
|
339
|
+
# Consistent spacing in reasonable range = packet structure
|
|
340
|
+
return True, int(avg_interval)
|
|
341
|
+
|
|
342
|
+
# Look for repeating byte patterns (common header markers)
|
|
343
|
+
# Check 4-byte patterns that repeat regularly
|
|
344
|
+
pattern_positions: dict[bytes, list[int]] = {}
|
|
345
|
+
for i in range(0, min(1024, len(sample) - 4), 4):
|
|
346
|
+
pattern = sample[i : i + 4]
|
|
347
|
+
if pattern not in pattern_positions:
|
|
348
|
+
pattern_positions[pattern] = []
|
|
349
|
+
pattern_positions[pattern].append(i)
|
|
350
|
+
|
|
351
|
+
# Find patterns that repeat with consistent spacing
|
|
352
|
+
for pattern, positions in pattern_positions.items():
|
|
353
|
+
if len(positions) >= 3 and pattern != b"\x00\x00\x00\x00":
|
|
354
|
+
intervals = [positions[i + 1] - positions[i] for i in range(len(positions) - 1)]
|
|
355
|
+
if intervals:
|
|
356
|
+
avg_interval = sum(intervals) / len(intervals)
|
|
357
|
+
variation = (
|
|
358
|
+
max(abs(i - avg_interval) for i in intervals) / avg_interval
|
|
359
|
+
if intervals
|
|
360
|
+
else 1.0
|
|
361
|
+
)
|
|
362
|
+
|
|
363
|
+
if variation < 0.1 and 100 < avg_interval < 10000:
|
|
364
|
+
return True, int(avg_interval)
|
|
365
|
+
|
|
366
|
+
return False, 0
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def load_binary_auto(
    path: str | PathLike[str],
    *,
    sample_rate: float | None = None,
    max_samples: int = 100_000,
    channels: int = 1,
    channel: int = 0,
) -> WaveformTrace:
    """Load binary file with automatic dtype detection and intelligent defaults.

    This is a smart wrapper around load_binary() that:
    - Auto-detects dtype via detect_binary_dtype()
    - Limits samples to prevent memory issues
    - Requests memory-mapped I/O for files larger than 10 MiB

    Packet-structure detection is not part of loading; call
    detect_packet_structure() separately if that information is needed.

    Designed for use with unknown binary formats where manual
    configuration is not available.

    Args:
        path: Path to binary file.
        sample_rate: Sample rate in Hz. If None, a default of 1 MS/s
            (1.0e6) is assumed; it is not derived from the file.
        max_samples: Maximum number of samples to load (default: 100K).
        channels: Number of interleaved channels.
        channel: Channel index to load.

    Returns:
        WaveformTrace with loaded data and metadata.

    Example:
        >>> trace = load_binary_auto("unknown_capture.bin")
        >>> print(f"Loaded {len(trace.data)} samples from {trace.metadata.source_file}")
    """
    path = Path(path)
    file_size = path.stat().st_size

    # Detect dtype; the per-dtype confidence scores are not needed here.
    dtype, _confidence = detect_binary_dtype(path)

    # Fall back to a fixed default when the caller gives no sample rate.
    if sample_rate is None:
        sample_rate = 1.0e6  # 1 MS/s default

    # Use mmap for files > 10MB
    use_mmap = file_size > 10 * 1024 * 1024

    # Limit samples for analysis performance: never ask for more elements
    # than the file can hold at the detected item size.
    # NOTE(review): this is the total element count in the file, not a
    # per-channel count - confirm how load_binary() interprets `count` when
    # channels > 1.
    bytes_per_sample = np.dtype(dtype).itemsize
    count = min(max_samples, file_size // bytes_per_sample)

    # Load with detected parameters
    return load_binary(
        path,
        dtype=dtype,
        sample_rate=sample_rate,
        channels=channels,
        channel=channel,
        offset=0,
        count=count,
        mmap_mode=use_mmap,
    )
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
__all__ = ["detect_binary_dtype", "detect_packet_structure", "load_binary", "load_binary_auto"]
|