oscura 0.8.0-py3-none-any.whl → 0.11.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oscura/__init__.py +19 -19
- oscura/__main__.py +4 -0
- oscura/analyzers/__init__.py +2 -0
- oscura/analyzers/digital/extraction.py +2 -3
- oscura/analyzers/digital/quality.py +1 -1
- oscura/analyzers/digital/timing.py +1 -1
- oscura/analyzers/ml/signal_classifier.py +6 -0
- oscura/analyzers/patterns/__init__.py +66 -0
- oscura/analyzers/power/basic.py +3 -3
- oscura/analyzers/power/soa.py +1 -1
- oscura/analyzers/power/switching.py +3 -3
- oscura/analyzers/signal_classification.py +529 -0
- oscura/analyzers/signal_integrity/sparams.py +3 -3
- oscura/analyzers/statistics/basic.py +10 -7
- oscura/analyzers/validation.py +1 -1
- oscura/analyzers/waveform/measurements.py +200 -156
- oscura/analyzers/waveform/measurements_with_uncertainty.py +91 -35
- oscura/analyzers/waveform/spectral.py +182 -84
- oscura/api/dsl/commands.py +15 -6
- oscura/api/server/templates/base.html +137 -146
- oscura/api/server/templates/export.html +84 -110
- oscura/api/server/templates/home.html +248 -267
- oscura/api/server/templates/protocols.html +44 -48
- oscura/api/server/templates/reports.html +27 -35
- oscura/api/server/templates/session_detail.html +68 -78
- oscura/api/server/templates/sessions.html +62 -72
- oscura/api/server/templates/waveforms.html +54 -64
- oscura/automotive/__init__.py +1 -1
- oscura/automotive/can/session.py +1 -1
- oscura/automotive/dbc/generator.py +638 -23
- oscura/automotive/dtc/data.json +17 -102
- oscura/automotive/flexray/fibex.py +9 -1
- oscura/automotive/uds/decoder.py +99 -6
- oscura/cli/analyze.py +8 -2
- oscura/cli/batch.py +36 -5
- oscura/cli/characterize.py +18 -4
- oscura/cli/export.py +47 -5
- oscura/cli/main.py +2 -0
- oscura/cli/onboarding/wizard.py +10 -6
- oscura/cli/pipeline.py +585 -0
- oscura/cli/visualize.py +6 -4
- oscura/convenience.py +400 -32
- oscura/core/measurement_result.py +286 -0
- oscura/core/progress.py +1 -1
- oscura/core/schemas/device_mapping.json +2 -8
- oscura/core/schemas/packet_format.json +4 -24
- oscura/core/schemas/protocol_definition.json +2 -12
- oscura/core/types.py +232 -239
- oscura/correlation/multi_protocol.py +1 -1
- oscura/export/legacy/__init__.py +11 -0
- oscura/export/legacy/wav.py +75 -0
- oscura/exporters/__init__.py +19 -0
- oscura/exporters/wireshark.py +809 -0
- oscura/hardware/acquisition/file.py +5 -19
- oscura/hardware/acquisition/saleae.py +10 -10
- oscura/hardware/acquisition/socketcan.py +4 -6
- oscura/hardware/acquisition/synthetic.py +1 -5
- oscura/hardware/acquisition/visa.py +6 -6
- oscura/hardware/security/side_channel_detector.py +5 -508
- oscura/inference/message_format.py +686 -1
- oscura/jupyter/display.py +2 -2
- oscura/jupyter/magic.py +3 -3
- oscura/loaders/__init__.py +17 -12
- oscura/loaders/binary.py +1 -1
- oscura/loaders/chipwhisperer.py +1 -2
- oscura/loaders/configurable.py +1 -1
- oscura/loaders/csv_loader.py +2 -2
- oscura/loaders/hdf5_loader.py +1 -1
- oscura/loaders/lazy.py +6 -1
- oscura/loaders/mmap_loader.py +0 -1
- oscura/loaders/numpy_loader.py +8 -7
- oscura/loaders/preprocessing.py +3 -5
- oscura/loaders/rigol.py +21 -7
- oscura/loaders/sigrok.py +2 -5
- oscura/loaders/tdms.py +3 -2
- oscura/loaders/tektronix.py +38 -32
- oscura/loaders/tss.py +20 -27
- oscura/loaders/validation.py +17 -10
- oscura/loaders/vcd.py +13 -8
- oscura/loaders/wav.py +1 -6
- oscura/pipeline/__init__.py +76 -0
- oscura/pipeline/handlers/__init__.py +165 -0
- oscura/pipeline/handlers/analyzers.py +1045 -0
- oscura/pipeline/handlers/decoders.py +899 -0
- oscura/pipeline/handlers/exporters.py +1103 -0
- oscura/pipeline/handlers/filters.py +891 -0
- oscura/pipeline/handlers/loaders.py +640 -0
- oscura/pipeline/handlers/transforms.py +768 -0
- oscura/reporting/formatting/measurements.py +55 -14
- oscura/reporting/templates/enhanced/protocol_re.html +504 -503
- oscura/sessions/legacy.py +49 -1
- oscura/side_channel/__init__.py +38 -57
- oscura/utils/builders/signal_builder.py +5 -5
- oscura/utils/comparison/compare.py +7 -9
- oscura/utils/comparison/golden.py +1 -1
- oscura/utils/filtering/convenience.py +2 -2
- oscura/utils/math/arithmetic.py +38 -62
- oscura/utils/math/interpolation.py +20 -20
- oscura/utils/pipeline/__init__.py +4 -17
- oscura/utils/progressive.py +1 -4
- oscura/utils/triggering/edge.py +1 -1
- oscura/utils/triggering/pattern.py +2 -2
- oscura/utils/triggering/pulse.py +2 -2
- oscura/utils/triggering/window.py +3 -3
- oscura/validation/hil_testing.py +11 -11
- oscura/visualization/__init__.py +46 -284
- oscura/visualization/batch.py +72 -433
- oscura/visualization/plot.py +542 -53
- oscura/visualization/styles.py +184 -318
- oscura/workflows/batch/advanced.py +1 -1
- oscura/workflows/batch/aggregate.py +12 -9
- oscura/workflows/complete_re.py +251 -23
- oscura/workflows/digital.py +27 -4
- oscura/workflows/multi_trace.py +136 -17
- oscura/workflows/waveform.py +11 -6
- oscura-0.11.0.dist-info/METADATA +460 -0
- {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/RECORD +120 -145
- oscura/side_channel/dpa.py +0 -1025
- oscura/utils/optimization/__init__.py +0 -19
- oscura/utils/optimization/parallel.py +0 -443
- oscura/utils/optimization/search.py +0 -532
- oscura/utils/pipeline/base.py +0 -338
- oscura/utils/pipeline/composition.py +0 -248
- oscura/utils/pipeline/parallel.py +0 -449
- oscura/utils/pipeline/pipeline.py +0 -375
- oscura/utils/search/__init__.py +0 -16
- oscura/utils/search/anomaly.py +0 -424
- oscura/utils/search/context.py +0 -294
- oscura/utils/search/pattern.py +0 -288
- oscura/utils/storage/__init__.py +0 -61
- oscura/utils/storage/database.py +0 -1166
- oscura/visualization/accessibility.py +0 -526
- oscura/visualization/annotations.py +0 -371
- oscura/visualization/axis_scaling.py +0 -305
- oscura/visualization/colors.py +0 -451
- oscura/visualization/digital.py +0 -436
- oscura/visualization/eye.py +0 -571
- oscura/visualization/histogram.py +0 -281
- oscura/visualization/interactive.py +0 -1035
- oscura/visualization/jitter.py +0 -1042
- oscura/visualization/keyboard.py +0 -394
- oscura/visualization/layout.py +0 -400
- oscura/visualization/optimization.py +0 -1079
- oscura/visualization/palettes.py +0 -446
- oscura/visualization/power.py +0 -508
- oscura/visualization/power_extended.py +0 -955
- oscura/visualization/presets.py +0 -469
- oscura/visualization/protocols.py +0 -1246
- oscura/visualization/render.py +0 -223
- oscura/visualization/rendering.py +0 -444
- oscura/visualization/reverse_engineering.py +0 -838
- oscura/visualization/signal_integrity.py +0 -989
- oscura/visualization/specialized.py +0 -643
- oscura/visualization/spectral.py +0 -1226
- oscura/visualization/thumbnails.py +0 -340
- oscura/visualization/time_axis.py +0 -351
- oscura/visualization/waveform.py +0 -454
- oscura-0.8.0.dist-info/METADATA +0 -661
- {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/WHEEL +0 -0
- {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/entry_points.txt +0 -0
- {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/licenses/LICENSE +0 -0

oscura/inference/message_format.py

@@ -30,6 +30,23 @@ from numpy.typing import NDArray
 from oscura.inference.alignment import align_local
 
 
+@dataclass
+class FieldDetectionResult:
+    """Result from a field type detector.
+
+    Attributes:
+        confidence: Detection confidence score (0.0-1.0)
+        field_offset: Field offset in bytes
+        field_length: Field length in bytes
+        evidence: Supporting data for detection
+    """
+
+    confidence: float
+    field_offset: int
+    field_length: int
+    evidence: dict[str, Any]
+
+
 @dataclass
 class InferredField:
     """An inferred message field.
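
The FieldDetectionResult dataclass added above is the common return type of the new detector methods that appear later in this diff. A minimal sketch of constructing and consuming one follows; the import path is assumed from the file shown here (oscura/inference/message_format.py), since the diff does not show what the package re-exports:

    from oscura.inference.message_format import FieldDetectionResult  # assumed import path

    result = FieldDetectionResult(
        confidence=0.85,
        field_offset=4,
        field_length=4,
        evidence={"monotonic_ratio": 1.0, "avg_interval": 100.0},
    )
    if result.confidence >= 0.5:
        # The offset/length pair locates the detected field inside each message.
        print(f"field at offset {result.field_offset}, {result.field_length} bytes")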
@@ -51,7 +68,18 @@ class InferredField:
     name: str
     offset: int
     size: int
-    field_type: Literal[
+    field_type: Literal[
+        "constant",
+        "counter",
+        "timestamp",
+        "length",
+        "checksum",
+        "data",
+        "float",
+        "enum",
+        "reserved",
+        "unknown",
+    ]
     entropy: float
     variance: float
     confidence: float
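
The one-line field_type annotation from 0.8.0 is expanded into an explicit multi-line Literal list (the removed line is shown truncated by the diff viewer, so which members are newly allowed cannot be read off this hunk). A small illustrative sketch of working with that value set via typing.get_args; the FieldType alias and the structural/payload split are illustrative choices, not part of the package:

    from typing import Literal, get_args

    # Mirror of the InferredField.field_type values listed in this hunk (illustrative alias).
    FieldType = Literal[
        "constant", "counter", "timestamp", "length", "checksum",
        "data", "float", "enum", "reserved", "unknown",
    ]

    STRUCTURAL = {"counter", "timestamp", "length", "checksum", "reserved"}

    def is_structural(field_type: str) -> bool:
        # Illustrative split between framing/metadata fields and payload-carrying fields.
        return field_type in STRUCTURAL

    assert len(get_args(FieldType)) == 10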
@@ -1369,6 +1397,663 @@ class MessageFormatInferrer:
         # Return first byte for larger fields
         return int(msg[field.offset])
 
+    # =============================================================================
+    # Advanced Field Type Detectors
+    # =============================================================================
+
+    def detect_timestamp_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int
+    ) -> FieldDetectionResult:
+        """Detect timestamp fields in message data.
+
+        : Timestamp field detection with wraparound handling.
+
+        Analyzes field values for characteristics of timestamp fields:
+        - Monotonically increasing values (with wraparound support)
+        - Regular intervals (milliseconds, seconds, microseconds)
+        - Unix timestamp patterns (seconds since epoch)
+        - 32-bit or 64-bit timestamp detection
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes (typically 4 or 8)
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([0, 0, 0, 100], dtype=np.uint8),
+            ...             np.array([0, 0, 0, 200], dtype=np.uint8)]
+            >>> result = inferrer.detect_timestamp_field(messages, 0, 4)
+            >>> result.confidence > 0.8  # High confidence for regular intervals
+            True
+        """
+        if size not in [4, 8]:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Invalid size for timestamp (must be 4 or 8 bytes)"},
+            )
+
+        # Extract field values (big-endian)
+        values: list[int] = []
+        for msg in messages:
+            if size == 4:
+                val = (
+                    int(msg[offset]) << 24
+                    | int(msg[offset + 1]) << 16
+                    | int(msg[offset + 2]) << 8
+                    | int(msg[offset + 3])
+                )
+            else:  # size == 8
+                val = (
+                    int(msg[offset]) << 56
+                    | int(msg[offset + 1]) << 48
+                    | int(msg[offset + 2]) << 40
+                    | int(msg[offset + 3]) << 32
+                    | int(msg[offset + 4]) << 24
+                    | int(msg[offset + 5]) << 16
+                    | int(msg[offset + 6]) << 8
+                    | int(msg[offset + 7])
+                )
+            values.append(val)
+
+        if len(values) < 3:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Insufficient samples for timestamp detection"},
+            )
+
+        # Calculate differences between consecutive values
+        diffs = [values[i + 1] - values[i] for i in range(len(values) - 1)]
+
+        # Check for monotonically increasing (handle wraparound for 32-bit)
+        max_val = (1 << (size * 8)) - 1
+        increasing_count = 0
+        wraparound_count = 0
+
+        for diff in diffs:
+            if diff > 0:
+                increasing_count += 1
+            elif diff < -(max_val // 2):  # Likely wraparound
+                wraparound_count += 1
+                increasing_count += 1
+
+        monotonic_ratio = increasing_count / len(diffs)
+
+        if monotonic_ratio < 0.7:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={
+                    "reason": "Values not monotonically increasing",
+                    "monotonic_ratio": monotonic_ratio,
+                },
+            )
+
+        # Analyze intervals for regularity
+        positive_diffs = [d for d in diffs if d > 0]
+        if not positive_diffs:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "No positive increments"},
+            )
+
+        avg_interval = sum(positive_diffs) / len(positive_diffs)
+        interval_variance = np.var(positive_diffs)
+        interval_std = np.sqrt(interval_variance)
+
+        # Regular intervals have low coefficient of variation
+        cv = interval_std / avg_interval if avg_interval > 0 else float("inf")
+
+        # Check for Unix timestamp patterns (seconds since 1970-01-01)
+        unix_epoch_2020 = 1577836800  # 2020-01-01 00:00:00 UTC
+        unix_epoch_2030 = 1893456000  # 2030-01-01 00:00:00 UTC
+
+        is_unix_timestamp = False
+        if size == 4 and all(unix_epoch_2020 <= v <= unix_epoch_2030 for v in values[:5]):
+            is_unix_timestamp = True
+
+        # Calculate confidence based on multiple factors
+        confidence = 0.0
+
+        # Factor 1: Monotonic increase (0-0.4)
+        confidence += min(monotonic_ratio, 1.0) * 0.4
+
+        # Factor 2: Regular intervals (0-0.4)
+        if cv < 0.1:  # Very regular
+            confidence += 0.4
+        elif cv < 0.5:  # Somewhat regular
+            confidence += 0.2
+        elif cv < 1.0:  # Loosely regular
+            confidence += 0.1
+
+        # Factor 3: Unix timestamp detection (0-0.2)
+        if is_unix_timestamp:
+            confidence += 0.2
+
+        evidence = {
+            "monotonic_ratio": float(monotonic_ratio),
+            "avg_interval": float(avg_interval),
+            "interval_std": float(interval_std),
+            "coefficient_of_variation": float(cv),
+            "wraparound_detected": wraparound_count > 0,
+            "is_unix_timestamp": is_unix_timestamp,
+            "sample_values": values[:5],
+        }
+
+        return FieldDetectionResult(
+            confidence=min(confidence, 1.0),
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+
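
A hedged sketch of exercising detect_timestamp_field on synthetic data, following the method's own docstring example. It assumes MessageFormatInferrer can be constructed with no arguments and is importable from oscura.inference.message_format; neither detail is shown in this diff:

    import numpy as np
    from oscura.inference.message_format import MessageFormatInferrer  # assumed import path

    # Ten 8-byte messages whose first four bytes hold a big-endian value stepping by 100.
    messages = [
        np.frombuffer((i * 100).to_bytes(4, "big") + b"\x00\x00\x00\x00", dtype=np.uint8)
        for i in range(1, 11)
    ]
    inferrer = MessageFormatInferrer()  # assumed no-argument constructor
    result = inferrer.detect_timestamp_field(messages, offset=0, size=4)
    # Monotonic values with perfectly regular intervals score about 0.8 under the factors above.
    print(result.confidence, result.evidence["avg_interval"])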
+    def detect_float_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int
+    ) -> FieldDetectionResult:
+        """Detect IEEE 754 floating-point fields.
+
+        : Floating-point field detection.
+
+        Identifies floating-point encoded fields by:
+        - Checking for valid IEEE 754 patterns (sign, exponent, mantissa)
+        - Detecting 32-bit (single) and 64-bit (double) precision floats
+        - Validating reasonable ranges (not NaN, not Inf)
+        - Checking value distribution consistency
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes (4 for float32, 8 for float64)
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([0x40, 0x49, 0x0F, 0xDB], dtype=np.uint8)]  # 3.14159
+            >>> result = inferrer.detect_float_field(messages, 0, 4)
+            >>> result.confidence > 0.5
+            True
+        """
+        if size not in [4, 8]:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Invalid size for float (must be 4 or 8 bytes)"},
+            )
+
+        # Extract raw bytes and attempt IEEE 754 interpretation
+        float_values: list[float] = []
+        valid_count = 0
+
+        for msg in messages:
+            raw_bytes = bytes(msg[offset : offset + size])
+
+            try:
+                if size == 4:
+                    # 32-bit float (big-endian)
+                    val = np.frombuffer(raw_bytes, dtype=">f4")[0]
+                else:  # size == 8
+                    # 64-bit double (big-endian)
+                    val = np.frombuffer(raw_bytes, dtype=">f8")[0]
+
+                # Check for valid float (not NaN, not Inf)
+                if np.isfinite(val):
+                    float_values.append(float(val))
+                    valid_count += 1
+                else:
+                    float_values.append(0.0)
+
+            except (ValueError, IndexError):
+                float_values.append(0.0)
+
+        if len(messages) == 0:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "No messages to analyze"},
+            )
+
+        valid_ratio = valid_count / len(messages)
+
+        # Need majority valid floats
+        if valid_ratio < 0.5:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Too many invalid floats", "valid_ratio": valid_ratio},
+            )
+
+        # Analyze value distribution
+        valid_floats = [v for v in float_values if v != 0.0 or valid_count == len(messages)]
+
+        if not valid_floats:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "No valid float values"},
+            )
+
+        # Check for reasonable range (not all zeros, some variance)
+        float_variance = float(np.var(valid_floats))
+        float_mean = float(np.mean(valid_floats))
+        float_range = float(np.max(valid_floats) - np.min(valid_floats))
+
+        # Calculate confidence
+        confidence = 0.0
+
+        # Factor 1: Valid float ratio (0-0.5)
+        confidence += valid_ratio * 0.5
+
+        # Factor 2: Non-zero variance (0-0.3)
+        if float_variance > 1e-6:
+            confidence += 0.3
+        elif float_variance > 1e-12:
+            confidence += 0.15
+
+        # Factor 3: Reasonable range (0-0.2)
+        if float_range > 0:
+            confidence += 0.2
+
+        evidence = {
+            "valid_float_ratio": float(valid_ratio),
+            "float_mean": float(float_mean),
+            "float_variance": float(float_variance),
+            "float_range": float(float_range),
+            "sample_values": valid_floats[:5],
+            "size_bits": size * 8,
+        }
+
+        return FieldDetectionResult(
+            confidence=min(confidence, 1.0),
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+
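
A companion sketch for detect_float_field, reusing the hypothetical inferrer from the previous example: a column of big-endian float32 sensor-style readings should score highly under the factors listed above.

    import struct

    import numpy as np

    readings = [3.14159 + 0.01 * i for i in range(8)]
    messages = [np.frombuffer(struct.pack(">f", r), dtype=np.uint8) for r in readings]
    result = inferrer.detect_float_field(messages, offset=0, size=4)
    # All values decode as finite float32 with non-zero variance and range, so confidence reaches 1.0.
    print(round(result.confidence, 2), result.evidence["float_mean"])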
+    def detect_length_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int, msg_len: int
+    ) -> FieldDetectionResult:
+        """Detect length fields with endianness detection.
+
+        : Length field detection with byte order analysis.
+
+        Identifies fields that encode message or payload length by:
+        - Correlating field value with message/payload size
+        - Checking if value matches subsequent data length
+        - Detecting big-endian vs little-endian encoding
+        - Validating length values are within reasonable bounds
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes (typically 1, 2, or 4)
+            msg_len: Total message length for validation
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([0, 10, *range(10)], dtype=np.uint8)]
+            >>> result = inferrer.detect_length_field(messages, 0, 2, 12)
+            >>> result.confidence > 0.7
+            True
+        """
+        if size > 4:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Invalid size for length field (max 4 bytes)"},
+            )
+
+        # Extract field values (try both endianness)
+        values_be: list[int] = []  # Big-endian
+        values_le: list[int] = []  # Little-endian
+
+        for msg in messages:
+            if size == 1:
+                val = int(msg[offset])
+                values_be.append(val)
+                values_le.append(val)
+            elif size == 2:
+                val_be = int(msg[offset]) << 8 | int(msg[offset + 1])
+                val_le = int(msg[offset + 1]) << 8 | int(msg[offset])
+                values_be.append(val_be)
+                values_le.append(val_le)
+            elif size == 4:
+                val_be = (
+                    int(msg[offset]) << 24
+                    | int(msg[offset + 1]) << 16
+                    | int(msg[offset + 2]) << 8
+                    | int(msg[offset + 3])
+                )
+                val_le = (
+                    int(msg[offset + 3]) << 24
+                    | int(msg[offset + 2]) << 16
+                    | int(msg[offset + 1]) << 8
+                    | int(msg[offset])
+                )
+                values_be.append(val_be)
+                values_le.append(val_le)
+            else:  # size == 3
+                val_be = int(msg[offset]) << 16 | int(msg[offset + 1]) << 8 | int(msg[offset + 2])
+                val_le = int(msg[offset + 2]) << 16 | int(msg[offset + 1]) << 8 | int(msg[offset])
+                values_be.append(val_be)
+                values_le.append(val_le)
+
+        # Check correlation with message length
+        def check_correlation(values: list[int]) -> tuple[float, str]:
+            """Check correlation between field values and message structure."""
+            # Pattern 1: Total message length
+            total_len_matches = sum(1 for v in values if v == msg_len)
+
+            # Pattern 2: Remaining message length (after this field)
+            remaining_len = msg_len - offset - size
+            remaining_matches = sum(1 for v in values if v == remaining_len)
+
+            # Pattern 3: Payload length (common header size assumptions)
+            for header_size in [4, 8, 12, 16]:
+                if offset + size <= header_size:
+                    payload_len = msg_len - header_size
+                    payload_matches = sum(1 for v in values if v == payload_len)
+                    if payload_matches > 0:
+                        match_ratio = payload_matches / len(values)
+                        if match_ratio > 0.7:
+                            return (match_ratio, f"payload_length_after_{header_size}B_header")
+
+            # Pattern 4: Values within reasonable bounds
+            reasonable = sum(1 for v in values if 0 < v < msg_len * 2)
+            reasonable_ratio = reasonable / len(values) if values else 0
+
+            # Best match
+            best_ratio = max(total_len_matches, remaining_matches) / len(values) if values else 0
+
+            if total_len_matches > remaining_matches:
+                return (best_ratio, "total_message_length")
+            elif remaining_matches > 0:
+                return (best_ratio, "remaining_bytes_after_field")
+            elif reasonable_ratio > 0.5:
+                return (reasonable_ratio * 0.5, "reasonable_length_values")
+            else:
+                return (0.0, "no_correlation")
+
+        # Check both endianness
+        corr_be, pattern_be = check_correlation(values_be)
+        corr_le, pattern_le = check_correlation(values_le)
+
+        # Select best endianness
+        if corr_be >= corr_le:
+            confidence = corr_be
+            endianness = "big"
+            pattern = pattern_be
+            values = values_be
+        else:
+            confidence = corr_le
+            endianness = "little"
+            pattern = pattern_le
+            values = values_le
+
+        # Boost confidence if field is early in message (typical for length fields)
+        if offset < 8:
+            confidence = min(confidence * 1.2, 1.0)
+
+        evidence = {
+            "endianness": endianness,
+            "correlation_pattern": pattern,
+            "correlation_ratio": float(confidence),
+            "sample_values": values[:5],
+            "message_length": msg_len,
+            "field_offset": offset,
+        }
+
+        return FieldDetectionResult(
+            confidence=min(confidence, 1.0),
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+
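
A hedged sketch for detect_length_field with the same hypothetical inferrer: a two-byte big-endian prefix that counts the bytes following it matches the "remaining_bytes_after_field" pattern described above, with big endianness reported in the evidence.

    import numpy as np

    bodies = [bytes(range(10)), bytes(range(10, 20))]
    messages = [
        np.frombuffer(len(body).to_bytes(2, "big") + body, dtype=np.uint8) for body in bodies
    ]
    result = inferrer.detect_length_field(messages, offset=0, size=2, msg_len=12)
    # Expected: ("big", "remaining_bytes_after_field") for a prefix equal to the trailing byte count.
    print(result.evidence["endianness"], result.evidence["correlation_pattern"])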
+    def detect_enum_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int
+    ) -> FieldDetectionResult:
+        """Detect enumeration fields with value frequency analysis.
+
+        : Enum field detection with value distribution.
+
+        Identifies fields with limited discrete value sets by:
+        - Finding fields with <20 unique values
+        - Building value frequency distribution
+        - Checking for reasonable enum characteristics
+        - Suggesting common enum patterns (states, commands, types)
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes (typically 1 or 2)
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([1], dtype=np.uint8), np.array([2], dtype=np.uint8)]
+            >>> result = inferrer.detect_enum_field(messages, 0, 1)
+            >>> result.confidence > 0.8
+            True
+        """
+        if size > 4:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Field too large for enum (max 4 bytes)"},
+            )
+
+        # Extract field values
+        values: list[int] = []
+        for msg in messages:
+            if size == 1:
+                val = int(msg[offset])
+            elif size == 2:
+                val = int(msg[offset]) << 8 | int(msg[offset + 1])
+            elif size == 4:
+                val = (
+                    int(msg[offset]) << 24
+                    | int(msg[offset + 1]) << 16
+                    | int(msg[offset + 2]) << 8
+                    | int(msg[offset + 3])
+                )
+            else:  # size == 3
+                val = int(msg[offset]) << 16 | int(msg[offset + 1]) << 8 | int(msg[offset + 2])
+            values.append(val)
+
+        # Count unique values
+        unique_values = set(values)
+        unique_count = len(unique_values)
+
+        # Not an enum if only 1 value (that's a constant)
+        if unique_count <= 1:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Only one unique value (constant field)"},
+            )
+
+        # Not an enum if too many values (>20 threshold)
+        if unique_count > 20:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": f"Too many unique values ({unique_count} > 20)"},
+            )
+
+        # Build frequency distribution
+        value_counts: dict[int, int] = {}
+        for v in values:
+            value_counts[v] = value_counts.get(v, 0) + 1
+
+        # Sort by frequency
+        sorted_values = sorted(value_counts.items(), key=lambda x: x[1], reverse=True)
+
+        # Calculate entropy
+        total = len(values)
+        probabilities = [count / total for _, count in sorted_values]
+        entropy = -sum(p * np.log2(p) for p in probabilities if p > 0)
+
+        # Calculate confidence based on characteristics
+        confidence = 0.0
+
+        # Factor 1: Few unique values (0-0.4)
+        if unique_count <= 5:
+            confidence += 0.4
+        elif unique_count <= 10:
+            confidence += 0.3
+        else:
+            confidence += 0.2
+
+        # Factor 2: Good value distribution (0-0.3)
+        # Enums typically have reasonable distribution (not too skewed)
+        max_freq = max(value_counts.values())
+        max_freq_ratio = max_freq / total
+        if 0.1 < max_freq_ratio < 0.9:  # Balanced distribution
+            confidence += 0.3
+        elif max_freq_ratio <= 0.95:  # Somewhat balanced
+            confidence += 0.15
+
+        # Factor 3: Low entropy relative to max possible (0-0.3)
+        max_entropy = np.log2(unique_count)
+        if max_entropy > 0:
+            entropy_ratio = entropy / max_entropy
+            if entropy_ratio > 0.5:  # Well-distributed
+                confidence += 0.3
+            else:  # Skewed distribution
+                confidence += 0.15
+
+        # Suggest enum type based on characteristics
+        enum_type = "unknown"
+        if unique_count <= 3:
+            enum_type = "boolean_or_state"
+        elif unique_count <= 8:
+            enum_type = "command_or_type"
+        else:
+            enum_type = "extended_enum"
+
+        evidence = {
+            "unique_count": unique_count,
+            "value_distribution": dict(sorted_values[:10]),  # Top 10 most frequent
+            "entropy": float(entropy),
+            "max_entropy": float(max_entropy),
+            "suggested_enum_type": enum_type,
+            "most_common_value": sorted_values[0][0],
+            "most_common_frequency": sorted_values[0][1],
+        }
+
+        return FieldDetectionResult(
+            confidence=min(confidence, 1.0),
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+
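
A hedged sketch for detect_enum_field, again with the hypothetical inferrer: a one-byte command code that cycles through a handful of values lands in the few-unique-values, balanced-distribution case.

    import numpy as np

    codes = [0x01, 0x02, 0x03, 0x01, 0x02, 0x03, 0x01, 0x02]
    messages = [np.array([code, 0xFF], dtype=np.uint8) for code in codes]
    result = inferrer.detect_enum_field(messages, offset=0, size=1)
    # Three unique values trigger the "boolean_or_state" suggestion per the thresholds above.
    print(result.confidence, result.evidence["suggested_enum_type"])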
+    def detect_reserved_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int
+    ) -> FieldDetectionResult:
+        """Detect reserved or padding fields.
+
+        : Reserved field detection.
+
+        Identifies fields that are always zero or constant by:
+        - Checking for always-zero fields (padding)
+        - Detecting always-constant fields (reserved)
+        - Tracking consistency across captures
+        - Distinguishing from intentional constant fields
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([0, 0, 0, 0], dtype=np.uint8) for _ in range(10)]
+            >>> result = inferrer.detect_reserved_field(messages, 0, 4)
+            >>> result.confidence == 1.0
+            True
+        """
+        # Extract all bytes in field across all messages
+        all_bytes: list[int] = []
+        for msg in messages:
+            for i in range(size):
+                all_bytes.append(int(msg[offset + i]))
+
+        # Check if all bytes are identical
+        unique_bytes = set(all_bytes)
+
+        if len(unique_bytes) != 1:
+            # Not reserved - has variation
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": f"Field has variation ({len(unique_bytes)} unique values)"},
+            )
+
+        constant_value = next(iter(unique_bytes))
+
+        # Determine field type
+        if constant_value == 0:
+            field_subtype = "padding_zeros"
+            confidence = 1.0
+        else:
+            field_subtype = "reserved_constant"
+            # Slightly lower confidence - could be intentional constant
+            confidence = 0.9
+
+        # Additional checks for reserved field characteristics
+        # Reserved fields are often:
+        # 1. At specific alignment boundaries
+        # 2. Between other fields
+        # 3. At end of structures
+
+        alignment_bonus = 0.0
+        if offset % 4 == 0 and size % 4 == 0:
+            alignment_bonus = 0.05  # Aligned to 4-byte boundary
+        elif offset % 2 == 0 and size % 2 == 0:
+            alignment_bonus = 0.03  # Aligned to 2-byte boundary
+
+        confidence = min(confidence + alignment_bonus, 1.0)
+
+        evidence = {
+            "constant_value": constant_value,
+            "field_subtype": field_subtype,
+            "total_bytes_checked": len(all_bytes),
+            "is_aligned": alignment_bonus > 0,
+            "is_zero_padding": constant_value == 0,
+        }
+
+        return FieldDetectionResult(
+            confidence=confidence,
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+
 
 def infer_format(messages: list[bytes | NDArray[np.uint8]], min_samples: int = 10) -> MessageSchema:
     """Convenience function for format inference.