oscura-0.8.0-py3-none-any.whl → oscura-0.11.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. oscura/__init__.py +19 -19
  2. oscura/__main__.py +4 -0
  3. oscura/analyzers/__init__.py +2 -0
  4. oscura/analyzers/digital/extraction.py +2 -3
  5. oscura/analyzers/digital/quality.py +1 -1
  6. oscura/analyzers/digital/timing.py +1 -1
  7. oscura/analyzers/ml/signal_classifier.py +6 -0
  8. oscura/analyzers/patterns/__init__.py +66 -0
  9. oscura/analyzers/power/basic.py +3 -3
  10. oscura/analyzers/power/soa.py +1 -1
  11. oscura/analyzers/power/switching.py +3 -3
  12. oscura/analyzers/signal_classification.py +529 -0
  13. oscura/analyzers/signal_integrity/sparams.py +3 -3
  14. oscura/analyzers/statistics/basic.py +10 -7
  15. oscura/analyzers/validation.py +1 -1
  16. oscura/analyzers/waveform/measurements.py +200 -156
  17. oscura/analyzers/waveform/measurements_with_uncertainty.py +91 -35
  18. oscura/analyzers/waveform/spectral.py +182 -84
  19. oscura/api/dsl/commands.py +15 -6
  20. oscura/api/server/templates/base.html +137 -146
  21. oscura/api/server/templates/export.html +84 -110
  22. oscura/api/server/templates/home.html +248 -267
  23. oscura/api/server/templates/protocols.html +44 -48
  24. oscura/api/server/templates/reports.html +27 -35
  25. oscura/api/server/templates/session_detail.html +68 -78
  26. oscura/api/server/templates/sessions.html +62 -72
  27. oscura/api/server/templates/waveforms.html +54 -64
  28. oscura/automotive/__init__.py +1 -1
  29. oscura/automotive/can/session.py +1 -1
  30. oscura/automotive/dbc/generator.py +638 -23
  31. oscura/automotive/dtc/data.json +17 -102
  32. oscura/automotive/flexray/fibex.py +9 -1
  33. oscura/automotive/uds/decoder.py +99 -6
  34. oscura/cli/analyze.py +8 -2
  35. oscura/cli/batch.py +36 -5
  36. oscura/cli/characterize.py +18 -4
  37. oscura/cli/export.py +47 -5
  38. oscura/cli/main.py +2 -0
  39. oscura/cli/onboarding/wizard.py +10 -6
  40. oscura/cli/pipeline.py +585 -0
  41. oscura/cli/visualize.py +6 -4
  42. oscura/convenience.py +400 -32
  43. oscura/core/measurement_result.py +286 -0
  44. oscura/core/progress.py +1 -1
  45. oscura/core/schemas/device_mapping.json +2 -8
  46. oscura/core/schemas/packet_format.json +4 -24
  47. oscura/core/schemas/protocol_definition.json +2 -12
  48. oscura/core/types.py +232 -239
  49. oscura/correlation/multi_protocol.py +1 -1
  50. oscura/export/legacy/__init__.py +11 -0
  51. oscura/export/legacy/wav.py +75 -0
  52. oscura/exporters/__init__.py +19 -0
  53. oscura/exporters/wireshark.py +809 -0
  54. oscura/hardware/acquisition/file.py +5 -19
  55. oscura/hardware/acquisition/saleae.py +10 -10
  56. oscura/hardware/acquisition/socketcan.py +4 -6
  57. oscura/hardware/acquisition/synthetic.py +1 -5
  58. oscura/hardware/acquisition/visa.py +6 -6
  59. oscura/hardware/security/side_channel_detector.py +5 -508
  60. oscura/inference/message_format.py +686 -1
  61. oscura/jupyter/display.py +2 -2
  62. oscura/jupyter/magic.py +3 -3
  63. oscura/loaders/__init__.py +17 -12
  64. oscura/loaders/binary.py +1 -1
  65. oscura/loaders/chipwhisperer.py +1 -2
  66. oscura/loaders/configurable.py +1 -1
  67. oscura/loaders/csv_loader.py +2 -2
  68. oscura/loaders/hdf5_loader.py +1 -1
  69. oscura/loaders/lazy.py +6 -1
  70. oscura/loaders/mmap_loader.py +0 -1
  71. oscura/loaders/numpy_loader.py +8 -7
  72. oscura/loaders/preprocessing.py +3 -5
  73. oscura/loaders/rigol.py +21 -7
  74. oscura/loaders/sigrok.py +2 -5
  75. oscura/loaders/tdms.py +3 -2
  76. oscura/loaders/tektronix.py +38 -32
  77. oscura/loaders/tss.py +20 -27
  78. oscura/loaders/validation.py +17 -10
  79. oscura/loaders/vcd.py +13 -8
  80. oscura/loaders/wav.py +1 -6
  81. oscura/pipeline/__init__.py +76 -0
  82. oscura/pipeline/handlers/__init__.py +165 -0
  83. oscura/pipeline/handlers/analyzers.py +1045 -0
  84. oscura/pipeline/handlers/decoders.py +899 -0
  85. oscura/pipeline/handlers/exporters.py +1103 -0
  86. oscura/pipeline/handlers/filters.py +891 -0
  87. oscura/pipeline/handlers/loaders.py +640 -0
  88. oscura/pipeline/handlers/transforms.py +768 -0
  89. oscura/reporting/formatting/measurements.py +55 -14
  90. oscura/reporting/templates/enhanced/protocol_re.html +504 -503
  91. oscura/sessions/legacy.py +49 -1
  92. oscura/side_channel/__init__.py +38 -57
  93. oscura/utils/builders/signal_builder.py +5 -5
  94. oscura/utils/comparison/compare.py +7 -9
  95. oscura/utils/comparison/golden.py +1 -1
  96. oscura/utils/filtering/convenience.py +2 -2
  97. oscura/utils/math/arithmetic.py +38 -62
  98. oscura/utils/math/interpolation.py +20 -20
  99. oscura/utils/pipeline/__init__.py +4 -17
  100. oscura/utils/progressive.py +1 -4
  101. oscura/utils/triggering/edge.py +1 -1
  102. oscura/utils/triggering/pattern.py +2 -2
  103. oscura/utils/triggering/pulse.py +2 -2
  104. oscura/utils/triggering/window.py +3 -3
  105. oscura/validation/hil_testing.py +11 -11
  106. oscura/visualization/__init__.py +46 -284
  107. oscura/visualization/batch.py +72 -433
  108. oscura/visualization/plot.py +542 -53
  109. oscura/visualization/styles.py +184 -318
  110. oscura/workflows/batch/advanced.py +1 -1
  111. oscura/workflows/batch/aggregate.py +12 -9
  112. oscura/workflows/complete_re.py +251 -23
  113. oscura/workflows/digital.py +27 -4
  114. oscura/workflows/multi_trace.py +136 -17
  115. oscura/workflows/waveform.py +11 -6
  116. oscura-0.11.0.dist-info/METADATA +460 -0
  117. {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/RECORD +120 -145
  118. oscura/side_channel/dpa.py +0 -1025
  119. oscura/utils/optimization/__init__.py +0 -19
  120. oscura/utils/optimization/parallel.py +0 -443
  121. oscura/utils/optimization/search.py +0 -532
  122. oscura/utils/pipeline/base.py +0 -338
  123. oscura/utils/pipeline/composition.py +0 -248
  124. oscura/utils/pipeline/parallel.py +0 -449
  125. oscura/utils/pipeline/pipeline.py +0 -375
  126. oscura/utils/search/__init__.py +0 -16
  127. oscura/utils/search/anomaly.py +0 -424
  128. oscura/utils/search/context.py +0 -294
  129. oscura/utils/search/pattern.py +0 -288
  130. oscura/utils/storage/__init__.py +0 -61
  131. oscura/utils/storage/database.py +0 -1166
  132. oscura/visualization/accessibility.py +0 -526
  133. oscura/visualization/annotations.py +0 -371
  134. oscura/visualization/axis_scaling.py +0 -305
  135. oscura/visualization/colors.py +0 -451
  136. oscura/visualization/digital.py +0 -436
  137. oscura/visualization/eye.py +0 -571
  138. oscura/visualization/histogram.py +0 -281
  139. oscura/visualization/interactive.py +0 -1035
  140. oscura/visualization/jitter.py +0 -1042
  141. oscura/visualization/keyboard.py +0 -394
  142. oscura/visualization/layout.py +0 -400
  143. oscura/visualization/optimization.py +0 -1079
  144. oscura/visualization/palettes.py +0 -446
  145. oscura/visualization/power.py +0 -508
  146. oscura/visualization/power_extended.py +0 -955
  147. oscura/visualization/presets.py +0 -469
  148. oscura/visualization/protocols.py +0 -1246
  149. oscura/visualization/render.py +0 -223
  150. oscura/visualization/rendering.py +0 -444
  151. oscura/visualization/reverse_engineering.py +0 -838
  152. oscura/visualization/signal_integrity.py +0 -989
  153. oscura/visualization/specialized.py +0 -643
  154. oscura/visualization/spectral.py +0 -1226
  155. oscura/visualization/thumbnails.py +0 -340
  156. oscura/visualization/time_axis.py +0 -351
  157. oscura/visualization/waveform.py +0 -454
  158. oscura-0.8.0.dist-info/METADATA +0 -661
  159. {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/WHEEL +0 -0
  160. {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/entry_points.txt +0 -0
  161. {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/licenses/LICENSE +0 -0
@@ -30,6 +30,23 @@ from numpy.typing import NDArray
 from oscura.inference.alignment import align_local


+@dataclass
+class FieldDetectionResult:
+    """Result from a field type detector.
+
+    Attributes:
+        confidence: Detection confidence score (0.0-1.0)
+        field_offset: Field offset in bytes
+        field_length: Field length in bytes
+        evidence: Supporting data for detection
+    """
+
+    confidence: float
+    field_offset: int
+    field_length: int
+    evidence: dict[str, Any]
+
+
 @dataclass
 class InferredField:
     """An inferred message field.
@@ -51,7 +68,18 @@ class InferredField:
     name: str
     offset: int
     size: int
-    field_type: Literal["constant", "counter", "timestamp", "length", "checksum", "data", "unknown"]
+    field_type: Literal[
+        "constant",
+        "counter",
+        "timestamp",
+        "length",
+        "checksum",
+        "data",
+        "float",
+        "enum",
+        "reserved",
+        "unknown",
+    ]
     entropy: float
     variance: float
     confidence: float
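
The two hunks above introduce the `FieldDetectionResult` container and widen `InferredField.field_type` with `"float"`, `"enum"`, and `"reserved"`. The following is a minimal sketch (not part of the diff) of how these pieces fit together; it assumes oscura ≥ 0.11.0 is installed and that the import path matches the file being diffed here, `oscura/inference/message_format.py`.

```python
# Sketch only: constructing the new result type and mapping it onto the
# widened field_type literals. The import path is taken from this diff;
# the confidence threshold of 0.8 is an illustrative choice, not an API.
from oscura.inference.message_format import FieldDetectionResult

# A detector reporting a likely 4-byte timestamp at offset 0.
result = FieldDetectionResult(
    confidence=0.87,
    field_offset=0,
    field_length=4,
    evidence={"monotonic_ratio": 1.0, "is_unix_timestamp": True},
)

# InferredField.field_type now also accepts "float", "enum", and "reserved"
# in addition to the values available in 0.8.0.
field_type = "timestamp" if result.confidence >= 0.8 else "unknown"
print(field_type, result.evidence)
```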
@@ -1369,6 +1397,663 @@ class MessageFormatInferrer:
         # Return first byte for larger fields
         return int(msg[field.offset])

+    # =============================================================================
+    # Advanced Field Type Detectors
+    # =============================================================================
+
+    def detect_timestamp_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int
+    ) -> FieldDetectionResult:
+        """Detect timestamp fields in message data.
+
+        : Timestamp field detection with wraparound handling.
+
+        Analyzes field values for characteristics of timestamp fields:
+        - Monotonically increasing values (with wraparound support)
+        - Regular intervals (milliseconds, seconds, microseconds)
+        - Unix timestamp patterns (seconds since epoch)
+        - 32-bit or 64-bit timestamp detection
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes (typically 4 or 8)
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([0, 0, 0, 100], dtype=np.uint8),
+            ...     np.array([0, 0, 0, 200], dtype=np.uint8)]
+            >>> result = inferrer.detect_timestamp_field(messages, 0, 4)
+            >>> result.confidence > 0.8  # High confidence for regular intervals
+            True
+        """
+        if size not in [4, 8]:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Invalid size for timestamp (must be 4 or 8 bytes)"},
+            )
+
+        # Extract field values (big-endian)
+        values: list[int] = []
+        for msg in messages:
+            if size == 4:
+                val = (
+                    int(msg[offset]) << 24
+                    | int(msg[offset + 1]) << 16
+                    | int(msg[offset + 2]) << 8
+                    | int(msg[offset + 3])
+                )
+            else:  # size == 8
+                val = (
+                    int(msg[offset]) << 56
+                    | int(msg[offset + 1]) << 48
+                    | int(msg[offset + 2]) << 40
+                    | int(msg[offset + 3]) << 32
+                    | int(msg[offset + 4]) << 24
+                    | int(msg[offset + 5]) << 16
+                    | int(msg[offset + 6]) << 8
+                    | int(msg[offset + 7])
+                )
+            values.append(val)
+
+        if len(values) < 3:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Insufficient samples for timestamp detection"},
+            )
+
+        # Calculate differences between consecutive values
+        diffs = [values[i + 1] - values[i] for i in range(len(values) - 1)]
+
+        # Check for monotonically increasing (handle wraparound for 32-bit)
+        max_val = (1 << (size * 8)) - 1
+        increasing_count = 0
+        wraparound_count = 0
+
+        for diff in diffs:
+            if diff > 0:
+                increasing_count += 1
+            elif diff < -(max_val // 2):  # Likely wraparound
+                wraparound_count += 1
+                increasing_count += 1
+
+        monotonic_ratio = increasing_count / len(diffs)
+
+        if monotonic_ratio < 0.7:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={
+                    "reason": "Values not monotonically increasing",
+                    "monotonic_ratio": monotonic_ratio,
+                },
+            )
+
+        # Analyze intervals for regularity
+        positive_diffs = [d for d in diffs if d > 0]
+        if not positive_diffs:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "No positive increments"},
+            )
+
+        avg_interval = sum(positive_diffs) / len(positive_diffs)
+        interval_variance = np.var(positive_diffs)
+        interval_std = np.sqrt(interval_variance)
+
+        # Regular intervals have low coefficient of variation
+        cv = interval_std / avg_interval if avg_interval > 0 else float("inf")
+
+        # Check for Unix timestamp patterns (seconds since 1970-01-01)
+        unix_epoch_2020 = 1577836800  # 2020-01-01 00:00:00 UTC
+        unix_epoch_2030 = 1893456000  # 2030-01-01 00:00:00 UTC
+
+        is_unix_timestamp = False
+        if size == 4 and all(unix_epoch_2020 <= v <= unix_epoch_2030 for v in values[:5]):
+            is_unix_timestamp = True
+
+        # Calculate confidence based on multiple factors
+        confidence = 0.0
+
+        # Factor 1: Monotonic increase (0-0.4)
+        confidence += min(monotonic_ratio, 1.0) * 0.4
+
+        # Factor 2: Regular intervals (0-0.4)
+        if cv < 0.1:  # Very regular
+            confidence += 0.4
+        elif cv < 0.5:  # Somewhat regular
+            confidence += 0.2
+        elif cv < 1.0:  # Loosely regular
+            confidence += 0.1
+
+        # Factor 3: Unix timestamp detection (0-0.2)
+        if is_unix_timestamp:
+            confidence += 0.2
+
+        evidence = {
+            "monotonic_ratio": float(monotonic_ratio),
+            "avg_interval": float(avg_interval),
+            "interval_std": float(interval_std),
+            "coefficient_of_variation": float(cv),
+            "wraparound_detected": wraparound_count > 0,
+            "is_unix_timestamp": is_unix_timestamp,
+            "sample_values": values[:5],
+        }
+
+        return FieldDetectionResult(
+            confidence=min(confidence, 1.0),
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+
+    def detect_float_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int
+    ) -> FieldDetectionResult:
+        """Detect IEEE 754 floating-point fields.
+
+        : Floating-point field detection.
+
+        Identifies floating-point encoded fields by:
+        - Checking for valid IEEE 754 patterns (sign, exponent, mantissa)
+        - Detecting 32-bit (single) and 64-bit (double) precision floats
+        - Validating reasonable ranges (not NaN, not Inf)
+        - Checking value distribution consistency
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes (4 for float32, 8 for float64)
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([0x40, 0x49, 0x0F, 0xDB], dtype=np.uint8)]  # 3.14159
+            >>> result = inferrer.detect_float_field(messages, 0, 4)
+            >>> result.confidence > 0.5
+            True
+        """
+        if size not in [4, 8]:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Invalid size for float (must be 4 or 8 bytes)"},
+            )
+
+        # Extract raw bytes and attempt IEEE 754 interpretation
+        float_values: list[float] = []
+        valid_count = 0
+
+        for msg in messages:
+            raw_bytes = bytes(msg[offset : offset + size])
+
+            try:
+                if size == 4:
+                    # 32-bit float (big-endian)
+                    val = np.frombuffer(raw_bytes, dtype=">f4")[0]
+                else:  # size == 8
+                    # 64-bit double (big-endian)
+                    val = np.frombuffer(raw_bytes, dtype=">f8")[0]
+
+                # Check for valid float (not NaN, not Inf)
+                if np.isfinite(val):
+                    float_values.append(float(val))
+                    valid_count += 1
+                else:
+                    float_values.append(0.0)
+
+            except (ValueError, IndexError):
+                float_values.append(0.0)
+
+        if len(messages) == 0:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "No messages to analyze"},
+            )
+
+        valid_ratio = valid_count / len(messages)
+
+        # Need majority valid floats
+        if valid_ratio < 0.5:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Too many invalid floats", "valid_ratio": valid_ratio},
+            )
+
+        # Analyze value distribution
+        valid_floats = [v for v in float_values if v != 0.0 or valid_count == len(messages)]
+
+        if not valid_floats:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "No valid float values"},
+            )
+
+        # Check for reasonable range (not all zeros, some variance)
+        float_variance = float(np.var(valid_floats))
+        float_mean = float(np.mean(valid_floats))
+        float_range = float(np.max(valid_floats) - np.min(valid_floats))
+
+        # Calculate confidence
+        confidence = 0.0
+
+        # Factor 1: Valid float ratio (0-0.5)
+        confidence += valid_ratio * 0.5
+
+        # Factor 2: Non-zero variance (0-0.3)
+        if float_variance > 1e-6:
+            confidence += 0.3
+        elif float_variance > 1e-12:
+            confidence += 0.15
+
+        # Factor 3: Reasonable range (0-0.2)
+        if float_range > 0:
+            confidence += 0.2
+
+        evidence = {
+            "valid_float_ratio": float(valid_ratio),
+            "float_mean": float(float_mean),
+            "float_variance": float(float_variance),
+            "float_range": float(float_range),
+            "sample_values": valid_floats[:5],
+            "size_bits": size * 8,
+        }
+
+        return FieldDetectionResult(
+            confidence=min(confidence, 1.0),
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+
+    def detect_length_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int, msg_len: int
+    ) -> FieldDetectionResult:
+        """Detect length fields with endianness detection.
+
+        : Length field detection with byte order analysis.
+
+        Identifies fields that encode message or payload length by:
+        - Correlating field value with message/payload size
+        - Checking if value matches subsequent data length
+        - Detecting big-endian vs little-endian encoding
+        - Validating length values are within reasonable bounds
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes (typically 1, 2, or 4)
+            msg_len: Total message length for validation
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([0, 10, *range(10)], dtype=np.uint8)]
+            >>> result = inferrer.detect_length_field(messages, 0, 2, 12)
+            >>> result.confidence > 0.7
+            True
+        """
+        if size > 4:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Invalid size for length field (max 4 bytes)"},
+            )
+
+        # Extract field values (try both endianness)
+        values_be: list[int] = []  # Big-endian
+        values_le: list[int] = []  # Little-endian
+
+        for msg in messages:
+            if size == 1:
+                val = int(msg[offset])
+                values_be.append(val)
+                values_le.append(val)
+            elif size == 2:
+                val_be = int(msg[offset]) << 8 | int(msg[offset + 1])
+                val_le = int(msg[offset + 1]) << 8 | int(msg[offset])
+                values_be.append(val_be)
+                values_le.append(val_le)
+            elif size == 4:
+                val_be = (
+                    int(msg[offset]) << 24
+                    | int(msg[offset + 1]) << 16
+                    | int(msg[offset + 2]) << 8
+                    | int(msg[offset + 3])
+                )
+                val_le = (
+                    int(msg[offset + 3]) << 24
+                    | int(msg[offset + 2]) << 16
+                    | int(msg[offset + 1]) << 8
+                    | int(msg[offset])
+                )
+                values_be.append(val_be)
+                values_le.append(val_le)
+            else:  # size == 3
+                val_be = int(msg[offset]) << 16 | int(msg[offset + 1]) << 8 | int(msg[offset + 2])
+                val_le = int(msg[offset + 2]) << 16 | int(msg[offset + 1]) << 8 | int(msg[offset])
+                values_be.append(val_be)
+                values_le.append(val_le)
+
+        # Check correlation with message length
+        def check_correlation(values: list[int]) -> tuple[float, str]:
+            """Check correlation between field values and message structure."""
+            # Pattern 1: Total message length
+            total_len_matches = sum(1 for v in values if v == msg_len)
+
+            # Pattern 2: Remaining message length (after this field)
+            remaining_len = msg_len - offset - size
+            remaining_matches = sum(1 for v in values if v == remaining_len)
+
+            # Pattern 3: Payload length (common header size assumptions)
+            for header_size in [4, 8, 12, 16]:
+                if offset + size <= header_size:
+                    payload_len = msg_len - header_size
+                    payload_matches = sum(1 for v in values if v == payload_len)
+                    if payload_matches > 0:
+                        match_ratio = payload_matches / len(values)
+                        if match_ratio > 0.7:
+                            return (match_ratio, f"payload_length_after_{header_size}B_header")
+
+            # Pattern 4: Values within reasonable bounds
+            reasonable = sum(1 for v in values if 0 < v < msg_len * 2)
+            reasonable_ratio = reasonable / len(values) if values else 0
+
+            # Best match
+            best_ratio = max(total_len_matches, remaining_matches) / len(values) if values else 0
+
+            if total_len_matches > remaining_matches:
+                return (best_ratio, "total_message_length")
+            elif remaining_matches > 0:
+                return (best_ratio, "remaining_bytes_after_field")
+            elif reasonable_ratio > 0.5:
+                return (reasonable_ratio * 0.5, "reasonable_length_values")
+            else:
+                return (0.0, "no_correlation")
+
+        # Check both endianness
+        corr_be, pattern_be = check_correlation(values_be)
+        corr_le, pattern_le = check_correlation(values_le)
+
+        # Select best endianness
+        if corr_be >= corr_le:
+            confidence = corr_be
+            endianness = "big"
+            pattern = pattern_be
+            values = values_be
+        else:
+            confidence = corr_le
+            endianness = "little"
+            pattern = pattern_le
+            values = values_le
+
+        # Boost confidence if field is early in message (typical for length fields)
+        if offset < 8:
+            confidence = min(confidence * 1.2, 1.0)
+
+        evidence = {
+            "endianness": endianness,
+            "correlation_pattern": pattern,
+            "correlation_ratio": float(confidence),
+            "sample_values": values[:5],
+            "message_length": msg_len,
+            "field_offset": offset,
+        }
+
+        return FieldDetectionResult(
+            confidence=min(confidence, 1.0),
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+
+    def detect_enum_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int
+    ) -> FieldDetectionResult:
+        """Detect enumeration fields with value frequency analysis.
+
+        : Enum field detection with value distribution.
+
+        Identifies fields with limited discrete value sets by:
+        - Finding fields with <20 unique values
+        - Building value frequency distribution
+        - Checking for reasonable enum characteristics
+        - Suggesting common enum patterns (states, commands, types)
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes (typically 1 or 2)
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([1], dtype=np.uint8), np.array([2], dtype=np.uint8)]
+            >>> result = inferrer.detect_enum_field(messages, 0, 1)
+            >>> result.confidence > 0.8
+            True
+        """
+        if size > 4:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Field too large for enum (max 4 bytes)"},
+            )
+
+        # Extract field values
+        values: list[int] = []
+        for msg in messages:
+            if size == 1:
+                val = int(msg[offset])
+            elif size == 2:
+                val = int(msg[offset]) << 8 | int(msg[offset + 1])
+            elif size == 4:
+                val = (
+                    int(msg[offset]) << 24
+                    | int(msg[offset + 1]) << 16
+                    | int(msg[offset + 2]) << 8
+                    | int(msg[offset + 3])
+                )
+            else:  # size == 3
+                val = int(msg[offset]) << 16 | int(msg[offset + 1]) << 8 | int(msg[offset + 2])
+            values.append(val)
+
+        # Count unique values
+        unique_values = set(values)
+        unique_count = len(unique_values)
+
+        # Not an enum if only 1 value (that's a constant)
+        if unique_count <= 1:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": "Only one unique value (constant field)"},
+            )
+
+        # Not an enum if too many values (>20 threshold)
+        if unique_count > 20:
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": f"Too many unique values ({unique_count} > 20)"},
+            )
+
+        # Build frequency distribution
+        value_counts: dict[int, int] = {}
+        for v in values:
+            value_counts[v] = value_counts.get(v, 0) + 1
+
+        # Sort by frequency
+        sorted_values = sorted(value_counts.items(), key=lambda x: x[1], reverse=True)
+
+        # Calculate entropy
+        total = len(values)
+        probabilities = [count / total for _, count in sorted_values]
+        entropy = -sum(p * np.log2(p) for p in probabilities if p > 0)
+
+        # Calculate confidence based on characteristics
+        confidence = 0.0
+
+        # Factor 1: Few unique values (0-0.4)
+        if unique_count <= 5:
+            confidence += 0.4
+        elif unique_count <= 10:
+            confidence += 0.3
+        else:
+            confidence += 0.2
+
+        # Factor 2: Good value distribution (0-0.3)
+        # Enums typically have reasonable distribution (not too skewed)
+        max_freq = max(value_counts.values())
+        max_freq_ratio = max_freq / total
+        if 0.1 < max_freq_ratio < 0.9:  # Balanced distribution
+            confidence += 0.3
+        elif max_freq_ratio <= 0.95:  # Somewhat balanced
+            confidence += 0.15
+
+        # Factor 3: Low entropy relative to max possible (0-0.3)
+        max_entropy = np.log2(unique_count)
+        if max_entropy > 0:
+            entropy_ratio = entropy / max_entropy
+            if entropy_ratio > 0.5:  # Well-distributed
+                confidence += 0.3
+            else:  # Skewed distribution
+                confidence += 0.15
+
+        # Suggest enum type based on characteristics
+        enum_type = "unknown"
+        if unique_count <= 3:
+            enum_type = "boolean_or_state"
+        elif unique_count <= 8:
+            enum_type = "command_or_type"
+        else:
+            enum_type = "extended_enum"
+
+        evidence = {
+            "unique_count": unique_count,
+            "value_distribution": dict(sorted_values[:10]),  # Top 10 most frequent
+            "entropy": float(entropy),
+            "max_entropy": float(max_entropy),
+            "suggested_enum_type": enum_type,
+            "most_common_value": sorted_values[0][0],
+            "most_common_frequency": sorted_values[0][1],
+        }
+
+        return FieldDetectionResult(
+            confidence=min(confidence, 1.0),
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+
+    def detect_reserved_field(
+        self, messages: list[NDArray[np.uint8]], offset: int, size: int
+    ) -> FieldDetectionResult:
+        """Detect reserved or padding fields.
+
+        : Reserved field detection.
+
+        Identifies fields that are always zero or constant by:
+        - Checking for always-zero fields (padding)
+        - Detecting always-constant fields (reserved)
+        - Tracking consistency across captures
+        - Distinguishing from intentional constant fields
+
+        Args:
+            messages: List of message arrays to analyze
+            offset: Field offset in bytes
+            size: Field size in bytes
+
+        Returns:
+            FieldDetectionResult with confidence, offset, length, and evidence
+
+        Example:
+            >>> messages = [np.array([0, 0, 0, 0], dtype=np.uint8) for _ in range(10)]
+            >>> result = inferrer.detect_reserved_field(messages, 0, 4)
+            >>> result.confidence == 1.0
+            True
+        """
+        # Extract all bytes in field across all messages
+        all_bytes: list[int] = []
+        for msg in messages:
+            for i in range(size):
+                all_bytes.append(int(msg[offset + i]))
+
+        # Check if all bytes are identical
+        unique_bytes = set(all_bytes)
+
+        if len(unique_bytes) != 1:
+            # Not reserved - has variation
+            return FieldDetectionResult(
+                confidence=0.0,
+                field_offset=offset,
+                field_length=size,
+                evidence={"reason": f"Field has variation ({len(unique_bytes)} unique values)"},
+            )
+
+        constant_value = next(iter(unique_bytes))
+
+        # Determine field type
+        if constant_value == 0:
+            field_subtype = "padding_zeros"
+            confidence = 1.0
+        else:
+            field_subtype = "reserved_constant"
+            # Slightly lower confidence - could be intentional constant
+            confidence = 0.9
+
+        # Additional checks for reserved field characteristics
+        # Reserved fields are often:
+        # 1. At specific alignment boundaries
+        # 2. Between other fields
+        # 3. At end of structures
+
+        alignment_bonus = 0.0
+        if offset % 4 == 0 and size % 4 == 0:
+            alignment_bonus = 0.05  # Aligned to 4-byte boundary
+        elif offset % 2 == 0 and size % 2 == 0:
+            alignment_bonus = 0.03  # Aligned to 2-byte boundary
+
+        confidence = min(confidence + alignment_bonus, 1.0)
+
+        evidence = {
+            "constant_value": constant_value,
+            "field_subtype": field_subtype,
+            "total_bytes_checked": len(all_bytes),
+            "is_aligned": alignment_bonus > 0,
+            "is_zero_padding": constant_value == 0,
+        }
+
+        return FieldDetectionResult(
+            confidence=confidence,
+            field_offset=offset,
+            field_length=size,
+            evidence=evidence,
+        )
+

 def infer_format(messages: list[bytes | NDArray[np.uint8]], min_samples: int = 10) -> MessageSchema:
     """Convenience function for format inference.