oscura 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (465) hide show
  1. oscura/__init__.py +813 -8
  2. oscura/__main__.py +392 -0
  3. oscura/analyzers/__init__.py +37 -0
  4. oscura/analyzers/digital/__init__.py +177 -0
  5. oscura/analyzers/digital/bus.py +691 -0
  6. oscura/analyzers/digital/clock.py +805 -0
  7. oscura/analyzers/digital/correlation.py +720 -0
  8. oscura/analyzers/digital/edges.py +632 -0
  9. oscura/analyzers/digital/extraction.py +413 -0
  10. oscura/analyzers/digital/quality.py +878 -0
  11. oscura/analyzers/digital/signal_quality.py +877 -0
  12. oscura/analyzers/digital/thresholds.py +708 -0
  13. oscura/analyzers/digital/timing.py +1104 -0
  14. oscura/analyzers/eye/__init__.py +46 -0
  15. oscura/analyzers/eye/diagram.py +434 -0
  16. oscura/analyzers/eye/metrics.py +555 -0
  17. oscura/analyzers/jitter/__init__.py +83 -0
  18. oscura/analyzers/jitter/ber.py +333 -0
  19. oscura/analyzers/jitter/decomposition.py +759 -0
  20. oscura/analyzers/jitter/measurements.py +413 -0
  21. oscura/analyzers/jitter/spectrum.py +220 -0
  22. oscura/analyzers/measurements.py +40 -0
  23. oscura/analyzers/packet/__init__.py +171 -0
  24. oscura/analyzers/packet/daq.py +1077 -0
  25. oscura/analyzers/packet/metrics.py +437 -0
  26. oscura/analyzers/packet/parser.py +327 -0
  27. oscura/analyzers/packet/payload.py +2156 -0
  28. oscura/analyzers/packet/payload_analysis.py +1312 -0
  29. oscura/analyzers/packet/payload_extraction.py +236 -0
  30. oscura/analyzers/packet/payload_patterns.py +670 -0
  31. oscura/analyzers/packet/stream.py +359 -0
  32. oscura/analyzers/patterns/__init__.py +266 -0
  33. oscura/analyzers/patterns/clustering.py +1036 -0
  34. oscura/analyzers/patterns/discovery.py +539 -0
  35. oscura/analyzers/patterns/learning.py +797 -0
  36. oscura/analyzers/patterns/matching.py +1091 -0
  37. oscura/analyzers/patterns/periodic.py +650 -0
  38. oscura/analyzers/patterns/sequences.py +767 -0
  39. oscura/analyzers/power/__init__.py +116 -0
  40. oscura/analyzers/power/ac_power.py +391 -0
  41. oscura/analyzers/power/basic.py +383 -0
  42. oscura/analyzers/power/conduction.py +314 -0
  43. oscura/analyzers/power/efficiency.py +297 -0
  44. oscura/analyzers/power/ripple.py +356 -0
  45. oscura/analyzers/power/soa.py +372 -0
  46. oscura/analyzers/power/switching.py +479 -0
  47. oscura/analyzers/protocol/__init__.py +150 -0
  48. oscura/analyzers/protocols/__init__.py +150 -0
  49. oscura/analyzers/protocols/base.py +500 -0
  50. oscura/analyzers/protocols/can.py +620 -0
  51. oscura/analyzers/protocols/can_fd.py +448 -0
  52. oscura/analyzers/protocols/flexray.py +405 -0
  53. oscura/analyzers/protocols/hdlc.py +399 -0
  54. oscura/analyzers/protocols/i2c.py +368 -0
  55. oscura/analyzers/protocols/i2s.py +296 -0
  56. oscura/analyzers/protocols/jtag.py +393 -0
  57. oscura/analyzers/protocols/lin.py +445 -0
  58. oscura/analyzers/protocols/manchester.py +333 -0
  59. oscura/analyzers/protocols/onewire.py +501 -0
  60. oscura/analyzers/protocols/spi.py +334 -0
  61. oscura/analyzers/protocols/swd.py +325 -0
  62. oscura/analyzers/protocols/uart.py +393 -0
  63. oscura/analyzers/protocols/usb.py +495 -0
  64. oscura/analyzers/signal_integrity/__init__.py +63 -0
  65. oscura/analyzers/signal_integrity/embedding.py +294 -0
  66. oscura/analyzers/signal_integrity/equalization.py +370 -0
  67. oscura/analyzers/signal_integrity/sparams.py +484 -0
  68. oscura/analyzers/spectral/__init__.py +53 -0
  69. oscura/analyzers/spectral/chunked.py +273 -0
  70. oscura/analyzers/spectral/chunked_fft.py +571 -0
  71. oscura/analyzers/spectral/chunked_wavelet.py +391 -0
  72. oscura/analyzers/spectral/fft.py +92 -0
  73. oscura/analyzers/statistical/__init__.py +250 -0
  74. oscura/analyzers/statistical/checksum.py +923 -0
  75. oscura/analyzers/statistical/chunked_corr.py +228 -0
  76. oscura/analyzers/statistical/classification.py +778 -0
  77. oscura/analyzers/statistical/entropy.py +1113 -0
  78. oscura/analyzers/statistical/ngrams.py +614 -0
  79. oscura/analyzers/statistics/__init__.py +119 -0
  80. oscura/analyzers/statistics/advanced.py +885 -0
  81. oscura/analyzers/statistics/basic.py +263 -0
  82. oscura/analyzers/statistics/correlation.py +630 -0
  83. oscura/analyzers/statistics/distribution.py +298 -0
  84. oscura/analyzers/statistics/outliers.py +463 -0
  85. oscura/analyzers/statistics/streaming.py +93 -0
  86. oscura/analyzers/statistics/trend.py +520 -0
  87. oscura/analyzers/validation.py +598 -0
  88. oscura/analyzers/waveform/__init__.py +36 -0
  89. oscura/analyzers/waveform/measurements.py +943 -0
  90. oscura/analyzers/waveform/measurements_with_uncertainty.py +371 -0
  91. oscura/analyzers/waveform/spectral.py +1689 -0
  92. oscura/analyzers/waveform/wavelets.py +298 -0
  93. oscura/api/__init__.py +62 -0
  94. oscura/api/dsl.py +538 -0
  95. oscura/api/fluent.py +571 -0
  96. oscura/api/operators.py +498 -0
  97. oscura/api/optimization.py +392 -0
  98. oscura/api/profiling.py +396 -0
  99. oscura/automotive/__init__.py +73 -0
  100. oscura/automotive/can/__init__.py +52 -0
  101. oscura/automotive/can/analysis.py +356 -0
  102. oscura/automotive/can/checksum.py +250 -0
  103. oscura/automotive/can/correlation.py +212 -0
  104. oscura/automotive/can/discovery.py +355 -0
  105. oscura/automotive/can/message_wrapper.py +375 -0
  106. oscura/automotive/can/models.py +385 -0
  107. oscura/automotive/can/patterns.py +381 -0
  108. oscura/automotive/can/session.py +452 -0
  109. oscura/automotive/can/state_machine.py +300 -0
  110. oscura/automotive/can/stimulus_response.py +461 -0
  111. oscura/automotive/dbc/__init__.py +15 -0
  112. oscura/automotive/dbc/generator.py +156 -0
  113. oscura/automotive/dbc/parser.py +146 -0
  114. oscura/automotive/dtc/__init__.py +30 -0
  115. oscura/automotive/dtc/database.py +3036 -0
  116. oscura/automotive/j1939/__init__.py +14 -0
  117. oscura/automotive/j1939/decoder.py +745 -0
  118. oscura/automotive/loaders/__init__.py +35 -0
  119. oscura/automotive/loaders/asc.py +98 -0
  120. oscura/automotive/loaders/blf.py +77 -0
  121. oscura/automotive/loaders/csv_can.py +136 -0
  122. oscura/automotive/loaders/dispatcher.py +136 -0
  123. oscura/automotive/loaders/mdf.py +331 -0
  124. oscura/automotive/loaders/pcap.py +132 -0
  125. oscura/automotive/obd/__init__.py +14 -0
  126. oscura/automotive/obd/decoder.py +707 -0
  127. oscura/automotive/uds/__init__.py +48 -0
  128. oscura/automotive/uds/decoder.py +265 -0
  129. oscura/automotive/uds/models.py +64 -0
  130. oscura/automotive/visualization.py +369 -0
  131. oscura/batch/__init__.py +55 -0
  132. oscura/batch/advanced.py +627 -0
  133. oscura/batch/aggregate.py +300 -0
  134. oscura/batch/analyze.py +139 -0
  135. oscura/batch/logging.py +487 -0
  136. oscura/batch/metrics.py +556 -0
  137. oscura/builders/__init__.py +41 -0
  138. oscura/builders/signal_builder.py +1131 -0
  139. oscura/cli/__init__.py +14 -0
  140. oscura/cli/batch.py +339 -0
  141. oscura/cli/characterize.py +273 -0
  142. oscura/cli/compare.py +775 -0
  143. oscura/cli/decode.py +551 -0
  144. oscura/cli/main.py +247 -0
  145. oscura/cli/shell.py +350 -0
  146. oscura/comparison/__init__.py +66 -0
  147. oscura/comparison/compare.py +397 -0
  148. oscura/comparison/golden.py +487 -0
  149. oscura/comparison/limits.py +391 -0
  150. oscura/comparison/mask.py +434 -0
  151. oscura/comparison/trace_diff.py +30 -0
  152. oscura/comparison/visualization.py +481 -0
  153. oscura/compliance/__init__.py +70 -0
  154. oscura/compliance/advanced.py +756 -0
  155. oscura/compliance/masks.py +363 -0
  156. oscura/compliance/reporting.py +483 -0
  157. oscura/compliance/testing.py +298 -0
  158. oscura/component/__init__.py +38 -0
  159. oscura/component/impedance.py +365 -0
  160. oscura/component/reactive.py +598 -0
  161. oscura/component/transmission_line.py +312 -0
  162. oscura/config/__init__.py +191 -0
  163. oscura/config/defaults.py +254 -0
  164. oscura/config/loader.py +348 -0
  165. oscura/config/memory.py +271 -0
  166. oscura/config/migration.py +458 -0
  167. oscura/config/pipeline.py +1077 -0
  168. oscura/config/preferences.py +530 -0
  169. oscura/config/protocol.py +875 -0
  170. oscura/config/schema.py +713 -0
  171. oscura/config/settings.py +420 -0
  172. oscura/config/thresholds.py +599 -0
  173. oscura/convenience.py +457 -0
  174. oscura/core/__init__.py +299 -0
  175. oscura/core/audit.py +457 -0
  176. oscura/core/backend_selector.py +405 -0
  177. oscura/core/cache.py +590 -0
  178. oscura/core/cancellation.py +439 -0
  179. oscura/core/confidence.py +225 -0
  180. oscura/core/config.py +506 -0
  181. oscura/core/correlation.py +216 -0
  182. oscura/core/cross_domain.py +422 -0
  183. oscura/core/debug.py +301 -0
  184. oscura/core/edge_cases.py +541 -0
  185. oscura/core/exceptions.py +535 -0
  186. oscura/core/gpu_backend.py +523 -0
  187. oscura/core/lazy.py +832 -0
  188. oscura/core/log_query.py +540 -0
  189. oscura/core/logging.py +931 -0
  190. oscura/core/logging_advanced.py +952 -0
  191. oscura/core/memoize.py +171 -0
  192. oscura/core/memory_check.py +274 -0
  193. oscura/core/memory_guard.py +290 -0
  194. oscura/core/memory_limits.py +336 -0
  195. oscura/core/memory_monitor.py +453 -0
  196. oscura/core/memory_progress.py +465 -0
  197. oscura/core/memory_warnings.py +315 -0
  198. oscura/core/numba_backend.py +362 -0
  199. oscura/core/performance.py +352 -0
  200. oscura/core/progress.py +524 -0
  201. oscura/core/provenance.py +358 -0
  202. oscura/core/results.py +331 -0
  203. oscura/core/types.py +504 -0
  204. oscura/core/uncertainty.py +383 -0
  205. oscura/discovery/__init__.py +52 -0
  206. oscura/discovery/anomaly_detector.py +672 -0
  207. oscura/discovery/auto_decoder.py +415 -0
  208. oscura/discovery/comparison.py +497 -0
  209. oscura/discovery/quality_validator.py +528 -0
  210. oscura/discovery/signal_detector.py +769 -0
  211. oscura/dsl/__init__.py +73 -0
  212. oscura/dsl/commands.py +246 -0
  213. oscura/dsl/interpreter.py +455 -0
  214. oscura/dsl/parser.py +689 -0
  215. oscura/dsl/repl.py +172 -0
  216. oscura/exceptions.py +59 -0
  217. oscura/exploratory/__init__.py +111 -0
  218. oscura/exploratory/error_recovery.py +642 -0
  219. oscura/exploratory/fuzzy.py +513 -0
  220. oscura/exploratory/fuzzy_advanced.py +786 -0
  221. oscura/exploratory/legacy.py +831 -0
  222. oscura/exploratory/parse.py +358 -0
  223. oscura/exploratory/recovery.py +275 -0
  224. oscura/exploratory/sync.py +382 -0
  225. oscura/exploratory/unknown.py +707 -0
  226. oscura/export/__init__.py +25 -0
  227. oscura/export/wireshark/README.md +265 -0
  228. oscura/export/wireshark/__init__.py +47 -0
  229. oscura/export/wireshark/generator.py +312 -0
  230. oscura/export/wireshark/lua_builder.py +159 -0
  231. oscura/export/wireshark/templates/dissector.lua.j2 +92 -0
  232. oscura/export/wireshark/type_mapping.py +165 -0
  233. oscura/export/wireshark/validator.py +105 -0
  234. oscura/exporters/__init__.py +94 -0
  235. oscura/exporters/csv.py +303 -0
  236. oscura/exporters/exporters.py +44 -0
  237. oscura/exporters/hdf5.py +219 -0
  238. oscura/exporters/html_export.py +701 -0
  239. oscura/exporters/json_export.py +291 -0
  240. oscura/exporters/markdown_export.py +367 -0
  241. oscura/exporters/matlab_export.py +354 -0
  242. oscura/exporters/npz_export.py +219 -0
  243. oscura/exporters/spice_export.py +210 -0
  244. oscura/extensibility/__init__.py +131 -0
  245. oscura/extensibility/docs.py +752 -0
  246. oscura/extensibility/extensions.py +1125 -0
  247. oscura/extensibility/logging.py +259 -0
  248. oscura/extensibility/measurements.py +485 -0
  249. oscura/extensibility/plugins.py +414 -0
  250. oscura/extensibility/registry.py +346 -0
  251. oscura/extensibility/templates.py +913 -0
  252. oscura/extensibility/validation.py +651 -0
  253. oscura/filtering/__init__.py +89 -0
  254. oscura/filtering/base.py +563 -0
  255. oscura/filtering/convenience.py +564 -0
  256. oscura/filtering/design.py +725 -0
  257. oscura/filtering/filters.py +32 -0
  258. oscura/filtering/introspection.py +605 -0
  259. oscura/guidance/__init__.py +24 -0
  260. oscura/guidance/recommender.py +429 -0
  261. oscura/guidance/wizard.py +518 -0
  262. oscura/inference/__init__.py +251 -0
  263. oscura/inference/active_learning/README.md +153 -0
  264. oscura/inference/active_learning/__init__.py +38 -0
  265. oscura/inference/active_learning/lstar.py +257 -0
  266. oscura/inference/active_learning/observation_table.py +230 -0
  267. oscura/inference/active_learning/oracle.py +78 -0
  268. oscura/inference/active_learning/teachers/__init__.py +15 -0
  269. oscura/inference/active_learning/teachers/simulator.py +192 -0
  270. oscura/inference/adaptive_tuning.py +453 -0
  271. oscura/inference/alignment.py +653 -0
  272. oscura/inference/bayesian.py +943 -0
  273. oscura/inference/binary.py +1016 -0
  274. oscura/inference/crc_reverse.py +711 -0
  275. oscura/inference/logic.py +288 -0
  276. oscura/inference/message_format.py +1305 -0
  277. oscura/inference/protocol.py +417 -0
  278. oscura/inference/protocol_dsl.py +1084 -0
  279. oscura/inference/protocol_library.py +1230 -0
  280. oscura/inference/sequences.py +809 -0
  281. oscura/inference/signal_intelligence.py +1509 -0
  282. oscura/inference/spectral.py +215 -0
  283. oscura/inference/state_machine.py +634 -0
  284. oscura/inference/stream.py +918 -0
  285. oscura/integrations/__init__.py +59 -0
  286. oscura/integrations/llm.py +1827 -0
  287. oscura/jupyter/__init__.py +32 -0
  288. oscura/jupyter/display.py +268 -0
  289. oscura/jupyter/magic.py +334 -0
  290. oscura/loaders/__init__.py +526 -0
  291. oscura/loaders/binary.py +69 -0
  292. oscura/loaders/configurable.py +1255 -0
  293. oscura/loaders/csv.py +26 -0
  294. oscura/loaders/csv_loader.py +473 -0
  295. oscura/loaders/hdf5.py +9 -0
  296. oscura/loaders/hdf5_loader.py +510 -0
  297. oscura/loaders/lazy.py +370 -0
  298. oscura/loaders/mmap_loader.py +583 -0
  299. oscura/loaders/numpy_loader.py +436 -0
  300. oscura/loaders/pcap.py +432 -0
  301. oscura/loaders/preprocessing.py +368 -0
  302. oscura/loaders/rigol.py +287 -0
  303. oscura/loaders/sigrok.py +321 -0
  304. oscura/loaders/tdms.py +367 -0
  305. oscura/loaders/tektronix.py +711 -0
  306. oscura/loaders/validation.py +584 -0
  307. oscura/loaders/vcd.py +464 -0
  308. oscura/loaders/wav.py +233 -0
  309. oscura/math/__init__.py +45 -0
  310. oscura/math/arithmetic.py +824 -0
  311. oscura/math/interpolation.py +413 -0
  312. oscura/onboarding/__init__.py +39 -0
  313. oscura/onboarding/help.py +498 -0
  314. oscura/onboarding/tutorials.py +405 -0
  315. oscura/onboarding/wizard.py +466 -0
  316. oscura/optimization/__init__.py +19 -0
  317. oscura/optimization/parallel.py +440 -0
  318. oscura/optimization/search.py +532 -0
  319. oscura/pipeline/__init__.py +43 -0
  320. oscura/pipeline/base.py +338 -0
  321. oscura/pipeline/composition.py +242 -0
  322. oscura/pipeline/parallel.py +448 -0
  323. oscura/pipeline/pipeline.py +375 -0
  324. oscura/pipeline/reverse_engineering.py +1119 -0
  325. oscura/plugins/__init__.py +122 -0
  326. oscura/plugins/base.py +272 -0
  327. oscura/plugins/cli.py +497 -0
  328. oscura/plugins/discovery.py +411 -0
  329. oscura/plugins/isolation.py +418 -0
  330. oscura/plugins/lifecycle.py +959 -0
  331. oscura/plugins/manager.py +493 -0
  332. oscura/plugins/registry.py +421 -0
  333. oscura/plugins/versioning.py +372 -0
  334. oscura/py.typed +0 -0
  335. oscura/quality/__init__.py +65 -0
  336. oscura/quality/ensemble.py +740 -0
  337. oscura/quality/explainer.py +338 -0
  338. oscura/quality/scoring.py +616 -0
  339. oscura/quality/warnings.py +456 -0
  340. oscura/reporting/__init__.py +248 -0
  341. oscura/reporting/advanced.py +1234 -0
  342. oscura/reporting/analyze.py +448 -0
  343. oscura/reporting/argument_preparer.py +596 -0
  344. oscura/reporting/auto_report.py +507 -0
  345. oscura/reporting/batch.py +615 -0
  346. oscura/reporting/chart_selection.py +223 -0
  347. oscura/reporting/comparison.py +330 -0
  348. oscura/reporting/config.py +615 -0
  349. oscura/reporting/content/__init__.py +39 -0
  350. oscura/reporting/content/executive.py +127 -0
  351. oscura/reporting/content/filtering.py +191 -0
  352. oscura/reporting/content/minimal.py +257 -0
  353. oscura/reporting/content/verbosity.py +162 -0
  354. oscura/reporting/core.py +508 -0
  355. oscura/reporting/core_formats/__init__.py +17 -0
  356. oscura/reporting/core_formats/multi_format.py +210 -0
  357. oscura/reporting/engine.py +836 -0
  358. oscura/reporting/export.py +366 -0
  359. oscura/reporting/formatting/__init__.py +129 -0
  360. oscura/reporting/formatting/emphasis.py +81 -0
  361. oscura/reporting/formatting/numbers.py +403 -0
  362. oscura/reporting/formatting/standards.py +55 -0
  363. oscura/reporting/formatting.py +466 -0
  364. oscura/reporting/html.py +578 -0
  365. oscura/reporting/index.py +590 -0
  366. oscura/reporting/multichannel.py +296 -0
  367. oscura/reporting/output.py +379 -0
  368. oscura/reporting/pdf.py +373 -0
  369. oscura/reporting/plots.py +731 -0
  370. oscura/reporting/pptx_export.py +360 -0
  371. oscura/reporting/renderers/__init__.py +11 -0
  372. oscura/reporting/renderers/pdf.py +94 -0
  373. oscura/reporting/sections.py +471 -0
  374. oscura/reporting/standards.py +680 -0
  375. oscura/reporting/summary_generator.py +368 -0
  376. oscura/reporting/tables.py +397 -0
  377. oscura/reporting/template_system.py +724 -0
  378. oscura/reporting/templates/__init__.py +15 -0
  379. oscura/reporting/templates/definition.py +205 -0
  380. oscura/reporting/templates/index.html +649 -0
  381. oscura/reporting/templates/index.md +173 -0
  382. oscura/schemas/__init__.py +158 -0
  383. oscura/schemas/bus_configuration.json +322 -0
  384. oscura/schemas/device_mapping.json +182 -0
  385. oscura/schemas/packet_format.json +418 -0
  386. oscura/schemas/protocol_definition.json +363 -0
  387. oscura/search/__init__.py +16 -0
  388. oscura/search/anomaly.py +292 -0
  389. oscura/search/context.py +149 -0
  390. oscura/search/pattern.py +160 -0
  391. oscura/session/__init__.py +34 -0
  392. oscura/session/annotations.py +289 -0
  393. oscura/session/history.py +313 -0
  394. oscura/session/session.py +445 -0
  395. oscura/streaming/__init__.py +43 -0
  396. oscura/streaming/chunked.py +611 -0
  397. oscura/streaming/progressive.py +393 -0
  398. oscura/streaming/realtime.py +622 -0
  399. oscura/testing/__init__.py +54 -0
  400. oscura/testing/synthetic.py +808 -0
  401. oscura/triggering/__init__.py +68 -0
  402. oscura/triggering/base.py +229 -0
  403. oscura/triggering/edge.py +353 -0
  404. oscura/triggering/pattern.py +344 -0
  405. oscura/triggering/pulse.py +581 -0
  406. oscura/triggering/window.py +453 -0
  407. oscura/ui/__init__.py +48 -0
  408. oscura/ui/formatters.py +526 -0
  409. oscura/ui/progressive_display.py +340 -0
  410. oscura/utils/__init__.py +99 -0
  411. oscura/utils/autodetect.py +338 -0
  412. oscura/utils/buffer.py +389 -0
  413. oscura/utils/lazy.py +407 -0
  414. oscura/utils/lazy_imports.py +147 -0
  415. oscura/utils/memory.py +836 -0
  416. oscura/utils/memory_advanced.py +1326 -0
  417. oscura/utils/memory_extensions.py +465 -0
  418. oscura/utils/progressive.py +352 -0
  419. oscura/utils/windowing.py +362 -0
  420. oscura/visualization/__init__.py +321 -0
  421. oscura/visualization/accessibility.py +526 -0
  422. oscura/visualization/annotations.py +374 -0
  423. oscura/visualization/axis_scaling.py +305 -0
  424. oscura/visualization/colors.py +453 -0
  425. oscura/visualization/digital.py +337 -0
  426. oscura/visualization/eye.py +420 -0
  427. oscura/visualization/histogram.py +281 -0
  428. oscura/visualization/interactive.py +858 -0
  429. oscura/visualization/jitter.py +702 -0
  430. oscura/visualization/keyboard.py +394 -0
  431. oscura/visualization/layout.py +365 -0
  432. oscura/visualization/optimization.py +1028 -0
  433. oscura/visualization/palettes.py +446 -0
  434. oscura/visualization/plot.py +92 -0
  435. oscura/visualization/power.py +290 -0
  436. oscura/visualization/power_extended.py +626 -0
  437. oscura/visualization/presets.py +467 -0
  438. oscura/visualization/protocols.py +932 -0
  439. oscura/visualization/render.py +207 -0
  440. oscura/visualization/rendering.py +444 -0
  441. oscura/visualization/reverse_engineering.py +791 -0
  442. oscura/visualization/signal_integrity.py +808 -0
  443. oscura/visualization/specialized.py +553 -0
  444. oscura/visualization/spectral.py +811 -0
  445. oscura/visualization/styles.py +381 -0
  446. oscura/visualization/thumbnails.py +311 -0
  447. oscura/visualization/time_axis.py +351 -0
  448. oscura/visualization/waveform.py +367 -0
  449. oscura/workflow/__init__.py +13 -0
  450. oscura/workflow/dag.py +377 -0
  451. oscura/workflows/__init__.py +58 -0
  452. oscura/workflows/compliance.py +280 -0
  453. oscura/workflows/digital.py +272 -0
  454. oscura/workflows/multi_trace.py +502 -0
  455. oscura/workflows/power.py +178 -0
  456. oscura/workflows/protocol.py +492 -0
  457. oscura/workflows/reverse_engineering.py +639 -0
  458. oscura/workflows/signal_integrity.py +227 -0
  459. oscura-0.1.1.dist-info/METADATA +300 -0
  460. oscura-0.1.1.dist-info/RECORD +463 -0
  461. oscura-0.1.1.dist-info/entry_points.txt +2 -0
  462. {oscura-0.0.1.dist-info → oscura-0.1.1.dist-info}/licenses/LICENSE +1 -1
  463. oscura-0.0.1.dist-info/METADATA +0 -63
  464. oscura-0.0.1.dist-info/RECORD +0 -5
  465. {oscura-0.0.1.dist-info → oscura-0.1.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1305 @@
1
+ """Message format inference using statistical analysis.
2
+
3
+ Requirements addressed: PSI-001
4
+
5
+ This module automatically infers message field structure from collections of
6
+ similar messages for protocol reverse engineering.
7
+
8
+ Key capabilities:
9
+ - Detect field boundaries via entropy transitions
10
+ - Classify fields as constant, variable, or sequential
11
+ - Infer field types (integer, counter, timestamp, checksum)
12
+ - Detect field dependencies (length fields, checksums)
13
+ - Generate message format specifications
14
+ - Voting expert ensemble for improved boundary detection (IPART-style)
15
+
16
+ References:
17
+ IPART: IP Packet Analysis using Random Forests. IEEE ISSRE 2014.
18
+ Discoverer: Automatic Protocol Reverse Engineering. USENIX Security 2007.
19
+ """
20
+
21
+ from dataclasses import dataclass
22
+ from dataclasses import field as dataclass_field
23
+ from typing import Any, Literal
24
+
25
+ import numpy as np
26
+ from numpy.typing import NDArray
27
+
28
+ from oscura.inference.alignment import align_local
29
+
30
+
31
@dataclass
class InferredField:
    """A single inferred message field.

    Produced by boundary detection plus statistical classification of the
    byte values observed at one offset range across many message samples.

    Attributes:
        name: Auto-generated field name.
        offset: Byte offset from message start.
        size: Field size in bytes.
        field_type: Inferred classification of the field's role.
        entropy: Shannon entropy (bits) of the observed field values.
        variance: Statistical variance of the observed field values.
        confidence: Confidence score in [0, 1] for the type inference.
        values_seen: Sample of observed values, kept for validation.
        evidence: Per-expert boolean evidence (used by ensemble methods).
    """

    name: str
    offset: int
    size: int
    field_type: Literal["constant", "counter", "timestamp", "length", "checksum", "data", "unknown"]
    entropy: float
    variance: float
    confidence: float
    values_seen: list[Any] = dataclass_field(default_factory=list)  # Sample values
    evidence: dict[str, bool] = dataclass_field(default_factory=dict)  # Expert evidence
58
+
59
+
60
@dataclass
class MessageSchema:
    """Complete inferred message format specification.

    Aggregates the fields, boundaries, and detected special fields
    (checksum/length) produced by MessageFormatInferrer.infer_format().

    Attributes:
        total_size: Total message size in bytes.
        fields: List of inferred fields, in offset order.
        field_boundaries: Byte offsets of field starts.
        header_size: Detected header size in bytes.
        payload_offset: Byte offset where the payload region starts.
        checksum_field: Detected checksum field, if any.
        length_field: Detected length field, if any.
    """

    total_size: int
    fields: list[InferredField]
    field_boundaries: list[int]  # Byte offsets of field starts
    header_size: int  # Detected header size
    payload_offset: int
    checksum_field: InferredField | None
    length_field: InferredField | None
83
+
84
+
85
+ class MessageFormatInferrer:
86
+ """Infer message format from samples.
87
+
88
+ : Message format inference using entropy and variance analysis.
89
+
90
+ Algorithm:
91
+ 1. Detect field boundaries using entropy transitions
92
+ 2. Classify fields based on statistical patterns
93
+ 3. Detect dependencies between fields
94
+ 4. Generate complete schema
95
+ """
96
+
97
+ def __init__(self, min_samples: int = 10):
98
+ """Initialize inferrer.
99
+
100
+ Args:
101
+ min_samples: Minimum number of message samples required
102
+ """
103
+ self.min_samples = min_samples
104
+
105
+ def infer_format(self, messages: list[bytes | NDArray[np.uint8]]) -> MessageSchema:
106
+ """Infer message format from collection of similar messages.
107
+
108
+ : Complete format inference workflow.
109
+
110
+ Args:
111
+ messages: List of message samples (bytes or np.ndarray)
112
+
113
+ Returns:
114
+ MessageSchema with inferred field structure
115
+
116
+ Raises:
117
+ ValueError: If insufficient samples or invalid input
118
+ """
119
+ if len(messages) < self.min_samples:
120
+ raise ValueError(f"Need at least {self.min_samples} messages, got {len(messages)}")
121
+
122
+ # Convert to numpy arrays for processing
123
+ msg_arrays = []
124
+ for msg in messages:
125
+ if isinstance(msg, bytes):
126
+ msg_arrays.append(np.frombuffer(msg, dtype=np.uint8))
127
+ elif isinstance(msg, np.ndarray):
128
+ msg_arrays.append(msg.astype(np.uint8))
129
+ else:
130
+ raise ValueError(f"Invalid message type: {type(msg)}")
131
+
132
+ # Check all messages are same length
133
+ lengths = [len(m) for m in msg_arrays]
134
+ if len(set(lengths)) > 1:
135
+ raise ValueError(f"Messages have varying lengths: {set(lengths)}")
136
+
137
+ msg_len = lengths[0]
138
+
139
+ # Detect field boundaries
140
+ boundaries = self.detect_field_boundaries(msg_arrays, method="combined")
141
+
142
+ # Detect field types
143
+ fields = self.detect_field_types(msg_arrays, boundaries)
144
+
145
+ # Determine header size (first high-entropy transition or first 4 fields)
146
+ header_size = self._estimate_header_size(fields)
147
+
148
+ # Find checksum and length fields
149
+ checksum_field = None
150
+ length_field = None
151
+
152
+ for f in fields:
153
+ if f.field_type == "checksum":
154
+ checksum_field = f
155
+ elif f.field_type == "length":
156
+ length_field = f
157
+
158
+ # Payload starts after header
159
+ payload_offset = header_size
160
+
161
+ schema = MessageSchema(
162
+ total_size=msg_len,
163
+ fields=fields,
164
+ field_boundaries=boundaries,
165
+ header_size=header_size,
166
+ payload_offset=payload_offset,
167
+ checksum_field=checksum_field,
168
+ length_field=length_field,
169
+ )
170
+
171
+ return schema
172
+
173
+ def detect_field_boundaries(
174
+ self,
175
+ messages: list[NDArray[np.uint8]],
176
+ method: Literal["entropy", "variance", "combined"] = "combined",
177
+ ) -> list[int]:
178
+ """Detect field boundaries using entropy transitions.
179
+
180
+ : Boundary detection via statistical transitions.
181
+
182
+ Args:
183
+ messages: List of message arrays
184
+ method: Detection method ('entropy', 'variance', or 'combined')
185
+
186
+ Returns:
187
+ List of byte offsets marking field starts (always includes 0)
188
+ """
189
+ if not messages:
190
+ return [0]
191
+
192
+ msg_len = len(messages[0])
193
+ boundaries = [0] # Always start at offset 0
194
+
195
+ if method in ["entropy", "combined"]:
196
+ # Calculate entropy at each byte position
197
+ entropies = []
198
+ for offset in range(msg_len):
199
+ entropy = self._calculate_byte_entropy(messages, offset)
200
+ entropies.append(entropy)
201
+
202
+ # Find transitions (entropy changes > threshold)
203
+ entropy_threshold = 1.5 # bits
204
+ for i in range(1, len(entropies)):
205
+ delta = abs(entropies[i] - entropies[i - 1])
206
+ if delta > entropy_threshold and i not in boundaries:
207
+ boundaries.append(i)
208
+
209
+ if method in ["variance", "combined"]:
210
+ # Calculate variance at each byte position
211
+ variances = []
212
+ for offset in range(msg_len):
213
+ values = [msg[offset] for msg in messages]
214
+ variance = np.var(values)
215
+ variances.append(variance)
216
+
217
+ # Find variance transitions
218
+ var_threshold = 1000.0
219
+ for i in range(1, len(variances)):
220
+ delta = abs(variances[i] - variances[i - 1])
221
+ if delta > var_threshold and i not in boundaries:
222
+ boundaries.append(i)
223
+
224
+ # Sort and ensure we don't have too many tiny fields
225
+ boundaries = sorted(set(boundaries))
226
+
227
+ # Merge boundaries that are too close (< 2 bytes apart)
228
+ merged = [boundaries[0]]
229
+ for b in boundaries[1:]:
230
+ if b - merged[-1] >= 2:
231
+ merged.append(b)
232
+
233
+ return merged
234
+
235
+ def detect_boundaries_voting(
236
+ self,
237
+ messages: list[bytes],
238
+ min_confidence: float = 0.6,
239
+ ) -> list[int]:
240
+ """Detect field boundaries using voting expert algorithm.
241
+
242
+ : IPART-style voting expert for boundary detection.
243
+
244
+ Combines multiple detection strategies:
245
+ 1. Entropy-based detection
246
+ 2. Alignment-based detection (Smith-Waterman)
247
+ 3. Statistical variance detection
248
+ 4. Byte value distribution analysis
249
+ 5. N-gram frequency analysis
250
+
251
+ Each "expert" votes on likely boundaries. Boundaries with
252
+ votes >= min_confidence threshold are returned.
253
+
254
+ Args:
255
+ messages: List of protocol messages (bytes)
256
+ min_confidence: Minimum vote fraction to accept boundary (0.0-1.0)
257
+
258
+ Returns:
259
+ List of byte positions that are likely field boundaries
260
+
261
+ References:
262
+ IPART: IP Packet Analysis using Random Forests.
263
+ IEEE ISSRE 2014.
264
+ """
265
+ if not messages:
266
+ return [0]
267
+
268
+ # Convert to numpy arrays for processing
269
+ msg_arrays = []
270
+ for msg in messages:
271
+ msg_arrays.append(np.frombuffer(msg, dtype=np.uint8))
272
+
273
+ # Run each expert
274
+ experts = [
275
+ self._expert_entropy(msg_arrays),
276
+ self._expert_alignment(messages),
277
+ self._expert_variance(msg_arrays),
278
+ self._expert_distribution(msg_arrays),
279
+ self._expert_ngrams(msg_arrays, n=2),
280
+ ]
281
+
282
+ num_experts = len(experts)
283
+
284
+ # Collect all possible boundary positions
285
+ all_boundaries = set()
286
+ for expert_boundaries in experts:
287
+ all_boundaries.update(expert_boundaries)
288
+
289
+ # Count votes for each boundary
290
+ boundary_votes: dict[int, int] = {}
291
+ for boundary in all_boundaries:
292
+ votes = sum(1 for expert in experts if boundary in expert)
293
+ boundary_votes[boundary] = votes
294
+
295
+ # Filter by confidence threshold
296
+ min_votes = int(num_experts * min_confidence)
297
+ accepted_boundaries = [pos for pos, votes in boundary_votes.items() if votes >= min_votes]
298
+
299
+ # Always include position 0
300
+ if 0 not in accepted_boundaries:
301
+ accepted_boundaries.append(0)
302
+
303
+ # Sort and merge close boundaries
304
+ accepted_boundaries = sorted(accepted_boundaries)
305
+
306
+ # Merge boundaries that are too close (< 2 bytes apart)
307
+ merged = [accepted_boundaries[0]]
308
+ for b in accepted_boundaries[1:]:
309
+ if b - merged[-1] >= 2:
310
+ merged.append(b)
311
+
312
+ return merged
313
+
314
+ def _expert_entropy(self, messages: list[NDArray[np.uint8]]) -> set[int]:
315
+ """Detect boundaries based on entropy changes.
316
+
317
+ : Entropy-based boundary expert.
318
+
319
+ Args:
320
+ messages: List of message arrays
321
+
322
+ Returns:
323
+ Set of boundary positions
324
+ """
325
+ if not messages:
326
+ return {0}
327
+
328
+ msg_len = len(messages[0])
329
+ boundaries = {0}
330
+
331
+ # Calculate entropy at each byte position
332
+ entropies = []
333
+ for offset in range(msg_len):
334
+ entropy = self._calculate_byte_entropy(messages, offset)
335
+ entropies.append(entropy)
336
+
337
+ # Find transitions (entropy changes > threshold)
338
+ entropy_threshold = 1.5 # bits
339
+ for i in range(1, len(entropies)):
340
+ delta = abs(entropies[i] - entropies[i - 1])
341
+ if delta > entropy_threshold:
342
+ boundaries.add(i)
343
+
344
+ return boundaries
345
+
346
+ def _expert_alignment(self, messages: list[bytes]) -> set[int]:
347
+ """Detect boundaries using Smith-Waterman alignment.
348
+
349
+ : Alignment-based boundary expert.
350
+
351
+ Uses local alignment to find conserved vs. variable regions.
352
+ Transitions between regions indicate likely boundaries.
353
+
354
+ Args:
355
+ messages: List of protocol messages
356
+
357
+ Returns:
358
+ Set of boundary positions
359
+ """
360
+ if len(messages) < 2:
361
+ return {0}
362
+
363
+ boundaries = {0}
364
+
365
+ # Compare first message to several others
366
+ num_comparisons = min(5, len(messages) - 1)
367
+ for i in range(1, num_comparisons + 1):
368
+ result = align_local(messages[0], messages[i])
369
+
370
+ # Boundaries at transitions between conserved and variable regions
371
+ for start, _end in result.conserved_regions:
372
+ if start > 0:
373
+ boundaries.add(start)
374
+
375
+ for start, _end in result.variable_regions:
376
+ if start > 0:
377
+ boundaries.add(start)
378
+
379
+ return boundaries
380
+
381
+ def _expert_variance(self, messages: list[NDArray[np.uint8]]) -> set[int]:
382
+ """Detect boundaries based on statistical variance.
383
+
384
+ : Variance-based boundary expert.
385
+
386
+ Args:
387
+ messages: List of message arrays
388
+
389
+ Returns:
390
+ Set of boundary positions
391
+ """
392
+ if not messages:
393
+ return {0}
394
+
395
+ msg_len = len(messages[0])
396
+ boundaries = {0}
397
+
398
+ # Calculate variance at each byte position
399
+ variances = []
400
+ for offset in range(msg_len):
401
+ values = [msg[offset] for msg in messages]
402
+ variance = np.var(values)
403
+ variances.append(variance)
404
+
405
+ # Find variance transitions
406
+ var_threshold = 1000.0
407
+ for i in range(1, len(variances)):
408
+ delta = abs(variances[i] - variances[i - 1])
409
+ if delta > var_threshold:
410
+ boundaries.add(i)
411
+
412
+ return boundaries
413
+
414
+ def _expert_distribution(self, messages: list[NDArray[np.uint8]]) -> set[int]:
415
+ """Detect boundaries from byte value distribution changes.
416
+
417
+ : Distribution-based boundary expert.
418
+
419
+ Analyzes how the distribution of byte values changes
420
+ across positions. Sharp changes suggest boundaries.
421
+
422
+ Args:
423
+ messages: List of message arrays
424
+
425
+ Returns:
426
+ Set of boundary positions
427
+ """
428
+ if not messages:
429
+ return {0}
430
+
431
+ msg_len = len(messages[0])
432
+ boundaries = {0}
433
+
434
+ # Calculate distribution metrics at each position
435
+ distributions = []
436
+ for offset in range(msg_len):
437
+ values = [msg[offset] for msg in messages]
438
+ # Use unique count as distribution metric
439
+ unique_count = len(set(values))
440
+ distributions.append(unique_count)
441
+
442
+ # Find sharp changes in distribution
443
+ for i in range(1, len(distributions)):
444
+ # Ratio of change
445
+ if distributions[i - 1] > 0:
446
+ ratio = distributions[i] / distributions[i - 1]
447
+ # Significant change (>2x or <0.5x)
448
+ if ratio > 2.0 or ratio < 0.5:
449
+ boundaries.add(i)
450
+
451
+ return boundaries
452
+
453
+ def _expert_ngrams(self, messages: list[NDArray[np.uint8]], n: int = 2) -> set[int]:
454
+ """Detect boundaries using n-gram frequency analysis.
455
+
456
+ : N-gram based boundary expert.
457
+
458
+ Analyzes how n-gram patterns change across positions.
459
+ Different n-gram distributions suggest different fields.
460
+
461
+ Args:
462
+ messages: List of message arrays
463
+ n: N-gram size (default: 2)
464
+
465
+ Returns:
466
+ Set of boundary positions
467
+ """
468
+ if not messages or len(messages[0]) < n:
469
+ return {0}
470
+
471
+ msg_len = len(messages[0])
472
+ boundaries = {0}
473
+
474
+ # Collect n-grams at each position
475
+ ngram_sets = []
476
+ for offset in range(msg_len - n + 1):
477
+ ngrams = set()
478
+ for msg in messages:
479
+ if offset + n <= len(msg):
480
+ ngram = tuple(msg[offset : offset + n])
481
+ ngrams.add(ngram)
482
+ ngram_sets.append(ngrams)
483
+
484
+ # Find positions where n-gram patterns change significantly
485
+ for i in range(1, len(ngram_sets)):
486
+ # Calculate Jaccard similarity between adjacent positions
487
+ set1 = ngram_sets[i - 1]
488
+ set2 = ngram_sets[i]
489
+
490
+ if len(set1) == 0 or len(set2) == 0:
491
+ continue
492
+
493
+ intersection = len(set1 & set2)
494
+ union = len(set1 | set2)
495
+
496
+ if union > 0:
497
+ similarity = intersection / union
498
+ # Low similarity suggests boundary
499
+ if similarity < 0.3:
500
+ boundaries.add(i)
501
+
502
+ return boundaries
503
+
504
    def infer_format_ensemble(
        self,
        messages: list[bytes | NDArray[np.uint8]],
        min_field_confidence: float = 0.6,
        min_boundary_confidence: float = 0.6,
    ) -> MessageSchema:
        """Infer message format using an ensemble of techniques.

        Pipeline: (1) voting-based boundary detection, (2) three field
        type detectors (entropy, patterns, statistics) per field,
        (3) confidence-weighted voting on the field type, keeping only
        fields whose normalized confidence clears ``min_field_confidence``.

        Args:
            messages: Protocol messages (bytes or uint8 arrays); all must
                have the same length.
            min_field_confidence: Minimum normalized vote share required
                to keep an inferred field.
            min_boundary_confidence: Minimum vote fraction passed through
                to ``detect_boundaries_voting``.

        Returns:
            MessageSchema with confidence-scored fields.

        Raises:
            ValueError: If fewer than ``self.min_samples`` messages are
                given, a message is neither bytes nor ndarray, or message
                lengths differ.
        """
        if len(messages) < self.min_samples:
            raise ValueError(f"Need at least {self.min_samples} messages, got {len(messages)}")

        # Normalize everything to bytes for the boundary-voting step.
        bytes_messages = []
        for msg in messages:
            if isinstance(msg, bytes):
                bytes_messages.append(msg)
            elif isinstance(msg, np.ndarray):
                bytes_messages.append(msg.tobytes())
            else:
                raise ValueError(f"Invalid message type: {type(msg)}")

        # Fixed-length messages only: varying lengths are rejected.
        lengths = [len(m) for m in bytes_messages]
        if len(set(lengths)) > 1:
            raise ValueError(f"Messages have varying lengths: {set(lengths)}")

        msg_len = lengths[0]

        # Detect boundaries with the multi-expert voting scheme.
        boundaries = self.detect_boundaries_voting(
            bytes_messages, min_confidence=min_boundary_confidence
        )

        # uint8 arrays for field-level statistics.
        msg_arrays = []
        for msg in bytes_messages:
            msg_arrays.append(np.frombuffer(msg, dtype=np.uint8))

        # Extract and classify one field candidate per boundary.
        fields: list[InferredField] = []
        for i in range(len(boundaries)):
            offset = boundaries[i]

            # Field extends to the next boundary, or to end of message.
            if i < len(boundaries) - 1:
                size = boundaries[i + 1] - offset
            else:
                size = msg_len - offset

            # Per-field values + entropy/variance statistics.
            field_data = self._extract_field_data(msg_arrays, offset, size)

            # Three independent detectors vote on the field type.
            entropy_type, entropy_conf = self._detect_type_entropy(field_data)
            pattern_type, pattern_conf = self._detect_type_patterns(
                field_data, offset, size, msg_len
            )
            stats_type, stats_conf = self._detect_type_statistics(field_data)

            field_type, confidence, evidence = self._vote_field_type(
                [
                    (entropy_type, entropy_conf),
                    (pattern_type, pattern_conf),
                    (stats_type, stats_conf),
                ]
            )

            # Low-confidence fields are dropped entirely, so field names
            # (field_0, field_1, ...) number only the accepted ones.
            if confidence >= min_field_confidence:
                # Keep a small sample of observed values for inspection.
                sample_values = field_data["values"][:5]

                field_obj = InferredField(
                    name=f"field_{len(fields)}",
                    offset=offset,
                    size=size,
                    field_type=field_type,  # type: ignore[arg-type]
                    entropy=float(field_data["entropy"]),
                    variance=float(field_data["variance"]),
                    confidence=confidence,
                    values_seen=sample_values,
                    evidence=evidence,
                )

                fields.append(field_obj)

        # Header/payload split estimated from the accepted field layout.
        header_size = self._estimate_header_size(fields)

        # Last matching field wins if several are typed checksum/length.
        checksum_field = None
        length_field = None

        for f in fields:
            if f.field_type == "checksum":
                checksum_field = f
            elif f.field_type == "length":
                length_field = f

        # Payload is assumed to start right after the header.
        payload_offset = header_size

        schema = MessageSchema(
            total_size=msg_len,
            fields=fields,
            field_boundaries=boundaries,
            header_size=header_size,
            payload_offset=payload_offset,
            checksum_field=checksum_field,
            length_field=length_field,
        )

        return schema
635
+
636
+ def _extract_field_data(
637
+ self, messages: list[NDArray[np.uint8]], offset: int, size: int
638
+ ) -> dict[str, Any]:
639
+ """Extract field data for type detection.
640
+
641
+ Args:
642
+ messages: List of message arrays
643
+ offset: Field offset
644
+ size: Field size
645
+
646
+ Returns:
647
+ Dictionary with field values and statistics
648
+ """
649
+ values: list[int | tuple[int, ...]]
650
+ if size <= 4:
651
+ # Use integer representation for small fields
652
+ int_values: list[int] = []
653
+ for msg in messages:
654
+ if size == 1:
655
+ val_int = int(msg[offset])
656
+ elif size == 2:
657
+ val_int = int(msg[offset]) << 8 | int(msg[offset + 1])
658
+ elif size == 4:
659
+ val_int = (
660
+ int(msg[offset]) << 24
661
+ | int(msg[offset + 1]) << 16
662
+ | int(msg[offset + 2]) << 8
663
+ | int(msg[offset + 3])
664
+ )
665
+ else: # size == 3
666
+ val_int = (
667
+ int(msg[offset]) << 16 | int(msg[offset + 1]) << 8 | int(msg[offset + 2])
668
+ )
669
+ int_values.append(val_int)
670
+ values = list(int_values) # type: ignore[assignment]
671
+ else:
672
+ # For larger fields, use bytes
673
+ tuple_values: list[tuple[int, ...]] = []
674
+ for msg in messages:
675
+ val_tuple = tuple(int(b) for b in msg[offset : offset + size])
676
+ tuple_values.append(val_tuple)
677
+ values = list(tuple_values) # type: ignore[assignment]
678
+
679
+ # Calculate statistics
680
+ if size > 4:
681
+ # Bytes field - calculate entropy across all bytes
682
+ all_bytes_list: list[int] = []
683
+ for v in values:
684
+ if isinstance(v, tuple):
685
+ all_bytes_list.extend(v)
686
+ all_bytes = np.array(all_bytes_list, dtype=np.uint8)
687
+ entropy = self._calculate_entropy(all_bytes)
688
+ variance = float(np.var(all_bytes))
689
+ else:
690
+ entropy = self._calculate_entropy(np.array(values, dtype=np.int64))
691
+ variance = float(np.var(values))
692
+
693
+ return {
694
+ "values": values,
695
+ "offset": offset,
696
+ "size": size,
697
+ "entropy": entropy,
698
+ "variance": variance,
699
+ }
700
+
701
+ def _detect_type_entropy(self, field_data: dict[str, Any]) -> tuple[str, float]:
702
+ """Detect field type using entropy analysis.
703
+
704
+ : Entropy-based field type detection.
705
+
706
+ Args:
707
+ field_data: Field data dictionary
708
+
709
+ Returns:
710
+ Tuple of (field_type, confidence)
711
+ """
712
+ entropy = field_data["entropy"]
713
+ values = field_data["values"]
714
+
715
+ # Check if all values are identical (constant)
716
+ if len(set(values)) == 1:
717
+ return ("constant", 1.0)
718
+
719
+ # Low entropy suggests constant or semi-constant
720
+ if entropy < 1.0:
721
+ return ("constant", 0.8)
722
+ # Very high entropy suggests random data
723
+ elif entropy > 7.0:
724
+ return ("data", 0.7)
725
+ # Medium entropy could be various types
726
+ else:
727
+ return ("unknown", 0.3)
728
+
729
+ def _detect_type_patterns(
730
+ self, field_data: dict[str, Any], offset: int, size: int, msg_len: int
731
+ ) -> tuple[str, float]:
732
+ """Detect field type using pattern matching.
733
+
734
+ : Pattern-based field type detection.
735
+
736
+ Detects:
737
+ - Counters (incrementing values)
738
+ - Lengths (correlates with message size)
739
+ - Checksums (high entropy, end of message)
740
+ - Constants (no variation)
741
+ - Timestamps (steady increase)
742
+
743
+ Args:
744
+ field_data: Field data dictionary
745
+ offset: Field offset
746
+ size: Field size
747
+ msg_len: Total message length
748
+
749
+ Returns:
750
+ Tuple of (field_type, confidence)
751
+ """
752
+ values = field_data["values"]
753
+
754
+ # Check for counter (if integer values)
755
+ if not isinstance(values[0], tuple):
756
+ int_values = [v for v in values if isinstance(v, int)]
757
+ if self._detect_counter_field(int_values):
758
+ return ("counter", 0.9)
759
+
760
+ # Check for timestamp (similar to counter but larger values)
761
+ if len(int_values) >= 3:
762
+ diffs = [int_values[i + 1] - int_values[i] for i in range(len(int_values) - 1)]
763
+ positive_diffs = [d for d in diffs if d > 0]
764
+ if len(positive_diffs) >= len(diffs) * 0.7:
765
+ # Check if increments are relatively steady
766
+ if len(positive_diffs) > 0:
767
+ avg_diff = sum(positive_diffs) / len(positive_diffs)
768
+ if avg_diff > 100: # Large increments suggest timestamp
769
+ return ("timestamp", 0.7)
770
+
771
+ # Check for length field (small values, near start)
772
+ if offset < 8 and size <= 2 and not isinstance(values[0], tuple):
773
+ int_values = [v for v in values if isinstance(v, int)]
774
+ if int_values:
775
+ max_val = max(int_values)
776
+ if max_val < msg_len * 2:
777
+ return ("length", 0.6)
778
+
779
+ # Check for checksum (near end of message, but not the entire message)
780
+ if offset + size >= msg_len - 4 and offset > 0:
781
+ return ("checksum", 0.5)
782
+
783
+ return ("unknown", 0.3)
784
+
785
+ def _detect_type_statistics(self, field_data: dict[str, Any]) -> tuple[str, float]:
786
+ """Detect field type using statistical properties.
787
+
788
+ : Statistics-based field type detection.
789
+
790
+ Args:
791
+ field_data: Field data dictionary
792
+
793
+ Returns:
794
+ Tuple of (field_type, confidence)
795
+ """
796
+ variance = field_data["variance"]
797
+ entropy = field_data["entropy"]
798
+ values = field_data["values"]
799
+
800
+ # Check if all values identical (truly constant)
801
+ if len(set(values)) == 1:
802
+ return ("constant", 0.9)
803
+ # Very low variance suggests constant
804
+ elif variance < 10:
805
+ return ("constant", 0.7)
806
+ # High entropy and variance suggests data
807
+ elif entropy > 6.0 and variance > 1000:
808
+ return ("data", 0.6)
809
+ else:
810
+ return ("unknown", 0.4)
811
+
812
+ def _vote_field_type(
813
+ self, detections: list[tuple[str, float]]
814
+ ) -> tuple[str, float, dict[str, bool]]:
815
+ """Vote on field type from multiple detectors.
816
+
817
+ : Voting mechanism for field type.
818
+
819
+ Args:
820
+ detections: List of (field_type, confidence) tuples from detectors
821
+
822
+ Returns:
823
+ Tuple of (field_type, confidence, evidence_dict)
824
+ """
825
+ # Weight votes by confidence
826
+ votes: dict[str, float] = {}
827
+ evidence: dict[str, bool] = {}
828
+
829
+ detector_names = ["entropy", "patterns", "statistics"]
830
+
831
+ for i, (field_type, confidence) in enumerate(detections):
832
+ detector_name = detector_names[i] if i < len(detector_names) else f"detector_{i}"
833
+
834
+ if field_type not in votes:
835
+ votes[field_type] = 0.0
836
+
837
+ votes[field_type] += confidence
838
+
839
+ # Record evidence
840
+ evidence[f"{detector_name}_voted_{field_type}"] = True
841
+
842
+ # Find type with highest vote
843
+ if not votes:
844
+ return ("unknown", 0.0, evidence)
845
+
846
+ best_type = max(votes.items(), key=lambda x: x[1])
847
+ field_type = best_type[0]
848
+ total_confidence = best_type[1]
849
+
850
+ # Calculate total possible votes
851
+ total_possible = sum(conf for _, conf in detections)
852
+
853
+ # Normalize confidence as fraction of total possible votes
854
+ if total_possible > 0:
855
+ normalized_confidence = total_confidence / total_possible
856
+ else:
857
+ normalized_confidence = 0.0
858
+
859
+ return (field_type, normalized_confidence, evidence)
860
+
861
+ def detect_field_types(
862
+ self, messages: list[NDArray[np.uint8]], boundaries: list[int]
863
+ ) -> list[InferredField]:
864
+ """Classify field types based on value patterns.
865
+
866
+ : Field type classification.
867
+
868
+ Args:
869
+ messages: List of message arrays
870
+ boundaries: Field boundary offsets
871
+
872
+ Returns:
873
+ List of InferredField objects
874
+ """
875
+ fields = []
876
+
877
+ for i in range(len(boundaries)):
878
+ offset = boundaries[i]
879
+
880
+ # Determine field size
881
+ if i < len(boundaries) - 1:
882
+ size = boundaries[i + 1] - offset
883
+ else:
884
+ size = len(messages[0]) - offset
885
+
886
+ # Extract field values
887
+ values: list[int | tuple[int, ...]]
888
+ if size <= 4:
889
+ # Use integer representation for small fields
890
+ int_values: list[int] = []
891
+ for msg in messages:
892
+ if size == 1:
893
+ val_int = int(msg[offset])
894
+ elif size == 2:
895
+ val_int = int(msg[offset]) << 8 | int(msg[offset + 1])
896
+ elif size == 4:
897
+ val_int = (
898
+ int(msg[offset]) << 24
899
+ | int(msg[offset + 1]) << 16
900
+ | int(msg[offset + 2]) << 8
901
+ | int(msg[offset + 3])
902
+ )
903
+ else: # size == 3
904
+ val_int = (
905
+ int(msg[offset]) << 16
906
+ | int(msg[offset + 1]) << 8
907
+ | int(msg[offset + 2])
908
+ )
909
+ int_values.append(val_int)
910
+ values = list(int_values) # type: ignore[assignment]
911
+ else:
912
+ # For larger fields, use bytes
913
+ tuple_values: list[tuple[int, ...]] = []
914
+ for msg in messages:
915
+ val_tuple = tuple(int(b) for b in msg[offset : offset + size])
916
+ tuple_values.append(val_tuple)
917
+ values = list(tuple_values) # type: ignore[assignment]
918
+
919
+ # Calculate statistics
920
+ if size > 4:
921
+ # Bytes field - calculate entropy across all bytes
922
+ all_bytes_list: list[int] = []
923
+ for v in values:
924
+ if isinstance(v, tuple):
925
+ all_bytes_list.extend(v)
926
+ all_bytes = np.array(all_bytes_list, dtype=np.uint8)
927
+ entropy = self._calculate_entropy(all_bytes)
928
+ variance = float(np.var(all_bytes))
929
+ else:
930
+ entropy = self._calculate_entropy(np.array(values, dtype=np.int64))
931
+ variance = float(np.var(values))
932
+
933
+ # Classify field type
934
+ field_type, confidence = self._classify_field(values, offset, size, messages)
935
+
936
+ # Sample values (first 5)
937
+ sample_values = values[:5]
938
+
939
+ field_obj = InferredField(
940
+ name=f"field_{i}",
941
+ offset=offset,
942
+ size=size,
943
+ field_type=field_type, # type: ignore[arg-type]
944
+ entropy=float(entropy),
945
+ variance=float(variance),
946
+ confidence=confidence,
947
+ values_seen=sample_values,
948
+ )
949
+
950
+ fields.append(field_obj)
951
+
952
+ return fields
953
+
954
+ def find_dependencies(
955
+ self, messages: list[NDArray[np.uint8]], schema: MessageSchema
956
+ ) -> dict[str, str]:
957
+ """Find dependencies between fields (e.g., length->payload).
958
+
959
+ : Field dependency detection.
960
+
961
+ Args:
962
+ messages: List of message arrays
963
+ schema: Inferred message schema
964
+
965
+ Returns:
966
+ Dictionary mapping field names to dependency descriptions
967
+ """
968
+ dependencies = {}
969
+
970
+ # Check for length field dependencies
971
+ for field in schema.fields:
972
+ if field.field_type == "length":
973
+ # Check if any field size correlates with this length value
974
+ for msg in messages:
975
+ _length_val = self._extract_field_value(msg, field)
976
+ # Look for fields that might be variable length
977
+ # This is a simplified check
978
+ dependencies[field.name] = "Potential length indicator"
979
+
980
+ return dependencies
981
+
982
+ def _calculate_byte_entropy(self, messages: list[NDArray[np.uint8]], offset: int) -> float:
983
+ """Calculate entropy at byte offset across messages.
984
+
985
+ : Entropy calculation for boundary detection.
986
+
987
+ Args:
988
+ messages: List of message arrays
989
+ offset: Byte offset to analyze
990
+
991
+ Returns:
992
+ Shannon entropy in bits
993
+ """
994
+ values = [msg[offset] for msg in messages]
995
+ return float(self._calculate_entropy(np.array(values)))
996
+
997
+ def _calculate_entropy(self, values: NDArray[np.int_ | np.uint8]) -> float:
998
+ """Calculate Shannon entropy of values.
999
+
1000
+ Args:
1001
+ values: Array of values
1002
+
1003
+ Returns:
1004
+ Entropy in bits
1005
+ """
1006
+ if len(values) == 0:
1007
+ return 0.0
1008
+
1009
+ # Count frequencies
1010
+ _unique, counts = np.unique(values, return_counts=True)
1011
+ probabilities = counts / len(values)
1012
+
1013
+ # Calculate Shannon entropy
1014
+ entropy = -np.sum(probabilities * np.log2(probabilities + 1e-10))
1015
+ return float(entropy)
1016
+
1017
    def _classify_field(
        self,
        values: list[int | tuple[int, ...]],
        offset: int,
        size: int,
        messages: list[NDArray[np.uint8]],
    ) -> tuple[str, float]:
        """Classify a field's type from its values and position.

        Checks are ordered from most to least specific: byte-tuple fields
        (constant vs. data by entropy), then constant, counter, checksum
        (trailing bytes), length (small values near the start), and
        finally a variance/entropy fallback.

        Args:
            values: Field values across all messages (ints for fields up
                to 4 bytes, byte tuples for wider fields).
            offset: Field offset.
            size: Field size in bytes.
            messages: Original message arrays (used for checksum probing
                and total length).

        Returns:
            Tuple of (field_type, confidence).
        """
        # Wide fields arrive as byte tuples; classify them by entropy only.
        if isinstance(values[0], tuple):
            # All tuples identical: truly constant.
            if len(set(values)) == 1:
                return ("constant", 1.0)

            # Entropy over the flattened byte stream of the field.
            entropy = self._calculate_entropy(np.concatenate([np.array(v) for v in values]))
            if entropy < 1.0:
                return ("constant", 0.9)
            elif entropy > 7.0:
                return ("data", 0.6)
            else:
                return ("data", 0.5)

        # Scalar fields: single distinct value means constant.
        if len(set(values)) == 1:
            return ("constant", 1.0)

        # Counter check (the isinstance guard narrows for the type checker;
        # values[0] is known non-tuple at this point).
        if not isinstance(values[0], tuple) and self._detect_counter_field(  # type: ignore[misc, unreachable]
            [v for v in values if isinstance(v, int)]
        ):
            return ("counter", 0.9)

        # Trailing field: probe for a checksum relationship.
        msg_len = len(messages[0])
        if offset + size >= msg_len - 4:  # Within last 4 bytes
            if self._detect_checksum_field(messages, offset, size):
                return ("checksum", 0.8)

        # Length heuristic: small field near the header whose maximum stays
        # in the same order of magnitude as the message length.
        if offset < 8 and size <= 2:
            if not isinstance(values[0], tuple):  # type: ignore[unreachable]
                max_val = max(v for v in values if isinstance(v, int))
                if max_val < msg_len * 2:  # Reasonable length value
                    return ("length", 0.6)

        # Fallback: decide from spread of the values.
        variance = np.var(values)
        entropy = self._calculate_entropy(np.array(values))

        if variance < 10:
            return ("constant", 0.6)
        elif entropy > 6.0:
            return ("data", 0.7)
        else:
            return ("unknown", 0.5)
1084
+
1085
+ def _detect_counter_field(self, values: list[int]) -> bool:
1086
+ """Check if values form a counter sequence.
1087
+
1088
+ : Counter field detection.
1089
+
1090
+ Args:
1091
+ values: List of integer values
1092
+
1093
+ Returns:
1094
+ True if values appear to be a counter
1095
+ """
1096
+ if len(values) < 3:
1097
+ return False
1098
+
1099
+ # Check for monotonic increase
1100
+ diffs = [values[i + 1] - values[i] for i in range(len(values) - 1)]
1101
+
1102
+ # Allow wrapping
1103
+ diffs_filtered = [d for d in diffs if d >= 0]
1104
+
1105
+ # Check if most differences are 1 (counter increments)
1106
+ if len(diffs_filtered) < len(diffs) * 0.7:
1107
+ return False
1108
+
1109
+ ones = sum(1 for d in diffs_filtered if d == 1)
1110
+ return ones >= len(diffs_filtered) * 0.7
1111
+
1112
+ def _detect_checksum_field(
1113
+ self, messages: list[NDArray[np.uint8]], field_offset: int, field_size: int
1114
+ ) -> bool:
1115
+ """Check if field is likely a checksum.
1116
+
1117
+ : Checksum field detection.
1118
+
1119
+ Args:
1120
+ messages: List of message arrays
1121
+ field_offset: Offset of potential checksum field
1122
+ field_size: Size of potential checksum field
1123
+
1124
+ Returns:
1125
+ True if field appears to be a checksum
1126
+ """
1127
+ if field_size not in [1, 2, 4]:
1128
+ return False
1129
+
1130
+ # Try simple XOR checksum
1131
+ for msg in messages[: min(5, len(messages))]:
1132
+ # Calculate XOR of all bytes before checksum
1133
+ xor_sum = 0
1134
+ for i in range(field_offset):
1135
+ xor_sum ^= int(msg[i])
1136
+
1137
+ # Extract checksum value
1138
+ if field_size == 1:
1139
+ checksum = int(msg[field_offset])
1140
+ elif field_size == 2:
1141
+ checksum = int(msg[field_offset]) << 8 | int(msg[field_offset + 1])
1142
+ else:
1143
+ checksum = (
1144
+ int(msg[field_offset]) << 24
1145
+ | int(msg[field_offset + 1]) << 16
1146
+ | int(msg[field_offset + 2]) << 8
1147
+ | int(msg[field_offset + 3])
1148
+ )
1149
+
1150
+ # For single-byte, compare
1151
+ if field_size == 1 and (xor_sum & 0xFF) == checksum:
1152
+ continue
1153
+ else:
1154
+ return False # Not a match
1155
+
1156
+ return True # All matched
1157
+
1158
+ def _estimate_header_size(self, fields: list[InferredField]) -> int:
1159
+ """Estimate header size from field patterns.
1160
+
1161
+ Args:
1162
+ fields: List of inferred fields
1163
+
1164
+ Returns:
1165
+ Estimated header size in bytes
1166
+ """
1167
+ # Look for transition from low-entropy to high-entropy
1168
+ for i, field in enumerate(fields):
1169
+ if field.field_type == "data" and field.entropy > 6.0:
1170
+ if i > 0:
1171
+ return field.offset
1172
+
1173
+ # Default: first 4 fields or 16 bytes
1174
+ if len(fields) >= 5:
1175
+ # Header includes first 4 fields, so return offset of 5th field
1176
+ return fields[4].offset
1177
+ elif len(fields) >= 4:
1178
+ # If exactly 4 fields, header is up to end of 4th field
1179
+ return fields[3].offset + fields[3].size
1180
+ elif fields:
1181
+ # Fewer than 4 fields - use offset of last field
1182
+ return min(16, fields[-1].offset)
1183
+ else:
1184
+ return 16
1185
+
1186
+ def _extract_field_value(self, msg: NDArray[np.uint8], field: InferredField) -> int:
1187
+ """Extract field value from message.
1188
+
1189
+ Args:
1190
+ msg: Message array
1191
+ field: Field definition
1192
+
1193
+ Returns:
1194
+ Field value as integer
1195
+ """
1196
+ if field.size == 1:
1197
+ return int(msg[field.offset])
1198
+ elif field.size == 2:
1199
+ return int(msg[field.offset]) << 8 | int(msg[field.offset + 1])
1200
+ elif field.size == 4:
1201
+ return (
1202
+ int(msg[field.offset]) << 24
1203
+ | int(msg[field.offset + 1]) << 16
1204
+ | int(msg[field.offset + 2]) << 8
1205
+ | int(msg[field.offset + 3])
1206
+ )
1207
+ else:
1208
+ # Return first byte for larger fields
1209
+ return int(msg[field.offset])
1210
+
1211
+
1212
def infer_format(messages: list[bytes | NDArray[np.uint8]], min_samples: int = 10) -> MessageSchema:
    """Infer a message schema from raw samples (convenience wrapper).

    Args:
        messages: Message samples as bytes or uint8 arrays.
        min_samples: Minimum number of samples required.

    Returns:
        MessageSchema with inferred structure.
    """
    return MessageFormatInferrer(min_samples=min_samples).infer_format(messages)
1226
+
1227
+
1228
def detect_field_types(
    messages: list[bytes | NDArray[np.uint8]] | bytes | NDArray[np.uint8],
    boundaries: list[int] | None = None,
) -> list[InferredField]:
    """Detect field types at boundaries (module-level convenience API).

    Args:
        messages: A single message or a list of messages (bytes or uint8
            arrays).
        boundaries: Field boundary offsets; auto-detected when omitted.

    Returns:
        List of InferredField objects.

    Raises:
        ValueError: If a message is neither bytes nor an ndarray.
    """
    inferrer = MessageFormatInferrer()

    # Accept a lone message by wrapping it in a list.
    batch: list[bytes | NDArray[np.uint8]]
    batch = [messages] if isinstance(messages, (bytes, np.ndarray)) else messages

    # Normalize everything to uint8 arrays.
    arrays = []
    for msg in batch:
        if isinstance(msg, bytes):
            arrays.append(np.frombuffer(msg, dtype=np.uint8))
        elif isinstance(msg, np.ndarray):
            arrays.append(msg.astype(np.uint8))
        else:
            raise ValueError(f"Invalid message type: {type(msg)}")

    if boundaries is None:
        boundaries = inferrer.detect_field_boundaries(arrays, method="combined")

    return inferrer.detect_field_types(arrays, boundaries)
1269
+
1270
+
1271
def find_dependencies(
    messages: list[bytes | NDArray[np.uint8]], schema: MessageSchema | None = None
) -> dict[str, str]:
    """Find field dependencies (module-level convenience API).

    Args:
        messages: Message samples (bytes or uint8 arrays).
        schema: Existing schema; inferred automatically when omitted.

    Returns:
        Mapping of field name -> dependency description.

    Raises:
        ValueError: If a message is neither bytes nor an ndarray.
    """
    inferrer = MessageFormatInferrer()

    # Normalize everything to uint8 arrays.
    arrays = []
    for msg in messages:
        if isinstance(msg, bytes):
            arrays.append(np.frombuffer(msg, dtype=np.uint8))
        elif isinstance(msg, np.ndarray):
            arrays.append(msg.astype(np.uint8))
        else:
            raise ValueError(f"Invalid message type: {type(msg)}")

    if schema is None:
        # arrays holds only uint8 ndarrays after normalization.
        schema = inferrer.infer_format(arrays)  # type: ignore[arg-type]

    return inferrer.find_dependencies(arrays, schema)  # type: ignore[arg-type]