oscura 0.0.1-py3-none-any.whl → 0.1.0-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (465)
  1. oscura/__init__.py +813 -8
  2. oscura/__main__.py +392 -0
  3. oscura/analyzers/__init__.py +37 -0
  4. oscura/analyzers/digital/__init__.py +177 -0
  5. oscura/analyzers/digital/bus.py +691 -0
  6. oscura/analyzers/digital/clock.py +805 -0
  7. oscura/analyzers/digital/correlation.py +720 -0
  8. oscura/analyzers/digital/edges.py +632 -0
  9. oscura/analyzers/digital/extraction.py +413 -0
  10. oscura/analyzers/digital/quality.py +878 -0
  11. oscura/analyzers/digital/signal_quality.py +877 -0
  12. oscura/analyzers/digital/thresholds.py +708 -0
  13. oscura/analyzers/digital/timing.py +1104 -0
  14. oscura/analyzers/eye/__init__.py +46 -0
  15. oscura/analyzers/eye/diagram.py +434 -0
  16. oscura/analyzers/eye/metrics.py +555 -0
  17. oscura/analyzers/jitter/__init__.py +83 -0
  18. oscura/analyzers/jitter/ber.py +333 -0
  19. oscura/analyzers/jitter/decomposition.py +759 -0
  20. oscura/analyzers/jitter/measurements.py +413 -0
  21. oscura/analyzers/jitter/spectrum.py +220 -0
  22. oscura/analyzers/measurements.py +40 -0
  23. oscura/analyzers/packet/__init__.py +171 -0
  24. oscura/analyzers/packet/daq.py +1077 -0
  25. oscura/analyzers/packet/metrics.py +437 -0
  26. oscura/analyzers/packet/parser.py +327 -0
  27. oscura/analyzers/packet/payload.py +2156 -0
  28. oscura/analyzers/packet/payload_analysis.py +1312 -0
  29. oscura/analyzers/packet/payload_extraction.py +236 -0
  30. oscura/analyzers/packet/payload_patterns.py +670 -0
  31. oscura/analyzers/packet/stream.py +359 -0
  32. oscura/analyzers/patterns/__init__.py +266 -0
  33. oscura/analyzers/patterns/clustering.py +1036 -0
  34. oscura/analyzers/patterns/discovery.py +539 -0
  35. oscura/analyzers/patterns/learning.py +797 -0
  36. oscura/analyzers/patterns/matching.py +1091 -0
  37. oscura/analyzers/patterns/periodic.py +650 -0
  38. oscura/analyzers/patterns/sequences.py +767 -0
  39. oscura/analyzers/power/__init__.py +116 -0
  40. oscura/analyzers/power/ac_power.py +391 -0
  41. oscura/analyzers/power/basic.py +383 -0
  42. oscura/analyzers/power/conduction.py +314 -0
  43. oscura/analyzers/power/efficiency.py +297 -0
  44. oscura/analyzers/power/ripple.py +356 -0
  45. oscura/analyzers/power/soa.py +372 -0
  46. oscura/analyzers/power/switching.py +479 -0
  47. oscura/analyzers/protocol/__init__.py +150 -0
  48. oscura/analyzers/protocols/__init__.py +150 -0
  49. oscura/analyzers/protocols/base.py +500 -0
  50. oscura/analyzers/protocols/can.py +620 -0
  51. oscura/analyzers/protocols/can_fd.py +448 -0
  52. oscura/analyzers/protocols/flexray.py +405 -0
  53. oscura/analyzers/protocols/hdlc.py +399 -0
  54. oscura/analyzers/protocols/i2c.py +368 -0
  55. oscura/analyzers/protocols/i2s.py +296 -0
  56. oscura/analyzers/protocols/jtag.py +393 -0
  57. oscura/analyzers/protocols/lin.py +445 -0
  58. oscura/analyzers/protocols/manchester.py +333 -0
  59. oscura/analyzers/protocols/onewire.py +501 -0
  60. oscura/analyzers/protocols/spi.py +334 -0
  61. oscura/analyzers/protocols/swd.py +325 -0
  62. oscura/analyzers/protocols/uart.py +393 -0
  63. oscura/analyzers/protocols/usb.py +495 -0
  64. oscura/analyzers/signal_integrity/__init__.py +63 -0
  65. oscura/analyzers/signal_integrity/embedding.py +294 -0
  66. oscura/analyzers/signal_integrity/equalization.py +370 -0
  67. oscura/analyzers/signal_integrity/sparams.py +484 -0
  68. oscura/analyzers/spectral/__init__.py +53 -0
  69. oscura/analyzers/spectral/chunked.py +273 -0
  70. oscura/analyzers/spectral/chunked_fft.py +571 -0
  71. oscura/analyzers/spectral/chunked_wavelet.py +391 -0
  72. oscura/analyzers/spectral/fft.py +92 -0
  73. oscura/analyzers/statistical/__init__.py +250 -0
  74. oscura/analyzers/statistical/checksum.py +923 -0
  75. oscura/analyzers/statistical/chunked_corr.py +228 -0
  76. oscura/analyzers/statistical/classification.py +778 -0
  77. oscura/analyzers/statistical/entropy.py +1113 -0
  78. oscura/analyzers/statistical/ngrams.py +614 -0
  79. oscura/analyzers/statistics/__init__.py +119 -0
  80. oscura/analyzers/statistics/advanced.py +885 -0
  81. oscura/analyzers/statistics/basic.py +263 -0
  82. oscura/analyzers/statistics/correlation.py +630 -0
  83. oscura/analyzers/statistics/distribution.py +298 -0
  84. oscura/analyzers/statistics/outliers.py +463 -0
  85. oscura/analyzers/statistics/streaming.py +93 -0
  86. oscura/analyzers/statistics/trend.py +520 -0
  87. oscura/analyzers/validation.py +598 -0
  88. oscura/analyzers/waveform/__init__.py +36 -0
  89. oscura/analyzers/waveform/measurements.py +943 -0
  90. oscura/analyzers/waveform/measurements_with_uncertainty.py +371 -0
  91. oscura/analyzers/waveform/spectral.py +1689 -0
  92. oscura/analyzers/waveform/wavelets.py +298 -0
  93. oscura/api/__init__.py +62 -0
  94. oscura/api/dsl.py +538 -0
  95. oscura/api/fluent.py +571 -0
  96. oscura/api/operators.py +498 -0
  97. oscura/api/optimization.py +392 -0
  98. oscura/api/profiling.py +396 -0
  99. oscura/automotive/__init__.py +73 -0
  100. oscura/automotive/can/__init__.py +52 -0
  101. oscura/automotive/can/analysis.py +356 -0
  102. oscura/automotive/can/checksum.py +250 -0
  103. oscura/automotive/can/correlation.py +212 -0
  104. oscura/automotive/can/discovery.py +355 -0
  105. oscura/automotive/can/message_wrapper.py +375 -0
  106. oscura/automotive/can/models.py +385 -0
  107. oscura/automotive/can/patterns.py +381 -0
  108. oscura/automotive/can/session.py +452 -0
  109. oscura/automotive/can/state_machine.py +300 -0
  110. oscura/automotive/can/stimulus_response.py +461 -0
  111. oscura/automotive/dbc/__init__.py +15 -0
  112. oscura/automotive/dbc/generator.py +156 -0
  113. oscura/automotive/dbc/parser.py +146 -0
  114. oscura/automotive/dtc/__init__.py +30 -0
  115. oscura/automotive/dtc/database.py +3036 -0
  116. oscura/automotive/j1939/__init__.py +14 -0
  117. oscura/automotive/j1939/decoder.py +745 -0
  118. oscura/automotive/loaders/__init__.py +35 -0
  119. oscura/automotive/loaders/asc.py +98 -0
  120. oscura/automotive/loaders/blf.py +77 -0
  121. oscura/automotive/loaders/csv_can.py +136 -0
  122. oscura/automotive/loaders/dispatcher.py +136 -0
  123. oscura/automotive/loaders/mdf.py +331 -0
  124. oscura/automotive/loaders/pcap.py +132 -0
  125. oscura/automotive/obd/__init__.py +14 -0
  126. oscura/automotive/obd/decoder.py +707 -0
  127. oscura/automotive/uds/__init__.py +48 -0
  128. oscura/automotive/uds/decoder.py +265 -0
  129. oscura/automotive/uds/models.py +64 -0
  130. oscura/automotive/visualization.py +369 -0
  131. oscura/batch/__init__.py +55 -0
  132. oscura/batch/advanced.py +627 -0
  133. oscura/batch/aggregate.py +300 -0
  134. oscura/batch/analyze.py +139 -0
  135. oscura/batch/logging.py +487 -0
  136. oscura/batch/metrics.py +556 -0
  137. oscura/builders/__init__.py +41 -0
  138. oscura/builders/signal_builder.py +1131 -0
  139. oscura/cli/__init__.py +14 -0
  140. oscura/cli/batch.py +339 -0
  141. oscura/cli/characterize.py +273 -0
  142. oscura/cli/compare.py +775 -0
  143. oscura/cli/decode.py +551 -0
  144. oscura/cli/main.py +247 -0
  145. oscura/cli/shell.py +350 -0
  146. oscura/comparison/__init__.py +66 -0
  147. oscura/comparison/compare.py +397 -0
  148. oscura/comparison/golden.py +487 -0
  149. oscura/comparison/limits.py +391 -0
  150. oscura/comparison/mask.py +434 -0
  151. oscura/comparison/trace_diff.py +30 -0
  152. oscura/comparison/visualization.py +481 -0
  153. oscura/compliance/__init__.py +70 -0
  154. oscura/compliance/advanced.py +756 -0
  155. oscura/compliance/masks.py +363 -0
  156. oscura/compliance/reporting.py +483 -0
  157. oscura/compliance/testing.py +298 -0
  158. oscura/component/__init__.py +38 -0
  159. oscura/component/impedance.py +365 -0
  160. oscura/component/reactive.py +598 -0
  161. oscura/component/transmission_line.py +312 -0
  162. oscura/config/__init__.py +191 -0
  163. oscura/config/defaults.py +254 -0
  164. oscura/config/loader.py +348 -0
  165. oscura/config/memory.py +271 -0
  166. oscura/config/migration.py +458 -0
  167. oscura/config/pipeline.py +1077 -0
  168. oscura/config/preferences.py +530 -0
  169. oscura/config/protocol.py +875 -0
  170. oscura/config/schema.py +713 -0
  171. oscura/config/settings.py +420 -0
  172. oscura/config/thresholds.py +599 -0
  173. oscura/convenience.py +457 -0
  174. oscura/core/__init__.py +299 -0
  175. oscura/core/audit.py +457 -0
  176. oscura/core/backend_selector.py +405 -0
  177. oscura/core/cache.py +590 -0
  178. oscura/core/cancellation.py +439 -0
  179. oscura/core/confidence.py +225 -0
  180. oscura/core/config.py +506 -0
  181. oscura/core/correlation.py +216 -0
  182. oscura/core/cross_domain.py +422 -0
  183. oscura/core/debug.py +301 -0
  184. oscura/core/edge_cases.py +541 -0
  185. oscura/core/exceptions.py +535 -0
  186. oscura/core/gpu_backend.py +523 -0
  187. oscura/core/lazy.py +832 -0
  188. oscura/core/log_query.py +540 -0
  189. oscura/core/logging.py +931 -0
  190. oscura/core/logging_advanced.py +952 -0
  191. oscura/core/memoize.py +171 -0
  192. oscura/core/memory_check.py +274 -0
  193. oscura/core/memory_guard.py +290 -0
  194. oscura/core/memory_limits.py +336 -0
  195. oscura/core/memory_monitor.py +453 -0
  196. oscura/core/memory_progress.py +465 -0
  197. oscura/core/memory_warnings.py +315 -0
  198. oscura/core/numba_backend.py +362 -0
  199. oscura/core/performance.py +352 -0
  200. oscura/core/progress.py +524 -0
  201. oscura/core/provenance.py +358 -0
  202. oscura/core/results.py +331 -0
  203. oscura/core/types.py +504 -0
  204. oscura/core/uncertainty.py +383 -0
  205. oscura/discovery/__init__.py +52 -0
  206. oscura/discovery/anomaly_detector.py +672 -0
  207. oscura/discovery/auto_decoder.py +415 -0
  208. oscura/discovery/comparison.py +497 -0
  209. oscura/discovery/quality_validator.py +528 -0
  210. oscura/discovery/signal_detector.py +769 -0
  211. oscura/dsl/__init__.py +73 -0
  212. oscura/dsl/commands.py +246 -0
  213. oscura/dsl/interpreter.py +455 -0
  214. oscura/dsl/parser.py +689 -0
  215. oscura/dsl/repl.py +172 -0
  216. oscura/exceptions.py +59 -0
  217. oscura/exploratory/__init__.py +111 -0
  218. oscura/exploratory/error_recovery.py +642 -0
  219. oscura/exploratory/fuzzy.py +513 -0
  220. oscura/exploratory/fuzzy_advanced.py +786 -0
  221. oscura/exploratory/legacy.py +831 -0
  222. oscura/exploratory/parse.py +358 -0
  223. oscura/exploratory/recovery.py +275 -0
  224. oscura/exploratory/sync.py +382 -0
  225. oscura/exploratory/unknown.py +707 -0
  226. oscura/export/__init__.py +25 -0
  227. oscura/export/wireshark/README.md +265 -0
  228. oscura/export/wireshark/__init__.py +47 -0
  229. oscura/export/wireshark/generator.py +312 -0
  230. oscura/export/wireshark/lua_builder.py +159 -0
  231. oscura/export/wireshark/templates/dissector.lua.j2 +92 -0
  232. oscura/export/wireshark/type_mapping.py +165 -0
  233. oscura/export/wireshark/validator.py +105 -0
  234. oscura/exporters/__init__.py +94 -0
  235. oscura/exporters/csv.py +303 -0
  236. oscura/exporters/exporters.py +44 -0
  237. oscura/exporters/hdf5.py +219 -0
  238. oscura/exporters/html_export.py +701 -0
  239. oscura/exporters/json_export.py +291 -0
  240. oscura/exporters/markdown_export.py +367 -0
  241. oscura/exporters/matlab_export.py +354 -0
  242. oscura/exporters/npz_export.py +219 -0
  243. oscura/exporters/spice_export.py +210 -0
  244. oscura/extensibility/__init__.py +131 -0
  245. oscura/extensibility/docs.py +752 -0
  246. oscura/extensibility/extensions.py +1125 -0
  247. oscura/extensibility/logging.py +259 -0
  248. oscura/extensibility/measurements.py +485 -0
  249. oscura/extensibility/plugins.py +414 -0
  250. oscura/extensibility/registry.py +346 -0
  251. oscura/extensibility/templates.py +913 -0
  252. oscura/extensibility/validation.py +651 -0
  253. oscura/filtering/__init__.py +89 -0
  254. oscura/filtering/base.py +563 -0
  255. oscura/filtering/convenience.py +564 -0
  256. oscura/filtering/design.py +725 -0
  257. oscura/filtering/filters.py +32 -0
  258. oscura/filtering/introspection.py +605 -0
  259. oscura/guidance/__init__.py +24 -0
  260. oscura/guidance/recommender.py +429 -0
  261. oscura/guidance/wizard.py +518 -0
  262. oscura/inference/__init__.py +251 -0
  263. oscura/inference/active_learning/README.md +153 -0
  264. oscura/inference/active_learning/__init__.py +38 -0
  265. oscura/inference/active_learning/lstar.py +257 -0
  266. oscura/inference/active_learning/observation_table.py +230 -0
  267. oscura/inference/active_learning/oracle.py +78 -0
  268. oscura/inference/active_learning/teachers/__init__.py +15 -0
  269. oscura/inference/active_learning/teachers/simulator.py +192 -0
  270. oscura/inference/adaptive_tuning.py +453 -0
  271. oscura/inference/alignment.py +653 -0
  272. oscura/inference/bayesian.py +943 -0
  273. oscura/inference/binary.py +1016 -0
  274. oscura/inference/crc_reverse.py +711 -0
  275. oscura/inference/logic.py +288 -0
  276. oscura/inference/message_format.py +1305 -0
  277. oscura/inference/protocol.py +417 -0
  278. oscura/inference/protocol_dsl.py +1084 -0
  279. oscura/inference/protocol_library.py +1230 -0
  280. oscura/inference/sequences.py +809 -0
  281. oscura/inference/signal_intelligence.py +1509 -0
  282. oscura/inference/spectral.py +215 -0
  283. oscura/inference/state_machine.py +634 -0
  284. oscura/inference/stream.py +918 -0
  285. oscura/integrations/__init__.py +59 -0
  286. oscura/integrations/llm.py +1827 -0
  287. oscura/jupyter/__init__.py +32 -0
  288. oscura/jupyter/display.py +268 -0
  289. oscura/jupyter/magic.py +334 -0
  290. oscura/loaders/__init__.py +526 -0
  291. oscura/loaders/binary.py +69 -0
  292. oscura/loaders/configurable.py +1255 -0
  293. oscura/loaders/csv.py +26 -0
  294. oscura/loaders/csv_loader.py +473 -0
  295. oscura/loaders/hdf5.py +9 -0
  296. oscura/loaders/hdf5_loader.py +510 -0
  297. oscura/loaders/lazy.py +370 -0
  298. oscura/loaders/mmap_loader.py +583 -0
  299. oscura/loaders/numpy_loader.py +436 -0
  300. oscura/loaders/pcap.py +432 -0
  301. oscura/loaders/preprocessing.py +368 -0
  302. oscura/loaders/rigol.py +287 -0
  303. oscura/loaders/sigrok.py +321 -0
  304. oscura/loaders/tdms.py +367 -0
  305. oscura/loaders/tektronix.py +711 -0
  306. oscura/loaders/validation.py +584 -0
  307. oscura/loaders/vcd.py +464 -0
  308. oscura/loaders/wav.py +233 -0
  309. oscura/math/__init__.py +45 -0
  310. oscura/math/arithmetic.py +824 -0
  311. oscura/math/interpolation.py +413 -0
  312. oscura/onboarding/__init__.py +39 -0
  313. oscura/onboarding/help.py +498 -0
  314. oscura/onboarding/tutorials.py +405 -0
  315. oscura/onboarding/wizard.py +466 -0
  316. oscura/optimization/__init__.py +19 -0
  317. oscura/optimization/parallel.py +440 -0
  318. oscura/optimization/search.py +532 -0
  319. oscura/pipeline/__init__.py +43 -0
  320. oscura/pipeline/base.py +338 -0
  321. oscura/pipeline/composition.py +242 -0
  322. oscura/pipeline/parallel.py +448 -0
  323. oscura/pipeline/pipeline.py +375 -0
  324. oscura/pipeline/reverse_engineering.py +1119 -0
  325. oscura/plugins/__init__.py +122 -0
  326. oscura/plugins/base.py +272 -0
  327. oscura/plugins/cli.py +497 -0
  328. oscura/plugins/discovery.py +411 -0
  329. oscura/plugins/isolation.py +418 -0
  330. oscura/plugins/lifecycle.py +959 -0
  331. oscura/plugins/manager.py +493 -0
  332. oscura/plugins/registry.py +421 -0
  333. oscura/plugins/versioning.py +372 -0
  334. oscura/py.typed +0 -0
  335. oscura/quality/__init__.py +65 -0
  336. oscura/quality/ensemble.py +740 -0
  337. oscura/quality/explainer.py +338 -0
  338. oscura/quality/scoring.py +616 -0
  339. oscura/quality/warnings.py +456 -0
  340. oscura/reporting/__init__.py +248 -0
  341. oscura/reporting/advanced.py +1234 -0
  342. oscura/reporting/analyze.py +448 -0
  343. oscura/reporting/argument_preparer.py +596 -0
  344. oscura/reporting/auto_report.py +507 -0
  345. oscura/reporting/batch.py +615 -0
  346. oscura/reporting/chart_selection.py +223 -0
  347. oscura/reporting/comparison.py +330 -0
  348. oscura/reporting/config.py +615 -0
  349. oscura/reporting/content/__init__.py +39 -0
  350. oscura/reporting/content/executive.py +127 -0
  351. oscura/reporting/content/filtering.py +191 -0
  352. oscura/reporting/content/minimal.py +257 -0
  353. oscura/reporting/content/verbosity.py +162 -0
  354. oscura/reporting/core.py +508 -0
  355. oscura/reporting/core_formats/__init__.py +17 -0
  356. oscura/reporting/core_formats/multi_format.py +210 -0
  357. oscura/reporting/engine.py +836 -0
  358. oscura/reporting/export.py +366 -0
  359. oscura/reporting/formatting/__init__.py +129 -0
  360. oscura/reporting/formatting/emphasis.py +81 -0
  361. oscura/reporting/formatting/numbers.py +403 -0
  362. oscura/reporting/formatting/standards.py +55 -0
  363. oscura/reporting/formatting.py +466 -0
  364. oscura/reporting/html.py +578 -0
  365. oscura/reporting/index.py +590 -0
  366. oscura/reporting/multichannel.py +296 -0
  367. oscura/reporting/output.py +379 -0
  368. oscura/reporting/pdf.py +373 -0
  369. oscura/reporting/plots.py +731 -0
  370. oscura/reporting/pptx_export.py +360 -0
  371. oscura/reporting/renderers/__init__.py +11 -0
  372. oscura/reporting/renderers/pdf.py +94 -0
  373. oscura/reporting/sections.py +471 -0
  374. oscura/reporting/standards.py +680 -0
  375. oscura/reporting/summary_generator.py +368 -0
  376. oscura/reporting/tables.py +397 -0
  377. oscura/reporting/template_system.py +724 -0
  378. oscura/reporting/templates/__init__.py +15 -0
  379. oscura/reporting/templates/definition.py +205 -0
  380. oscura/reporting/templates/index.html +649 -0
  381. oscura/reporting/templates/index.md +173 -0
  382. oscura/schemas/__init__.py +158 -0
  383. oscura/schemas/bus_configuration.json +322 -0
  384. oscura/schemas/device_mapping.json +182 -0
  385. oscura/schemas/packet_format.json +418 -0
  386. oscura/schemas/protocol_definition.json +363 -0
  387. oscura/search/__init__.py +16 -0
  388. oscura/search/anomaly.py +292 -0
  389. oscura/search/context.py +149 -0
  390. oscura/search/pattern.py +160 -0
  391. oscura/session/__init__.py +34 -0
  392. oscura/session/annotations.py +289 -0
  393. oscura/session/history.py +313 -0
  394. oscura/session/session.py +445 -0
  395. oscura/streaming/__init__.py +43 -0
  396. oscura/streaming/chunked.py +611 -0
  397. oscura/streaming/progressive.py +393 -0
  398. oscura/streaming/realtime.py +622 -0
  399. oscura/testing/__init__.py +54 -0
  400. oscura/testing/synthetic.py +808 -0
  401. oscura/triggering/__init__.py +68 -0
  402. oscura/triggering/base.py +229 -0
  403. oscura/triggering/edge.py +353 -0
  404. oscura/triggering/pattern.py +344 -0
  405. oscura/triggering/pulse.py +581 -0
  406. oscura/triggering/window.py +453 -0
  407. oscura/ui/__init__.py +48 -0
  408. oscura/ui/formatters.py +526 -0
  409. oscura/ui/progressive_display.py +340 -0
  410. oscura/utils/__init__.py +99 -0
  411. oscura/utils/autodetect.py +338 -0
  412. oscura/utils/buffer.py +389 -0
  413. oscura/utils/lazy.py +407 -0
  414. oscura/utils/lazy_imports.py +147 -0
  415. oscura/utils/memory.py +836 -0
  416. oscura/utils/memory_advanced.py +1326 -0
  417. oscura/utils/memory_extensions.py +465 -0
  418. oscura/utils/progressive.py +352 -0
  419. oscura/utils/windowing.py +362 -0
  420. oscura/visualization/__init__.py +321 -0
  421. oscura/visualization/accessibility.py +526 -0
  422. oscura/visualization/annotations.py +374 -0
  423. oscura/visualization/axis_scaling.py +305 -0
  424. oscura/visualization/colors.py +453 -0
  425. oscura/visualization/digital.py +337 -0
  426. oscura/visualization/eye.py +420 -0
  427. oscura/visualization/histogram.py +281 -0
  428. oscura/visualization/interactive.py +858 -0
  429. oscura/visualization/jitter.py +702 -0
  430. oscura/visualization/keyboard.py +394 -0
  431. oscura/visualization/layout.py +365 -0
  432. oscura/visualization/optimization.py +1028 -0
  433. oscura/visualization/palettes.py +446 -0
  434. oscura/visualization/plot.py +92 -0
  435. oscura/visualization/power.py +290 -0
  436. oscura/visualization/power_extended.py +626 -0
  437. oscura/visualization/presets.py +467 -0
  438. oscura/visualization/protocols.py +932 -0
  439. oscura/visualization/render.py +207 -0
  440. oscura/visualization/rendering.py +444 -0
  441. oscura/visualization/reverse_engineering.py +791 -0
  442. oscura/visualization/signal_integrity.py +808 -0
  443. oscura/visualization/specialized.py +553 -0
  444. oscura/visualization/spectral.py +811 -0
  445. oscura/visualization/styles.py +381 -0
  446. oscura/visualization/thumbnails.py +311 -0
  447. oscura/visualization/time_axis.py +351 -0
  448. oscura/visualization/waveform.py +367 -0
  449. oscura/workflow/__init__.py +13 -0
  450. oscura/workflow/dag.py +377 -0
  451. oscura/workflows/__init__.py +58 -0
  452. oscura/workflows/compliance.py +280 -0
  453. oscura/workflows/digital.py +272 -0
  454. oscura/workflows/multi_trace.py +502 -0
  455. oscura/workflows/power.py +178 -0
  456. oscura/workflows/protocol.py +492 -0
  457. oscura/workflows/reverse_engineering.py +639 -0
  458. oscura/workflows/signal_integrity.py +227 -0
  459. oscura-0.1.0.dist-info/METADATA +300 -0
  460. oscura-0.1.0.dist-info/RECORD +463 -0
  461. oscura-0.1.0.dist-info/entry_points.txt +2 -0
  462. {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/licenses/LICENSE +1 -1
  463. oscura-0.0.1.dist-info/METADATA +0 -63
  464. oscura-0.0.1.dist-info/RECORD +0 -5
  465. {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/WHEEL +0 -0
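The 465 files above are almost entirely new modules: analyzers, loaders, exporters, automotive tooling, and a CLI. A minimal smoke-test sketch, assuming the 0.1.0 wheel is installed; the module paths are copied from the listing, but whether each one imports without optional extras is an assumption:

import importlib

# Paths copied verbatim from the file listing above; the selection is illustrative.
candidates = [
    "oscura.analyzers.packet.payload_analysis",
    "oscura.analyzers.protocols.uart",
    "oscura.automotive.can.session",
    "oscura.loaders.vcd",
]

for name in candidates:
    try:
        importlib.import_module(name)
        print(f"ok       {name}")
    except ImportError as exc:  # missing optional dependency or broken module
        print(f"missing  {name}: {exc}")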
oscura/analyzers/packet/payload_analysis.py
@@ -0,0 +1,1312 @@
1
+ """Payload field inference and comparison analysis.
2
+
3
+ RE-PAY-004: Payload Field Inference
4
+ RE-PAY-005: Payload Comparison and Differential Analysis
5
+
6
+ This module provides field structure inference, payload comparison,
7
+ similarity computation, and clustering for binary payloads.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+ import struct
14
+ from collections import Counter
15
+ from collections.abc import Sequence
16
+ from dataclasses import dataclass, field
17
+ from typing import TYPE_CHECKING, Any, Literal
18
+
19
+ import numpy as np
20
+
21
+ if TYPE_CHECKING:
22
+ from oscura.analyzers.packet.payload_extraction import PayloadInfo
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ # =============================================================================
28
+ # RE-PAY-004: Field Inference Data Classes
29
+ # =============================================================================
30
+
31
+
32
+ @dataclass
33
+ class InferredField:
34
+ """Inferred field from binary payload.
35
+
36
+ Implements RE-PAY-004: Inferred field structure.
37
+
38
+ Attributes:
39
+ name: Field name (auto-generated).
40
+ offset: Byte offset within message.
41
+ size: Field size in bytes.
42
+ inferred_type: Inferred data type.
43
+ endianness: Detected endianness.
44
+ is_constant: Whether field is constant across messages.
45
+ is_sequence: Whether field appears to be a counter/sequence.
46
+ is_checksum: Whether field appears to be a checksum.
47
+ constant_value: Value if constant.
48
+ confidence: Inference confidence (0-1).
49
+ sample_values: Sample values from messages.
50
+ """
51
+
52
+ name: str
53
+ offset: int
54
+ size: int
55
+ inferred_type: Literal[
56
+ "uint8",
57
+ "uint16",
58
+ "uint32",
59
+ "uint64",
60
+ "int8",
61
+ "int16",
62
+ "int32",
63
+ "int64",
64
+ "float32",
65
+ "float64",
66
+ "bytes",
67
+ "string",
68
+ "unknown",
69
+ ]
70
+ endianness: Literal["big", "little", "n/a"] = "n/a"
71
+ is_constant: bool = False
72
+ is_sequence: bool = False
73
+ is_checksum: bool = False
74
+ constant_value: bytes | None = None
75
+ confidence: float = 0.5
76
+ sample_values: list[Any] = field(default_factory=list)
77
+
78
+
79
+ @dataclass
80
+ class MessageSchema:
81
+ """Inferred message schema.
82
+
83
+ Implements RE-PAY-004: Complete message schema.
84
+
85
+ Attributes:
86
+ fields: List of inferred fields.
87
+ message_length: Total message length.
88
+ fixed_length: Whether all messages have same length.
89
+ length_range: (min, max) length range.
90
+ sample_count: Number of samples analyzed.
91
+ confidence: Overall schema confidence.
92
+ """
93
+
94
+ fields: list[InferredField]
95
+ message_length: int
96
+ fixed_length: bool
97
+ length_range: tuple[int, int]
98
+ sample_count: int
99
+ confidence: float
100
+
101
+
102
+ # =============================================================================
103
+ # RE-PAY-005: Comparison Data Classes
104
+ # =============================================================================
105
+
106
+
107
+ @dataclass
108
+ class PayloadDiff:
109
+ """Difference between two payloads.
110
+
111
+ Implements RE-PAY-005: Payload comparison result.
112
+
113
+ Attributes:
114
+ common_prefix_length: Length of common prefix.
115
+ common_suffix_length: Length of common suffix.
116
+ differences: List of (offset, byte_a, byte_b) tuples; -1 marks a byte missing from the shorter payload.
117
+ similarity: Similarity score (0-1).
118
+ edit_distance: Levenshtein edit distance.
119
+ """
120
+
121
+ common_prefix_length: int
122
+ common_suffix_length: int
123
+ differences: list[tuple[int, int, int]]
124
+ similarity: float
125
+ edit_distance: int
126
+
127
+
128
+ @dataclass
129
+ class VariablePositions:
130
+ """Analysis of which byte positions vary across payloads.
131
+
132
+ Implements RE-PAY-005: Variable position analysis.
133
+
134
+ Attributes:
135
+ constant_positions: Positions that are constant.
136
+ variable_positions: Positions that vary.
137
+ constant_values: Values at constant positions.
138
+ variance_by_position: Variance at each position.
139
+ """
140
+
141
+ constant_positions: list[int]
142
+ variable_positions: list[int]
143
+ constant_values: dict[int, int]
144
+ variance_by_position: np.ndarray[tuple[int], np.dtype[np.float64]]
145
+
146
+
147
+ @dataclass
148
+ class PayloadCluster:
149
+ """Cluster of similar payloads.
150
+
151
+ Implements RE-PAY-005: Payload clustering result.
152
+
153
+ Attributes:
154
+ cluster_id: Cluster identifier.
155
+ payloads: List of payload data in cluster.
156
+ indices: Original indices of payloads.
157
+ representative: Representative payload (centroid).
158
+ size: Number of payloads in cluster.
159
+ """
160
+
161
+ cluster_id: int
162
+ payloads: list[bytes]
163
+ indices: list[int]
164
+ representative: bytes
165
+ size: int
166
+
167
+
168
+ # =============================================================================
169
+ # RE-PAY-004: Field Inference Class
170
+ # =============================================================================
171
+
172
+
173
+ class FieldInferrer:
174
+ """Infer field structure within binary payloads.
175
+
176
+ Implements RE-PAY-004: Payload Field Inference.
177
+
178
+ Uses statistical analysis, alignment detection, and type inference
179
+ to reconstruct message formats from binary payload samples.
180
+
181
+ Example:
182
+ >>> inferrer = FieldInferrer()
183
+ >>> messages = [pkt.data for pkt in udp_packets]
184
+ >>> schema = inferrer.infer_fields(messages)
185
+ >>> for field in schema.fields:
186
+ ... print(f"{field.name}: {field.inferred_type} at offset {field.offset}")
187
+ """
188
+
189
+ def __init__(
190
+ self,
191
+ min_samples: int = 10,
192
+ entropy_threshold: float = 0.5,
193
+ sequence_threshold: int = 3,
194
+ ) -> None:
195
+ """Initialize field inferrer.
196
+
197
+ Args:
198
+ min_samples: Minimum samples for reliable inference.
199
+ entropy_threshold: Entropy change threshold for boundary detection.
200
+ sequence_threshold: Minimum consecutive incrementing values for sequence.
201
+ """
202
+ self.min_samples = min_samples
203
+ self.entropy_threshold = entropy_threshold
204
+ self.sequence_threshold = sequence_threshold
205
+
206
+ def infer_fields(
207
+ self,
208
+ messages: Sequence[bytes],
209
+ min_samples: int | None = None,
210
+ ) -> MessageSchema:
211
+ """Infer field structure from message samples.
212
+
213
+ Implements RE-PAY-004: Complete field inference.
214
+
215
+ Args:
216
+ messages: List of binary message samples.
217
+ min_samples: Override minimum sample count.
218
+
219
+ Returns:
220
+ MessageSchema with inferred field structure.
221
+
222
+ Example:
223
+ >>> schema = inferrer.infer_fields(messages)
224
+ >>> print(f"Detected {len(schema.fields)} fields")
225
+ """
226
+ if not messages:
227
+ return MessageSchema(
228
+ fields=[],
229
+ message_length=0,
230
+ fixed_length=True,
231
+ length_range=(0, 0),
232
+ sample_count=0,
233
+ confidence=0.0,
234
+ )
235
+
236
+ min_samples = min_samples or self.min_samples
237
+ lengths = [len(m) for m in messages]
238
+ min_len = min(lengths)
239
+ max_len = max(lengths)
240
+ fixed_length = min_len == max_len
241
+
242
+ # Use shortest message length for analysis
243
+ analysis_length = min_len
244
+
245
+ # Find field boundaries using entropy transitions
246
+ boundaries = self._detect_field_boundaries(messages, analysis_length)
247
+
248
+ # Infer field types for each segment
249
+ fields = []
250
+ for i, (start, end) in enumerate(boundaries):
251
+ field = self._infer_field(messages, start, end, i)
252
+ fields.append(field)
253
+
254
+ # Calculate overall confidence
255
+ if fields:
256
+ confidence = sum(f.confidence for f in fields) / len(fields)
257
+ else:
258
+ confidence = 0.0
259
+
260
+ return MessageSchema(
261
+ fields=fields,
262
+ message_length=analysis_length,
263
+ fixed_length=fixed_length,
264
+ length_range=(min_len, max_len),
265
+ sample_count=len(messages),
266
+ confidence=confidence,
267
+ )
268
+
269
+ def detect_field_types(
270
+ self,
271
+ messages: Sequence[bytes],
272
+ boundaries: list[tuple[int, int]],
273
+ ) -> list[InferredField]:
274
+ """Detect field types for given boundaries.
275
+
276
+ Implements RE-PAY-004: Field type detection.
277
+
278
+ Args:
279
+ messages: Message samples.
280
+ boundaries: List of (start, end) field boundaries.
281
+
282
+ Returns:
283
+ List of InferredField with type information.
284
+ """
285
+ fields = []
286
+ for i, (start, end) in enumerate(boundaries):
287
+ field = self._infer_field(messages, start, end, i)
288
+ fields.append(field)
289
+ return fields
290
+
291
+ def find_sequence_fields(
292
+ self,
293
+ messages: Sequence[bytes],
294
+ ) -> list[tuple[int, int]]:
295
+ """Find fields that appear to be sequence/counter values.
296
+
297
+ Implements RE-PAY-004: Sequence field detection.
298
+
299
+ Args:
300
+ messages: Message samples (should be in order).
301
+
302
+ Returns:
303
+ List of (offset, size) for sequence fields.
304
+
305
+ Note:
306
+ Offset/size combinations where a message is too short are skipped; no exception propagates to the caller.
307
+ """
308
+ if len(messages) < self.sequence_threshold:
309
+ return []
310
+
311
+ min_len = min(len(m) for m in messages)
312
+ sequence_fields = []
313
+
314
+ # Check each possible field size at each offset
315
+ for size in [1, 2, 4]:
316
+ for offset in range(min_len - size + 1):
317
+ values = []
318
+ try:
319
+ for msg in messages:
320
+ # Validate message length before slicing
321
+ if len(msg) < offset + size:
322
+ raise ValueError(
323
+ f"Message too short: expected at least {offset + size} bytes, "
324
+ f"got {len(msg)} bytes"
325
+ )
326
+ # Interpret the candidate field as big-endian (little-endian counters are not detected here)
327
+ val_be = int.from_bytes(msg[offset : offset + size], "big")
328
+ values.append(val_be)
329
+
330
+ if self._is_sequence(values):
331
+ sequence_fields.append((offset, size))
332
+ except (ValueError, IndexError) as e:
333
+ # Skip this offset/size combination if extraction fails
334
+ logger.debug(f"Skipping field at offset={offset}, size={size}: {e}")
335
+ continue
336
+
337
+ return sequence_fields
338
+
339
+ def find_checksum_fields(
340
+ self,
341
+ messages: Sequence[bytes],
342
+ ) -> list[tuple[int, int, str]]:
343
+ """Find fields that appear to be checksums.
344
+
345
+ Implements RE-PAY-004: Checksum field detection.
346
+
347
+ Args:
348
+ messages: Message samples.
349
+
350
+ Returns:
351
+ List of (offset, size, algorithm_hint) for checksum fields.
352
+
353
+ Note:
354
+ Candidate fields that would extend past the shortest message are skipped; no exception propagates to the caller.
355
+ """
356
+ if len(messages) < 5:
357
+ return []
358
+
359
+ min_len = min(len(m) for m in messages)
360
+ checksum_fields = []
361
+
362
+ # Common checksum sizes and positions
363
+ for size in [1, 2, 4]:
364
+ # Check the last position (most common for checksums) and the first position
365
+ for offset in [min_len - size, 0]:
366
+ if offset < 0:
367
+ continue
368
+
369
+ try:
370
+ # Validate offset and size before processing
371
+ if offset + size > min_len:
372
+ raise ValueError(
373
+ f"Invalid checksum field: offset={offset} + size={size} exceeds "
374
+ f"minimum message length={min_len}"
375
+ )
376
+
377
+ # Extract field values and message content
378
+ score = self._check_checksum_correlation(messages, offset, size)
379
+
380
+ if score >= 0.8:  # _check_checksum_correlation never returns more than 0.8, so use >=
381
+ algorithm = self._guess_checksum_algorithm(messages, offset, size)
382
+ checksum_fields.append((offset, size, algorithm))
383
+ except (ValueError, IndexError) as e:
384
+ # Skip this offset/size combination if validation fails
385
+ logger.debug(f"Skipping checksum field at offset={offset}, size={size}: {e}")
386
+ continue
387
+
388
+ return checksum_fields
389
+
390
+ def _detect_field_boundaries(
391
+ self,
392
+ messages: Sequence[bytes],
393
+ max_length: int,
394
+ ) -> list[tuple[int, int]]:
395
+ """Detect field boundaries using entropy analysis.
396
+
397
+ Args:
398
+ messages: Message samples.
399
+ max_length: Maximum length to analyze.
400
+
401
+ Returns:
402
+ List of (start, end) boundaries.
403
+ """
404
+ if max_length == 0:
405
+ return []
406
+
407
+ # Calculate per-byte entropy
408
+ byte_entropies = []
409
+ for pos in range(max_length):
410
+ values = [m[pos] for m in messages if len(m) > pos]
411
+ if len(values) < 2:
412
+ byte_entropies.append(0.0)
413
+ continue
414
+
415
+ counts = Counter(values)
416
+ total = len(values)
417
+ entropy = 0.0
418
+ for count in counts.values():
419
+ if count > 0:
420
+ p = count / total
421
+ entropy -= p * np.log2(p)
422
+ byte_entropies.append(entropy)
423
+
424
+ # Find boundaries at entropy transitions
425
+ boundaries = []
426
+ current_start = 0
427
+
428
+ for i in range(1, len(byte_entropies)):
429
+ delta = abs(byte_entropies[i] - byte_entropies[i - 1])
430
+
431
+ # Also check for constant vs variable patterns
432
+ if delta > self.entropy_threshold:
433
+ if i > current_start:
434
+ boundaries.append((current_start, i))
435
+ current_start = i
436
+
437
+ # Add final segment
438
+ if max_length > current_start:
439
+ boundaries.append((current_start, max_length))
440
+
441
+ # Merge very small segments
442
+ merged: list[tuple[int, int]] = []
443
+ for start, end in boundaries:
444
+ if merged and start - merged[-1][1] == 0 and end - start < 2:
445
+ # Merge with previous
446
+ merged[-1] = (merged[-1][0], end)
447
+ else:
448
+ merged.append((start, end))
449
+
450
+ return merged if merged else [(0, max_length)]
451
+
452
+ def _infer_field(
453
+ self,
454
+ messages: Sequence[bytes],
455
+ start: int,
456
+ end: int,
457
+ index: int,
458
+ ) -> InferredField:
459
+ """Infer type for a single field.
460
+
461
+ Args:
462
+ messages: Message samples.
463
+ start: Field start offset.
464
+ end: Field end offset.
465
+ index: Field index for naming.
466
+
467
+ Returns:
468
+ InferredField with inferred type.
469
+ """
470
+ size = end - start
471
+ name = f"field_{index}"
472
+
473
+ # Extract field values
474
+ values = []
475
+ raw_values = []
476
+ for msg in messages:
477
+ if len(msg) >= end:
478
+ field_bytes = msg[start:end]
479
+ raw_values.append(field_bytes)
480
+ values.append(field_bytes)
481
+
482
+ if not values:
483
+ return InferredField(
484
+ name=name,
485
+ offset=start,
486
+ size=size,
487
+ inferred_type="unknown",
488
+ confidence=0.0,
489
+ )
490
+
491
+ # Check if constant
492
+ unique_values = set(raw_values)
493
+ is_constant = len(unique_values) == 1
494
+
495
+ # Check if sequence
496
+ is_sequence = False
497
+ if not is_constant and size in [1, 2, 4, 8]:
498
+ int_values = [int.from_bytes(v, "big") for v in raw_values]
499
+ is_sequence = self._is_sequence(int_values)
500
+
501
+ # Check for checksum patterns
502
+ is_checksum = False
503
+ if start >= min(len(m) for m in messages) - 4:
504
+ score = self._check_checksum_correlation(messages, start, size)
505
+ is_checksum = score > 0.7
506
+
507
+ # Infer type
508
+ inferred_type, endianness, confidence = self._infer_type(raw_values, size)
509
+
510
+ # Sample values for debugging
511
+ sample_values: list[int | str] = []
512
+ for v in raw_values[:5]:
513
+ if inferred_type.startswith("uint") or inferred_type.startswith("int"):
514
+ try:
515
+ # Cast endianness to Literal type for type checker
516
+ byte_order: Literal["big", "little"] = (
517
+ "big" if endianness == "n/a" else endianness # type: ignore[assignment]
518
+ )
519
+ sample_values.append(int.from_bytes(v, byte_order))
520
+ except Exception:
521
+ sample_values.append(v.hex())
522
+ elif inferred_type == "string":
523
+ try:
524
+ sample_values.append(v.decode("utf-8", errors="replace"))
525
+ except Exception:
526
+ sample_values.append(v.hex())
527
+ else:
528
+ sample_values.append(v.hex())
529
+
530
+ # Cast to Literal types for type checker
531
+ inferred_type_literal: Literal[
532
+ "uint8",
533
+ "uint16",
534
+ "uint32",
535
+ "uint64",
536
+ "int8",
537
+ "int16",
538
+ "int32",
539
+ "int64",
540
+ "float32",
541
+ "float64",
542
+ "bytes",
543
+ "string",
544
+ "unknown",
545
+ ] = inferred_type # type: ignore[assignment]
546
+ endianness_literal: Literal["big", "little", "n/a"] = endianness # type: ignore[assignment]
547
+
548
+ return InferredField(
549
+ name=name,
550
+ offset=start,
551
+ size=size,
552
+ inferred_type=inferred_type_literal,
553
+ endianness=endianness_literal,
554
+ is_constant=is_constant,
555
+ is_sequence=is_sequence,
556
+ is_checksum=is_checksum,
557
+ constant_value=raw_values[0] if is_constant else None,
558
+ confidence=confidence,
559
+ sample_values=sample_values,
560
+ )
561
+
562
+ def _infer_type(
563
+ self,
564
+ values: list[bytes],
565
+ size: int,
566
+ ) -> tuple[str, str, float]:
567
+ """Infer data type from values.
568
+
569
+ Args:
570
+ values: Field values.
571
+ size: Field size.
572
+
573
+ Returns:
574
+ Tuple of (type, endianness, confidence).
575
+ """
576
+ if not values:
577
+ return "unknown", "n/a", 0.0
578
+
579
+ # Check for string (high printable ratio)
580
+ printable_ratio = sum(
581
+ 1 for v in values for b in v if 32 <= b <= 126 or b in (9, 10, 13)
582
+ ) / (len(values) * size)
583
+
584
+ if printable_ratio > 0.8:
585
+ return "string", "n/a", printable_ratio
586
+
587
+ # Check for standard integer sizes
588
+ if size == 1:
589
+ return "uint8", "n/a", 0.9
590
+
591
+ elif size == 2:
592
+ # Try to detect endianness
593
+ be_variance = np.var([int.from_bytes(v, "big") for v in values])
594
+ le_variance = np.var([int.from_bytes(v, "little") for v in values])
595
+
596
+ if be_variance < le_variance:
597
+ endian = "big"
598
+ else:
599
+ endian = "little"
600
+
601
+ return "uint16", endian, 0.8
602
+
603
+ elif size == 4:
604
+ # Check for float
605
+ float_valid = 0
606
+ for v in values:
607
+ try:
608
+ f = struct.unpack(">f", v)[0]
609
+ if not (np.isnan(f) or np.isinf(f)) and -1e10 < f < 1e10:
610
+ float_valid += 1
611
+ except Exception:
612
+ pass
613
+
614
+ if float_valid / len(values) > 0.8:
615
+ return "float32", "big", 0.7
616
+
617
+ # Otherwise integer
618
+ be_variance = np.var([int.from_bytes(v, "big") for v in values])
619
+ le_variance = np.var([int.from_bytes(v, "little") for v in values])
620
+ endian = "big" if be_variance < le_variance else "little"
621
+ return "uint32", endian, 0.8
622
+
623
+ elif size == 8:
624
+ # Treat 8-byte fields as uint64 (float64 detection is not attempted)
625
+ be_variance = np.var([int.from_bytes(v, "big") for v in values])
626
+ le_variance = np.var([int.from_bytes(v, "little") for v in values])
627
+ endian = "big" if be_variance < le_variance else "little"
628
+ return "uint64", endian, 0.7
629
+
630
+ else:
631
+ return "bytes", "n/a", 0.6
632
+
633
+ def _is_sequence(self, values: list[int]) -> bool:
634
+ """Check if values form a sequence.
635
+
636
+ Args:
637
+ values: Integer values.
638
+
639
+ Returns:
640
+ True if values are incrementing/decrementing.
641
+ """
642
+ if len(values) < self.sequence_threshold:
643
+ return False
644
+
645
+ # Check for incrementing sequence
646
+ diffs = [values[i + 1] - values[i] for i in range(len(values) - 1)]
647
+
648
+ # Most diffs should be 1 (or consistent)
649
+ counter = Counter(diffs)
650
+ if not counter:
651
+ return False
652
+
653
+ most_common_diff, count = counter.most_common(1)[0]
654
+ ratio = count / len(diffs)
655
+
656
+ return ratio > 0.8 and most_common_diff in [1, -1, 0]
657
+
658
+ def _check_checksum_correlation(
659
+ self,
660
+ messages: Sequence[bytes],
661
+ offset: int,
662
+ size: int,
663
+ ) -> float:
664
+ """Check if field correlates with message content like a checksum.
665
+
666
+ Args:
667
+ messages: Message samples.
668
+ offset: Field offset.
669
+ size: Field size.
670
+
671
+ Returns:
672
+ Correlation score (0-1).
673
+ """
674
+ # Simple heuristic: checksum fields have high correlation with
675
+ # changes in other parts of the message
676
+
677
+ if len(messages) < 5:
678
+ return 0.0
679
+
680
+ # Extract checksum values and message content
681
+ checksums = []
682
+ contents = []
683
+
684
+ for msg in messages:
685
+ if len(msg) >= offset + size:
686
+ checksums.append(int.from_bytes(msg[offset : offset + size], "big"))
687
+ # Content before checksum
688
+ content = msg[:offset] + msg[offset + size :]
689
+ contents.append(sum(content) % 65536)
690
+
691
+ if len(checksums) < 5:
692
+ return 0.0
693
+
694
+ # Check if checksum changes correlate with content changes
695
+ unique_contents = len(set(contents))
696
+ unique_checksums = len(set(checksums))
697
+
698
+ if unique_contents == 1 and unique_checksums == 1:
699
+ return 0.3 # Both constant - inconclusive
700
+
701
+ # Simple correlation check
702
+ if unique_contents > 1 and unique_checksums > 1:
703
+ return 0.8
704
+
705
+ return 0.3
706
+
707
+ def _guess_checksum_algorithm(
708
+ self,
709
+ messages: Sequence[bytes],
710
+ offset: int,
711
+ size: int,
712
+ ) -> str:
713
+ """Guess the checksum algorithm.
714
+
715
+ Args:
716
+ messages: Message samples.
717
+ offset: Checksum offset.
718
+ size: Checksum size.
719
+
720
+ Returns:
721
+ Algorithm name hint.
722
+ """
723
+ if size == 1:
724
+ return "xor8_or_sum8"
725
+ elif size == 2:
726
+ return "crc16_or_sum16"
727
+ elif size == 4:
728
+ return "crc32"
729
+ return "unknown"
730
+
731
+
732
+ # =============================================================================
733
+ # RE-PAY-004: Convenience Functions
734
+ # =============================================================================
735
+
736
+
737
+ def infer_fields(messages: Sequence[bytes], min_samples: int = 10) -> MessageSchema:
738
+ """Infer field structure from message samples.
739
+
740
+ Implements RE-PAY-004: Payload Field Inference.
741
+
742
+ Args:
743
+ messages: List of binary message samples.
744
+ min_samples: Minimum samples for reliable inference.
745
+
746
+ Returns:
747
+ MessageSchema with inferred field structure.
748
+
749
+ Example:
750
+ >>> messages = [pkt.data for pkt in packets]
751
+ >>> schema = infer_fields(messages)
752
+ >>> for field in schema.fields:
753
+ ... print(f"{field.name}: {field.inferred_type}")
754
+ """
755
+ inferrer = FieldInferrer(min_samples=min_samples)
756
+ return inferrer.infer_fields(messages)
757
+
758
+
759
+ def detect_field_types(
760
+ messages: Sequence[bytes],
761
+ boundaries: list[tuple[int, int]],
762
+ ) -> list[InferredField]:
763
+ """Detect field types for given boundaries.
764
+
765
+ Implements RE-PAY-004: Field type detection.
766
+
767
+ Args:
768
+ messages: Message samples.
769
+ boundaries: List of (start, end) field boundaries.
770
+
771
+ Returns:
772
+ List of InferredField with type information.
773
+ """
774
+ inferrer = FieldInferrer()
775
+ return inferrer.detect_field_types(messages, boundaries)
776
+
777
+
778
+ def find_sequence_fields(messages: Sequence[bytes]) -> list[tuple[int, int]]:
779
+ """Find fields that appear to be sequence/counter values.
780
+
781
+ Implements RE-PAY-004: Sequence field detection.
782
+
783
+ Args:
784
+ messages: Message samples (should be in order).
785
+
786
+ Returns:
787
+ List of (offset, size) for sequence fields.
788
+ """
789
+ inferrer = FieldInferrer()
790
+ return inferrer.find_sequence_fields(messages)
791
+
792
+
793
+ def find_checksum_fields(messages: Sequence[bytes]) -> list[tuple[int, int, str]]:
794
+ """Find fields that appear to be checksums.
795
+
796
+ Implements RE-PAY-004: Checksum field detection.
797
+
798
+ Args:
799
+ messages: Message samples.
800
+
801
+ Returns:
802
+ List of (offset, size, algorithm_hint) for checksum fields.
803
+ """
804
+ inferrer = FieldInferrer()
805
+ return inferrer.find_checksum_fields(messages)
806
+
807
+
808
+ # =============================================================================
809
+ # RE-PAY-005: Comparison Functions
810
+ # =============================================================================
811
+
812
+
813
+ def diff_payloads(payload_a: bytes, payload_b: bytes) -> PayloadDiff:
814
+ """Compare two payloads and identify differences.
815
+
816
+ Implements RE-PAY-005: Payload differential analysis.
817
+
818
+ Args:
819
+ payload_a: First payload.
820
+ payload_b: Second payload.
821
+
822
+ Returns:
823
+ PayloadDiff with comparison results.
824
+
825
+ Example:
826
+ >>> diff = diff_payloads(pkt1.data, pkt2.data)
827
+ >>> print(f"Common prefix: {diff.common_prefix_length} bytes")
828
+ >>> print(f"Different bytes: {len(diff.differences)}")
829
+ """
830
+ # Find common prefix
831
+ common_prefix = 0
832
+ min_len = min(len(payload_a), len(payload_b))
833
+ for i in range(min_len):
834
+ if payload_a[i] == payload_b[i]:
835
+ common_prefix += 1
836
+ else:
837
+ break
838
+
839
+ # Find common suffix
840
+ common_suffix = 0
841
+ for i in range(1, min_len - common_prefix + 1):
842
+ if payload_a[-i] == payload_b[-i]:
843
+ common_suffix += 1
844
+ else:
845
+ break
846
+
847
+ # Find all differences
848
+ differences = []
849
+ for i in range(min_len):
850
+ if payload_a[i] != payload_b[i]:
851
+ differences.append((i, payload_a[i], payload_b[i]))
852
+
853
+ # Add length differences
854
+ if len(payload_a) > len(payload_b):
855
+ for i in range(len(payload_b), len(payload_a)):
856
+ differences.append((i, payload_a[i], -1))
857
+ elif len(payload_b) > len(payload_a):
858
+ for i in range(len(payload_a), len(payload_b)):
859
+ differences.append((i, -1, payload_b[i]))
860
+
861
+ # Calculate similarity
862
+ max_len = max(len(payload_a), len(payload_b))
863
+ if max_len == 0:
864
+ similarity = 1.0
865
+ else:
866
+ matching = min_len - len([d for d in differences if d[0] < min_len])
867
+ similarity = matching / max_len
868
+
869
+ # Calculate edit distance (Levenshtein)
870
+ edit_distance = _levenshtein_distance(payload_a, payload_b)
871
+
872
+ return PayloadDiff(
873
+ common_prefix_length=common_prefix,
874
+ common_suffix_length=common_suffix,
875
+ differences=differences,
876
+ similarity=similarity,
877
+ edit_distance=edit_distance,
878
+ )
879
+
880
+
881
+ def find_common_bytes(payloads: Sequence[bytes]) -> bytes:
882
+ """Find common prefix across all payloads.
883
+
884
+ Implements RE-PAY-005: Common byte analysis.
885
+
886
+ Args:
887
+ payloads: List of payloads to analyze.
888
+
889
+ Returns:
890
+ Common prefix bytes.
891
+ """
892
+ if not payloads:
893
+ return b""
894
+
895
+ if len(payloads) == 1:
896
+ return payloads[0]
897
+
898
+ # Find minimum length
899
+ min_len = min(len(p) for p in payloads)
900
+
901
+ # Find common prefix
902
+ common = bytearray()
903
+ for i in range(min_len):
904
+ byte = payloads[0][i]
905
+ if all(p[i] == byte for p in payloads):
906
+ common.append(byte)
907
+ else:
908
+ break
909
+
910
+ return bytes(common)
911
+
912
+
913
+ def find_variable_positions(payloads: Sequence[bytes]) -> VariablePositions:
914
+ """Identify which byte positions vary across payloads.
915
+
916
+ Implements RE-PAY-005: Variable position detection.
917
+
918
+ Args:
919
+ payloads: List of payloads to analyze.
920
+
921
+ Returns:
922
+ VariablePositions with constant and variable position info.
923
+
924
+ Example:
925
+ >>> result = find_variable_positions(payloads)
926
+ >>> print(f"Constant positions: {result.constant_positions}")
927
+ >>> print(f"Variable positions: {result.variable_positions}")
928
+ """
929
+ if not payloads:
930
+ return VariablePositions(
931
+ constant_positions=[],
932
+ variable_positions=[],
933
+ constant_values={},
934
+ variance_by_position=np.array([]),
935
+ )
936
+
937
+ # Use shortest payload length
938
+ min_len = min(len(p) for p in payloads)
939
+
940
+ constant_positions = []
941
+ variable_positions = []
942
+ constant_values = {}
943
+ variances = []
944
+
945
+ for i in range(min_len):
946
+ values = [p[i] for p in payloads]
947
+ unique = set(values)
948
+
949
+ if len(unique) == 1:
950
+ constant_positions.append(i)
951
+ constant_values[i] = values[0]
952
+ variances.append(0.0)
953
+ else:
954
+ variable_positions.append(i)
955
+ variances.append(float(np.var(values)))
956
+
957
+ return VariablePositions(
958
+ constant_positions=constant_positions,
959
+ variable_positions=variable_positions,
960
+ constant_values=constant_values,
961
+ variance_by_position=np.array(variances),
962
+ )
963
+
964
+
965
+ def compute_similarity(
966
+ payload_a: bytes,
967
+ payload_b: bytes,
968
+ metric: Literal["levenshtein", "hamming", "jaccard"] = "levenshtein",
969
+ ) -> float:
970
+ """Compute similarity between two payloads.
971
+
972
+ Implements RE-PAY-005: Similarity computation.
973
+
974
+ Args:
975
+ payload_a: First payload.
976
+ payload_b: Second payload.
977
+ metric: Similarity metric to use.
978
+
979
+ Returns:
980
+ Similarity score (0-1).
981
+ """
982
+ if metric == "levenshtein":
983
+ max_len = max(len(payload_a), len(payload_b))
984
+ if max_len == 0:
985
+ return 1.0
986
+ distance = _levenshtein_distance(payload_a, payload_b)
987
+ return 1.0 - (distance / max_len)
988
+
989
+ elif metric == "hamming":
990
+ if len(payload_a) != len(payload_b):
991
+ # Pad shorter one
992
+ max_len = max(len(payload_a), len(payload_b))
993
+ payload_a = payload_a.ljust(max_len, b"\x00")
994
+ payload_b = payload_b.ljust(max_len, b"\x00")
995
+
996
+ matches = sum(a == b for a, b in zip(payload_a, payload_b, strict=True))
997
+ return matches / len(payload_a) if payload_a else 1.0
998
+
999
+ # metric == "jaccard"
1000
+ # Treat bytes as sets
1001
+ set_a = set(payload_a)
1002
+ set_b = set(payload_b)
1003
+ intersection = len(set_a & set_b)
1004
+ union = len(set_a | set_b)
1005
+ return intersection / union if union > 0 else 1.0
1006
+
1007
+
1008
+ def cluster_payloads(
1009
+ payloads: Sequence[bytes],
1010
+ threshold: float = 0.8,
1011
+ algorithm: Literal["greedy", "dbscan"] = "greedy",
1012
+ ) -> list[PayloadCluster]:
1013
+ """Cluster similar payloads together.
1014
+
1015
+ Implements RE-PAY-005: Payload clustering.
1016
+
1017
+ Args:
1018
+ payloads: List of payloads to cluster.
1019
+ threshold: Similarity threshold for clustering.
1020
+ algorithm: Clustering algorithm.
1021
+
1022
+ Returns:
1023
+ List of PayloadCluster objects.
1024
+
1025
+ Example:
1026
+ >>> clusters = cluster_payloads(payloads, threshold=0.85)
1027
+ >>> for c in clusters:
1028
+ ... print(f"Cluster {c.cluster_id}: {c.size} payloads")
1029
+ """
1030
+ if not payloads:
1031
+ return []
1032
+
1033
+ if algorithm == "greedy":
1034
+ return _cluster_greedy_optimized(payloads, threshold)
1035
+ # algorithm == "dbscan"
1036
+ return _cluster_dbscan(payloads, threshold)
1037
+
1038
+
1039
+ def correlate_request_response(
1040
+ requests: Sequence[PayloadInfo],
1041
+ responses: Sequence[PayloadInfo],
1042
+ max_delay: float = 1.0,
1043
+ ) -> list[tuple[PayloadInfo, PayloadInfo, float]]:
1044
+ """Correlate request payloads with responses.
1045
+
1046
+ Implements RE-PAY-005: Request-response correlation.
1047
+
1048
+ Args:
1049
+ requests: List of request PayloadInfo.
1050
+ responses: List of response PayloadInfo.
1051
+ max_delay: Maximum time between request and response.
1052
+
1053
+ Returns:
1054
+ List of (request, response, latency) tuples.
1055
+ """
1056
+ pairs = []
1057
+
1058
+ for request in requests:
1059
+ if request.timestamp is None:
1060
+ continue
1061
+
1062
+ best_response = None
1063
+ best_latency = float("inf")
1064
+
1065
+ for response in responses:
1066
+ if response.timestamp is None:
1067
+ continue
1068
+
1069
+ latency = response.timestamp - request.timestamp
1070
+ if 0 <= latency <= max_delay and latency < best_latency:
1071
+ best_response = response
1072
+ best_latency = latency
1073
+
1074
+ if best_response is not None:
1075
+ pairs.append((request, best_response, best_latency))
1076
+
1077
+ return pairs
1078
+
1079
+
1080
+ # =============================================================================
1081
+ # Helper Functions
1082
+ # =============================================================================
1083
+
1084
+
1085
+ def _levenshtein_distance(a: bytes, b: bytes) -> int:
1086
+ """Calculate Levenshtein edit distance between two byte sequences."""
1087
+ if len(a) < len(b):
1088
+ return _levenshtein_distance(b, a)
1089
+
1090
+ if len(b) == 0:
1091
+ return len(a)
1092
+
1093
+ previous_row: list[int] = list(range(len(b) + 1))
1094
+ for i, c1 in enumerate(a):
1095
+ current_row = [i + 1]
1096
+ for j, c2 in enumerate(b):
1097
+ insertions = previous_row[j + 1] + 1
1098
+ deletions = current_row[j] + 1
1099
+ substitutions = previous_row[j] + (c1 != c2)
1100
+ current_row.append(min(insertions, deletions, substitutions))
1101
+ previous_row = current_row
1102
+
1103
+ return previous_row[-1]
1104
+
1105
+
1106
+ def _fast_similarity(payload_a: bytes, payload_b: bytes, threshold: float) -> float | None:
1107
+ """Fast similarity check with early termination.
1108
+
1109
+ Uses length-based filtering and sampling to quickly reject dissimilar payloads.
1110
+ Returns None if payloads are likely similar (needs full check),
1111
+ or a similarity value if they can be quickly determined.
1112
+
1113
+ Args:
1114
+ payload_a: First payload.
1115
+ payload_b: Second payload.
1116
+ threshold: Similarity threshold for clustering.
1117
+
1118
+ Returns:
1119
+ Similarity value if quickly determined, None if full check needed.
1120
+ """
1121
+ len_a = len(payload_a)
1122
+ len_b = len(payload_b)
1123
+
1124
+ # Empty payloads
1125
+ if len_a == 0 and len_b == 0:
1126
+ return 1.0
1127
+ if len_a == 0 or len_b == 0:
1128
+ return 0.0
1129
+
1130
+ # Length difference filter: if lengths differ by more than (1-threshold)*max_len,
1131
+ # similarity can't exceed threshold
1132
+ max_len = max(len_a, len_b)
1133
+ min_len = min(len_a, len_b)
1134
+ _length_diff = max_len - min_len
1135
+
1136
+ # Maximum possible similarity given length difference
1137
+ max_possible_similarity = min_len / max_len
1138
+ if max_possible_similarity < threshold:
1139
+ return max_possible_similarity
1140
+
1141
+ # For same-length payloads, use fast hamming similarity
1142
+ if len_a == len_b:
1143
+ # Sample comparison for large payloads
1144
+ if len_a > 50:
1145
+ # Sample first 16, last 16, and some middle bytes
1146
+ sample_size = min(48, len_a)
1147
+ mismatches = 0
1148
+
1149
+ # First 16 bytes
1150
+ for i in range(min(16, len_a)):
1151
+ if payload_a[i] != payload_b[i]:
1152
+ mismatches += 1
1153
+
1154
+ # Last 16 bytes
1155
+ for i in range(1, min(17, len_a + 1)):
1156
+ if payload_a[-i] != payload_b[-i]:
1157
+ mismatches += 1
1158
+
1159
+ # Middle samples (len_a > 32 always true here since len_a > 50)
1160
+ step = (len_a - 32) // 16
1161
+ if step > 0:
1162
+ for i in range(16, len_a - 16, step):
1163
+ if payload_a[i] != payload_b[i]:
1164
+ mismatches += 1
1165
+
1166
+ # Estimate similarity from sample
1167
+ estimated_similarity = 1.0 - (mismatches / sample_size)
1168
+
1169
+ # If sample shows very low similarity, reject early
1170
+ if estimated_similarity < threshold * 0.8:
1171
+ return estimated_similarity
1172
+
1173
+ # Full hamming comparison for same-length payloads (faster than Levenshtein)
1174
+ matches = sum(a == b for a, b in zip(payload_a, payload_b, strict=True))
1175
+ return matches / len_a
1176
+
1177
+ # For different-length payloads, use common prefix/suffix heuristic
1178
+ common_prefix = 0
1179
+ for i in range(min_len):
1180
+ if payload_a[i] == payload_b[i]:
1181
+ common_prefix += 1
1182
+ else:
1183
+ break
1184
+
1185
+ common_suffix = 0
1186
+ for i in range(1, min_len - common_prefix + 1):
1187
+ if payload_a[-i] == payload_b[-i]:
1188
+ common_suffix += 1
1189
+ else:
1190
+ break
1191
+
1192
+ # Estimate similarity from prefix/suffix
1193
+ common_bytes = common_prefix + common_suffix
1194
+ estimated_similarity = common_bytes / max_len
1195
+
1196
+ # If common bytes suggest low similarity, reject
1197
+ if estimated_similarity < threshold * 0.7:
1198
+ return estimated_similarity
1199
+
1200
+ # Need full comparison
1201
+ return None
1202
+
1203
+
1204
+ def _cluster_greedy_optimized(
1205
+ payloads: Sequence[bytes],
1206
+ threshold: float,
1207
+ ) -> list[PayloadCluster]:
1208
+ """Optimized greedy clustering algorithm.
1209
+
1210
+ Uses fast pre-filtering based on length and sampling to avoid
1211
+ expensive Levenshtein distance calculations when possible.
1212
+
1213
+ Args:
1214
+ payloads: Sequence of payload bytes to cluster.
1215
+ threshold: Similarity threshold for clustering (0.0 to 1.0).
1216
+
1217
+ Returns:
1218
+ List of PayloadCluster objects containing clustered payloads.
1219
+ """
1220
+ clusters: list[PayloadCluster] = []
1221
+ assigned = [False] * len(payloads)
1222
+
1223
+ # Precompute lengths for fast filtering
1224
+ lengths = [len(p) for p in payloads]
1225
+
1226
+ for i, payload in enumerate(payloads):
1227
+ if assigned[i]:
1228
+ continue
1229
+
1230
+ # Start new cluster
1231
+ cluster_payloads = [payload]
1232
+ cluster_indices = [i]
1233
+ assigned[i] = True
1234
+
1235
+ payload_len = lengths[i]
1236
+
1237
+ # Find similar payloads
1238
+ for j in range(i + 1, len(payloads)):
1239
+ if assigned[j]:
1240
+ continue
1241
+
1242
+ other_len = lengths[j]
1243
+
1244
+ # Quick length-based rejection
1245
+ max_len = max(payload_len, other_len)
1246
+ min_len = min(payload_len, other_len)
1247
+ if max_len > 0 and min_len / max_len < threshold:  # guard against two empty payloads
1248
+ continue
1249
+
1250
+ # Try fast similarity check first
1251
+ fast_result = _fast_similarity(payload, payloads[j], threshold)
1252
+
1253
+ if fast_result is not None:
1254
+ similarity = fast_result
1255
+ else:
1256
+ # Fall back to Levenshtein for uncertain cases
1257
+ similarity = compute_similarity(payload, payloads[j])
1258
+
1259
+ if similarity >= threshold:
1260
+ cluster_payloads.append(payloads[j])
1261
+ cluster_indices.append(j)
1262
+ assigned[j] = True
1263
+
1264
+ clusters.append(
1265
+ PayloadCluster(
1266
+ cluster_id=len(clusters),
1267
+ payloads=cluster_payloads,
1268
+ indices=cluster_indices,
1269
+ representative=payload,
1270
+ size=len(cluster_payloads),
1271
+ )
1272
+ )
1273
+
1274
+ return clusters
1275
+
1276
+
1277
+ def _cluster_greedy(
1278
+ payloads: Sequence[bytes],
1279
+ threshold: float,
1280
+ ) -> list[PayloadCluster]:
1281
+ """Greedy clustering algorithm (legacy, uses optimized version)."""
1282
+ return _cluster_greedy_optimized(payloads, threshold)
1283
+
1284
+
1285
+ def _cluster_dbscan(
1286
+ payloads: Sequence[bytes],
1287
+ threshold: float,
1288
+ ) -> list[PayloadCluster]:
1289
+ """DBSCAN-style clustering (simplified)."""
1290
+ # For simplicity, fall back to greedy
1291
+ # Full DBSCAN would require scipy or custom implementation
1292
+ return _cluster_greedy_optimized(payloads, threshold)
1293
+
1294
+
1295
+ __all__ = [
1296
+ "FieldInferrer",
1297
+ "InferredField",
1298
+ "MessageSchema",
1299
+ "PayloadCluster",
1300
+ "PayloadDiff",
1301
+ "VariablePositions",
1302
+ "cluster_payloads",
1303
+ "compute_similarity",
1304
+ "correlate_request_response",
1305
+ "detect_field_types",
1306
+ "diff_payloads",
1307
+ "find_checksum_fields",
1308
+ "find_common_bytes",
1309
+ "find_sequence_fields",
1310
+ "find_variable_positions",
1311
+ "infer_fields",
1312
+ ]
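End of the new payload_analysis module. A minimal usage sketch of its public API (the functions in `__all__` above) against synthetic messages; the message layout and the outputs in the comments are illustrative assumptions, not values taken from the package's own tests, and checksum detection assumes the `>=` comparison noted earlier:

from oscura.analyzers.packet.payload_analysis import (
    cluster_payloads,
    diff_payloads,
    find_checksum_fields,
    infer_fields,
)

def make_message(seq: int, a: int, b: int) -> bytes:
    # 4-byte constant header, big-endian uint16 counter, two variable bytes,
    # then a trailing sum8 checksum over everything before it.
    body = b"\xaa\x55\x01\x00" + seq.to_bytes(2, "big") + bytes([a, b])
    return body + bytes([sum(body) & 0xFF])

messages = [make_message(i, 10 + i % 3, 200 - i % 5) for i in range(50)]

schema = infer_fields(messages, min_samples=10)
for f in schema.fields:
    print(f"{f.name}: {f.inferred_type} at offset {f.offset} "
          f"(constant={f.is_constant}, sequence={f.is_sequence}, checksum={f.is_checksum})")

# The trailing sum8 should show up as (8, 1, "xor8_or_sum8"); the correlation
# heuristic is weak, so wider trailing windows may be flagged as well.
print(find_checksum_fields(messages))

print(diff_payloads(messages[0], messages[1]).differences)
print(len(cluster_payloads(messages, threshold=0.85)))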