oscura 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (465)
  1. oscura/__init__.py +813 -8
  2. oscura/__main__.py +392 -0
  3. oscura/analyzers/__init__.py +37 -0
  4. oscura/analyzers/digital/__init__.py +177 -0
  5. oscura/analyzers/digital/bus.py +691 -0
  6. oscura/analyzers/digital/clock.py +805 -0
  7. oscura/analyzers/digital/correlation.py +720 -0
  8. oscura/analyzers/digital/edges.py +632 -0
  9. oscura/analyzers/digital/extraction.py +413 -0
  10. oscura/analyzers/digital/quality.py +878 -0
  11. oscura/analyzers/digital/signal_quality.py +877 -0
  12. oscura/analyzers/digital/thresholds.py +708 -0
  13. oscura/analyzers/digital/timing.py +1104 -0
  14. oscura/analyzers/eye/__init__.py +46 -0
  15. oscura/analyzers/eye/diagram.py +434 -0
  16. oscura/analyzers/eye/metrics.py +555 -0
  17. oscura/analyzers/jitter/__init__.py +83 -0
  18. oscura/analyzers/jitter/ber.py +333 -0
  19. oscura/analyzers/jitter/decomposition.py +759 -0
  20. oscura/analyzers/jitter/measurements.py +413 -0
  21. oscura/analyzers/jitter/spectrum.py +220 -0
  22. oscura/analyzers/measurements.py +40 -0
  23. oscura/analyzers/packet/__init__.py +171 -0
  24. oscura/analyzers/packet/daq.py +1077 -0
  25. oscura/analyzers/packet/metrics.py +437 -0
  26. oscura/analyzers/packet/parser.py +327 -0
  27. oscura/analyzers/packet/payload.py +2156 -0
  28. oscura/analyzers/packet/payload_analysis.py +1312 -0
  29. oscura/analyzers/packet/payload_extraction.py +236 -0
  30. oscura/analyzers/packet/payload_patterns.py +670 -0
  31. oscura/analyzers/packet/stream.py +359 -0
  32. oscura/analyzers/patterns/__init__.py +266 -0
  33. oscura/analyzers/patterns/clustering.py +1036 -0
  34. oscura/analyzers/patterns/discovery.py +539 -0
  35. oscura/analyzers/patterns/learning.py +797 -0
  36. oscura/analyzers/patterns/matching.py +1091 -0
  37. oscura/analyzers/patterns/periodic.py +650 -0
  38. oscura/analyzers/patterns/sequences.py +767 -0
  39. oscura/analyzers/power/__init__.py +116 -0
  40. oscura/analyzers/power/ac_power.py +391 -0
  41. oscura/analyzers/power/basic.py +383 -0
  42. oscura/analyzers/power/conduction.py +314 -0
  43. oscura/analyzers/power/efficiency.py +297 -0
  44. oscura/analyzers/power/ripple.py +356 -0
  45. oscura/analyzers/power/soa.py +372 -0
  46. oscura/analyzers/power/switching.py +479 -0
  47. oscura/analyzers/protocol/__init__.py +150 -0
  48. oscura/analyzers/protocols/__init__.py +150 -0
  49. oscura/analyzers/protocols/base.py +500 -0
  50. oscura/analyzers/protocols/can.py +620 -0
  51. oscura/analyzers/protocols/can_fd.py +448 -0
  52. oscura/analyzers/protocols/flexray.py +405 -0
  53. oscura/analyzers/protocols/hdlc.py +399 -0
  54. oscura/analyzers/protocols/i2c.py +368 -0
  55. oscura/analyzers/protocols/i2s.py +296 -0
  56. oscura/analyzers/protocols/jtag.py +393 -0
  57. oscura/analyzers/protocols/lin.py +445 -0
  58. oscura/analyzers/protocols/manchester.py +333 -0
  59. oscura/analyzers/protocols/onewire.py +501 -0
  60. oscura/analyzers/protocols/spi.py +334 -0
  61. oscura/analyzers/protocols/swd.py +325 -0
  62. oscura/analyzers/protocols/uart.py +393 -0
  63. oscura/analyzers/protocols/usb.py +495 -0
  64. oscura/analyzers/signal_integrity/__init__.py +63 -0
  65. oscura/analyzers/signal_integrity/embedding.py +294 -0
  66. oscura/analyzers/signal_integrity/equalization.py +370 -0
  67. oscura/analyzers/signal_integrity/sparams.py +484 -0
  68. oscura/analyzers/spectral/__init__.py +53 -0
  69. oscura/analyzers/spectral/chunked.py +273 -0
  70. oscura/analyzers/spectral/chunked_fft.py +571 -0
  71. oscura/analyzers/spectral/chunked_wavelet.py +391 -0
  72. oscura/analyzers/spectral/fft.py +92 -0
  73. oscura/analyzers/statistical/__init__.py +250 -0
  74. oscura/analyzers/statistical/checksum.py +923 -0
  75. oscura/analyzers/statistical/chunked_corr.py +228 -0
  76. oscura/analyzers/statistical/classification.py +778 -0
  77. oscura/analyzers/statistical/entropy.py +1113 -0
  78. oscura/analyzers/statistical/ngrams.py +614 -0
  79. oscura/analyzers/statistics/__init__.py +119 -0
  80. oscura/analyzers/statistics/advanced.py +885 -0
  81. oscura/analyzers/statistics/basic.py +263 -0
  82. oscura/analyzers/statistics/correlation.py +630 -0
  83. oscura/analyzers/statistics/distribution.py +298 -0
  84. oscura/analyzers/statistics/outliers.py +463 -0
  85. oscura/analyzers/statistics/streaming.py +93 -0
  86. oscura/analyzers/statistics/trend.py +520 -0
  87. oscura/analyzers/validation.py +598 -0
  88. oscura/analyzers/waveform/__init__.py +36 -0
  89. oscura/analyzers/waveform/measurements.py +943 -0
  90. oscura/analyzers/waveform/measurements_with_uncertainty.py +371 -0
  91. oscura/analyzers/waveform/spectral.py +1689 -0
  92. oscura/analyzers/waveform/wavelets.py +298 -0
  93. oscura/api/__init__.py +62 -0
  94. oscura/api/dsl.py +538 -0
  95. oscura/api/fluent.py +571 -0
  96. oscura/api/operators.py +498 -0
  97. oscura/api/optimization.py +392 -0
  98. oscura/api/profiling.py +396 -0
  99. oscura/automotive/__init__.py +73 -0
  100. oscura/automotive/can/__init__.py +52 -0
  101. oscura/automotive/can/analysis.py +356 -0
  102. oscura/automotive/can/checksum.py +250 -0
  103. oscura/automotive/can/correlation.py +212 -0
  104. oscura/automotive/can/discovery.py +355 -0
  105. oscura/automotive/can/message_wrapper.py +375 -0
  106. oscura/automotive/can/models.py +385 -0
  107. oscura/automotive/can/patterns.py +381 -0
  108. oscura/automotive/can/session.py +452 -0
  109. oscura/automotive/can/state_machine.py +300 -0
  110. oscura/automotive/can/stimulus_response.py +461 -0
  111. oscura/automotive/dbc/__init__.py +15 -0
  112. oscura/automotive/dbc/generator.py +156 -0
  113. oscura/automotive/dbc/parser.py +146 -0
  114. oscura/automotive/dtc/__init__.py +30 -0
  115. oscura/automotive/dtc/database.py +3036 -0
  116. oscura/automotive/j1939/__init__.py +14 -0
  117. oscura/automotive/j1939/decoder.py +745 -0
  118. oscura/automotive/loaders/__init__.py +35 -0
  119. oscura/automotive/loaders/asc.py +98 -0
  120. oscura/automotive/loaders/blf.py +77 -0
  121. oscura/automotive/loaders/csv_can.py +136 -0
  122. oscura/automotive/loaders/dispatcher.py +136 -0
  123. oscura/automotive/loaders/mdf.py +331 -0
  124. oscura/automotive/loaders/pcap.py +132 -0
  125. oscura/automotive/obd/__init__.py +14 -0
  126. oscura/automotive/obd/decoder.py +707 -0
  127. oscura/automotive/uds/__init__.py +48 -0
  128. oscura/automotive/uds/decoder.py +265 -0
  129. oscura/automotive/uds/models.py +64 -0
  130. oscura/automotive/visualization.py +369 -0
  131. oscura/batch/__init__.py +55 -0
  132. oscura/batch/advanced.py +627 -0
  133. oscura/batch/aggregate.py +300 -0
  134. oscura/batch/analyze.py +139 -0
  135. oscura/batch/logging.py +487 -0
  136. oscura/batch/metrics.py +556 -0
  137. oscura/builders/__init__.py +41 -0
  138. oscura/builders/signal_builder.py +1131 -0
  139. oscura/cli/__init__.py +14 -0
  140. oscura/cli/batch.py +339 -0
  141. oscura/cli/characterize.py +273 -0
  142. oscura/cli/compare.py +775 -0
  143. oscura/cli/decode.py +551 -0
  144. oscura/cli/main.py +247 -0
  145. oscura/cli/shell.py +350 -0
  146. oscura/comparison/__init__.py +66 -0
  147. oscura/comparison/compare.py +397 -0
  148. oscura/comparison/golden.py +487 -0
  149. oscura/comparison/limits.py +391 -0
  150. oscura/comparison/mask.py +434 -0
  151. oscura/comparison/trace_diff.py +30 -0
  152. oscura/comparison/visualization.py +481 -0
  153. oscura/compliance/__init__.py +70 -0
  154. oscura/compliance/advanced.py +756 -0
  155. oscura/compliance/masks.py +363 -0
  156. oscura/compliance/reporting.py +483 -0
  157. oscura/compliance/testing.py +298 -0
  158. oscura/component/__init__.py +38 -0
  159. oscura/component/impedance.py +365 -0
  160. oscura/component/reactive.py +598 -0
  161. oscura/component/transmission_line.py +312 -0
  162. oscura/config/__init__.py +191 -0
  163. oscura/config/defaults.py +254 -0
  164. oscura/config/loader.py +348 -0
  165. oscura/config/memory.py +271 -0
  166. oscura/config/migration.py +458 -0
  167. oscura/config/pipeline.py +1077 -0
  168. oscura/config/preferences.py +530 -0
  169. oscura/config/protocol.py +875 -0
  170. oscura/config/schema.py +713 -0
  171. oscura/config/settings.py +420 -0
  172. oscura/config/thresholds.py +599 -0
  173. oscura/convenience.py +457 -0
  174. oscura/core/__init__.py +299 -0
  175. oscura/core/audit.py +457 -0
  176. oscura/core/backend_selector.py +405 -0
  177. oscura/core/cache.py +590 -0
  178. oscura/core/cancellation.py +439 -0
  179. oscura/core/confidence.py +225 -0
  180. oscura/core/config.py +506 -0
  181. oscura/core/correlation.py +216 -0
  182. oscura/core/cross_domain.py +422 -0
  183. oscura/core/debug.py +301 -0
  184. oscura/core/edge_cases.py +541 -0
  185. oscura/core/exceptions.py +535 -0
  186. oscura/core/gpu_backend.py +523 -0
  187. oscura/core/lazy.py +832 -0
  188. oscura/core/log_query.py +540 -0
  189. oscura/core/logging.py +931 -0
  190. oscura/core/logging_advanced.py +952 -0
  191. oscura/core/memoize.py +171 -0
  192. oscura/core/memory_check.py +274 -0
  193. oscura/core/memory_guard.py +290 -0
  194. oscura/core/memory_limits.py +336 -0
  195. oscura/core/memory_monitor.py +453 -0
  196. oscura/core/memory_progress.py +465 -0
  197. oscura/core/memory_warnings.py +315 -0
  198. oscura/core/numba_backend.py +362 -0
  199. oscura/core/performance.py +352 -0
  200. oscura/core/progress.py +524 -0
  201. oscura/core/provenance.py +358 -0
  202. oscura/core/results.py +331 -0
  203. oscura/core/types.py +504 -0
  204. oscura/core/uncertainty.py +383 -0
  205. oscura/discovery/__init__.py +52 -0
  206. oscura/discovery/anomaly_detector.py +672 -0
  207. oscura/discovery/auto_decoder.py +415 -0
  208. oscura/discovery/comparison.py +497 -0
  209. oscura/discovery/quality_validator.py +528 -0
  210. oscura/discovery/signal_detector.py +769 -0
  211. oscura/dsl/__init__.py +73 -0
  212. oscura/dsl/commands.py +246 -0
  213. oscura/dsl/interpreter.py +455 -0
  214. oscura/dsl/parser.py +689 -0
  215. oscura/dsl/repl.py +172 -0
  216. oscura/exceptions.py +59 -0
  217. oscura/exploratory/__init__.py +111 -0
  218. oscura/exploratory/error_recovery.py +642 -0
  219. oscura/exploratory/fuzzy.py +513 -0
  220. oscura/exploratory/fuzzy_advanced.py +786 -0
  221. oscura/exploratory/legacy.py +831 -0
  222. oscura/exploratory/parse.py +358 -0
  223. oscura/exploratory/recovery.py +275 -0
  224. oscura/exploratory/sync.py +382 -0
  225. oscura/exploratory/unknown.py +707 -0
  226. oscura/export/__init__.py +25 -0
  227. oscura/export/wireshark/README.md +265 -0
  228. oscura/export/wireshark/__init__.py +47 -0
  229. oscura/export/wireshark/generator.py +312 -0
  230. oscura/export/wireshark/lua_builder.py +159 -0
  231. oscura/export/wireshark/templates/dissector.lua.j2 +92 -0
  232. oscura/export/wireshark/type_mapping.py +165 -0
  233. oscura/export/wireshark/validator.py +105 -0
  234. oscura/exporters/__init__.py +94 -0
  235. oscura/exporters/csv.py +303 -0
  236. oscura/exporters/exporters.py +44 -0
  237. oscura/exporters/hdf5.py +219 -0
  238. oscura/exporters/html_export.py +701 -0
  239. oscura/exporters/json_export.py +291 -0
  240. oscura/exporters/markdown_export.py +367 -0
  241. oscura/exporters/matlab_export.py +354 -0
  242. oscura/exporters/npz_export.py +219 -0
  243. oscura/exporters/spice_export.py +210 -0
  244. oscura/extensibility/__init__.py +131 -0
  245. oscura/extensibility/docs.py +752 -0
  246. oscura/extensibility/extensions.py +1125 -0
  247. oscura/extensibility/logging.py +259 -0
  248. oscura/extensibility/measurements.py +485 -0
  249. oscura/extensibility/plugins.py +414 -0
  250. oscura/extensibility/registry.py +346 -0
  251. oscura/extensibility/templates.py +913 -0
  252. oscura/extensibility/validation.py +651 -0
  253. oscura/filtering/__init__.py +89 -0
  254. oscura/filtering/base.py +563 -0
  255. oscura/filtering/convenience.py +564 -0
  256. oscura/filtering/design.py +725 -0
  257. oscura/filtering/filters.py +32 -0
  258. oscura/filtering/introspection.py +605 -0
  259. oscura/guidance/__init__.py +24 -0
  260. oscura/guidance/recommender.py +429 -0
  261. oscura/guidance/wizard.py +518 -0
  262. oscura/inference/__init__.py +251 -0
  263. oscura/inference/active_learning/README.md +153 -0
  264. oscura/inference/active_learning/__init__.py +38 -0
  265. oscura/inference/active_learning/lstar.py +257 -0
  266. oscura/inference/active_learning/observation_table.py +230 -0
  267. oscura/inference/active_learning/oracle.py +78 -0
  268. oscura/inference/active_learning/teachers/__init__.py +15 -0
  269. oscura/inference/active_learning/teachers/simulator.py +192 -0
  270. oscura/inference/adaptive_tuning.py +453 -0
  271. oscura/inference/alignment.py +653 -0
  272. oscura/inference/bayesian.py +943 -0
  273. oscura/inference/binary.py +1016 -0
  274. oscura/inference/crc_reverse.py +711 -0
  275. oscura/inference/logic.py +288 -0
  276. oscura/inference/message_format.py +1305 -0
  277. oscura/inference/protocol.py +417 -0
  278. oscura/inference/protocol_dsl.py +1084 -0
  279. oscura/inference/protocol_library.py +1230 -0
  280. oscura/inference/sequences.py +809 -0
  281. oscura/inference/signal_intelligence.py +1509 -0
  282. oscura/inference/spectral.py +215 -0
  283. oscura/inference/state_machine.py +634 -0
  284. oscura/inference/stream.py +918 -0
  285. oscura/integrations/__init__.py +59 -0
  286. oscura/integrations/llm.py +1827 -0
  287. oscura/jupyter/__init__.py +32 -0
  288. oscura/jupyter/display.py +268 -0
  289. oscura/jupyter/magic.py +334 -0
  290. oscura/loaders/__init__.py +526 -0
  291. oscura/loaders/binary.py +69 -0
  292. oscura/loaders/configurable.py +1255 -0
  293. oscura/loaders/csv.py +26 -0
  294. oscura/loaders/csv_loader.py +473 -0
  295. oscura/loaders/hdf5.py +9 -0
  296. oscura/loaders/hdf5_loader.py +510 -0
  297. oscura/loaders/lazy.py +370 -0
  298. oscura/loaders/mmap_loader.py +583 -0
  299. oscura/loaders/numpy_loader.py +436 -0
  300. oscura/loaders/pcap.py +432 -0
  301. oscura/loaders/preprocessing.py +368 -0
  302. oscura/loaders/rigol.py +287 -0
  303. oscura/loaders/sigrok.py +321 -0
  304. oscura/loaders/tdms.py +367 -0
  305. oscura/loaders/tektronix.py +711 -0
  306. oscura/loaders/validation.py +584 -0
  307. oscura/loaders/vcd.py +464 -0
  308. oscura/loaders/wav.py +233 -0
  309. oscura/math/__init__.py +45 -0
  310. oscura/math/arithmetic.py +824 -0
  311. oscura/math/interpolation.py +413 -0
  312. oscura/onboarding/__init__.py +39 -0
  313. oscura/onboarding/help.py +498 -0
  314. oscura/onboarding/tutorials.py +405 -0
  315. oscura/onboarding/wizard.py +466 -0
  316. oscura/optimization/__init__.py +19 -0
  317. oscura/optimization/parallel.py +440 -0
  318. oscura/optimization/search.py +532 -0
  319. oscura/pipeline/__init__.py +43 -0
  320. oscura/pipeline/base.py +338 -0
  321. oscura/pipeline/composition.py +242 -0
  322. oscura/pipeline/parallel.py +448 -0
  323. oscura/pipeline/pipeline.py +375 -0
  324. oscura/pipeline/reverse_engineering.py +1119 -0
  325. oscura/plugins/__init__.py +122 -0
  326. oscura/plugins/base.py +272 -0
  327. oscura/plugins/cli.py +497 -0
  328. oscura/plugins/discovery.py +411 -0
  329. oscura/plugins/isolation.py +418 -0
  330. oscura/plugins/lifecycle.py +959 -0
  331. oscura/plugins/manager.py +493 -0
  332. oscura/plugins/registry.py +421 -0
  333. oscura/plugins/versioning.py +372 -0
  334. oscura/py.typed +0 -0
  335. oscura/quality/__init__.py +65 -0
  336. oscura/quality/ensemble.py +740 -0
  337. oscura/quality/explainer.py +338 -0
  338. oscura/quality/scoring.py +616 -0
  339. oscura/quality/warnings.py +456 -0
  340. oscura/reporting/__init__.py +248 -0
  341. oscura/reporting/advanced.py +1234 -0
  342. oscura/reporting/analyze.py +448 -0
  343. oscura/reporting/argument_preparer.py +596 -0
  344. oscura/reporting/auto_report.py +507 -0
  345. oscura/reporting/batch.py +615 -0
  346. oscura/reporting/chart_selection.py +223 -0
  347. oscura/reporting/comparison.py +330 -0
  348. oscura/reporting/config.py +615 -0
  349. oscura/reporting/content/__init__.py +39 -0
  350. oscura/reporting/content/executive.py +127 -0
  351. oscura/reporting/content/filtering.py +191 -0
  352. oscura/reporting/content/minimal.py +257 -0
  353. oscura/reporting/content/verbosity.py +162 -0
  354. oscura/reporting/core.py +508 -0
  355. oscura/reporting/core_formats/__init__.py +17 -0
  356. oscura/reporting/core_formats/multi_format.py +210 -0
  357. oscura/reporting/engine.py +836 -0
  358. oscura/reporting/export.py +366 -0
  359. oscura/reporting/formatting/__init__.py +129 -0
  360. oscura/reporting/formatting/emphasis.py +81 -0
  361. oscura/reporting/formatting/numbers.py +403 -0
  362. oscura/reporting/formatting/standards.py +55 -0
  363. oscura/reporting/formatting.py +466 -0
  364. oscura/reporting/html.py +578 -0
  365. oscura/reporting/index.py +590 -0
  366. oscura/reporting/multichannel.py +296 -0
  367. oscura/reporting/output.py +379 -0
  368. oscura/reporting/pdf.py +373 -0
  369. oscura/reporting/plots.py +731 -0
  370. oscura/reporting/pptx_export.py +360 -0
  371. oscura/reporting/renderers/__init__.py +11 -0
  372. oscura/reporting/renderers/pdf.py +94 -0
  373. oscura/reporting/sections.py +471 -0
  374. oscura/reporting/standards.py +680 -0
  375. oscura/reporting/summary_generator.py +368 -0
  376. oscura/reporting/tables.py +397 -0
  377. oscura/reporting/template_system.py +724 -0
  378. oscura/reporting/templates/__init__.py +15 -0
  379. oscura/reporting/templates/definition.py +205 -0
  380. oscura/reporting/templates/index.html +649 -0
  381. oscura/reporting/templates/index.md +173 -0
  382. oscura/schemas/__init__.py +158 -0
  383. oscura/schemas/bus_configuration.json +322 -0
  384. oscura/schemas/device_mapping.json +182 -0
  385. oscura/schemas/packet_format.json +418 -0
  386. oscura/schemas/protocol_definition.json +363 -0
  387. oscura/search/__init__.py +16 -0
  388. oscura/search/anomaly.py +292 -0
  389. oscura/search/context.py +149 -0
  390. oscura/search/pattern.py +160 -0
  391. oscura/session/__init__.py +34 -0
  392. oscura/session/annotations.py +289 -0
  393. oscura/session/history.py +313 -0
  394. oscura/session/session.py +445 -0
  395. oscura/streaming/__init__.py +43 -0
  396. oscura/streaming/chunked.py +611 -0
  397. oscura/streaming/progressive.py +393 -0
  398. oscura/streaming/realtime.py +622 -0
  399. oscura/testing/__init__.py +54 -0
  400. oscura/testing/synthetic.py +808 -0
  401. oscura/triggering/__init__.py +68 -0
  402. oscura/triggering/base.py +229 -0
  403. oscura/triggering/edge.py +353 -0
  404. oscura/triggering/pattern.py +344 -0
  405. oscura/triggering/pulse.py +581 -0
  406. oscura/triggering/window.py +453 -0
  407. oscura/ui/__init__.py +48 -0
  408. oscura/ui/formatters.py +526 -0
  409. oscura/ui/progressive_display.py +340 -0
  410. oscura/utils/__init__.py +99 -0
  411. oscura/utils/autodetect.py +338 -0
  412. oscura/utils/buffer.py +389 -0
  413. oscura/utils/lazy.py +407 -0
  414. oscura/utils/lazy_imports.py +147 -0
  415. oscura/utils/memory.py +836 -0
  416. oscura/utils/memory_advanced.py +1326 -0
  417. oscura/utils/memory_extensions.py +465 -0
  418. oscura/utils/progressive.py +352 -0
  419. oscura/utils/windowing.py +362 -0
  420. oscura/visualization/__init__.py +321 -0
  421. oscura/visualization/accessibility.py +526 -0
  422. oscura/visualization/annotations.py +374 -0
  423. oscura/visualization/axis_scaling.py +305 -0
  424. oscura/visualization/colors.py +453 -0
  425. oscura/visualization/digital.py +337 -0
  426. oscura/visualization/eye.py +420 -0
  427. oscura/visualization/histogram.py +281 -0
  428. oscura/visualization/interactive.py +858 -0
  429. oscura/visualization/jitter.py +702 -0
  430. oscura/visualization/keyboard.py +394 -0
  431. oscura/visualization/layout.py +365 -0
  432. oscura/visualization/optimization.py +1028 -0
  433. oscura/visualization/palettes.py +446 -0
  434. oscura/visualization/plot.py +92 -0
  435. oscura/visualization/power.py +290 -0
  436. oscura/visualization/power_extended.py +626 -0
  437. oscura/visualization/presets.py +467 -0
  438. oscura/visualization/protocols.py +932 -0
  439. oscura/visualization/render.py +207 -0
  440. oscura/visualization/rendering.py +444 -0
  441. oscura/visualization/reverse_engineering.py +791 -0
  442. oscura/visualization/signal_integrity.py +808 -0
  443. oscura/visualization/specialized.py +553 -0
  444. oscura/visualization/spectral.py +811 -0
  445. oscura/visualization/styles.py +381 -0
  446. oscura/visualization/thumbnails.py +311 -0
  447. oscura/visualization/time_axis.py +351 -0
  448. oscura/visualization/waveform.py +367 -0
  449. oscura/workflow/__init__.py +13 -0
  450. oscura/workflow/dag.py +377 -0
  451. oscura/workflows/__init__.py +58 -0
  452. oscura/workflows/compliance.py +280 -0
  453. oscura/workflows/digital.py +272 -0
  454. oscura/workflows/multi_trace.py +502 -0
  455. oscura/workflows/power.py +178 -0
  456. oscura/workflows/protocol.py +492 -0
  457. oscura/workflows/reverse_engineering.py +639 -0
  458. oscura/workflows/signal_integrity.py +227 -0
  459. oscura-0.1.1.dist-info/METADATA +300 -0
  460. oscura-0.1.1.dist-info/RECORD +463 -0
  461. oscura-0.1.1.dist-info/entry_points.txt +2 -0
  462. {oscura-0.0.1.dist-info → oscura-0.1.1.dist-info}/licenses/LICENSE +1 -1
  463. oscura-0.0.1.dist-info/METADATA +0 -63
  464. oscura-0.0.1.dist-info/RECORD +0 -5
  465. {oscura-0.0.1.dist-info → oscura-0.1.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,797 @@
1
+ """Pattern learning and automatic discovery from binary data.
2
+
3
+ - RE-PAT-004: Pattern Learning and Discovery
4
+
5
+ This module provides machine learning inspired approaches for discovering
6
+ patterns in binary data without prior knowledge, including entropy-based
7
+ segmentation, frequency analysis, and structural inference.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from collections import Counter, defaultdict
13
+ from collections.abc import Sequence
14
+ from dataclasses import dataclass, field
15
+
16
+ import numpy as np
17
+
18
+
19
@dataclass
class LearnedPattern:
    """Byte sequence found by the learner, together with its statistics.

    Part of RE-PAT-004 (learned pattern representation).

    Attributes:
        pattern: Raw bytes making up the pattern.
        frequency: How many times the pattern occurred.
        confidence: Score in the range 0-1.
        positions: Offsets at which the pattern was found.
        context_before: Bytes commonly seen right before the pattern.
        context_after: Bytes commonly seen right after the pattern.
        is_structural: True when the pattern looks structural.
        is_delimiter: True when the pattern looks like a delimiter.
    """

    # Required on construction.
    pattern: bytes
    frequency: int
    confidence: float

    # Optional details; each instance gets its own fresh positions list.
    positions: list[int] = field(default_factory=list)
    context_before: bytes = b""
    context_after: bytes = b""
    is_structural: bool = False
    is_delimiter: bool = False
44
+
45
+
46
@dataclass
class StructureHypothesis:
    """Guess about how the analysed data is laid out.

    Part of RE-PAT-004 (structure hypothesis).

    Attributes:
        field_boundaries: Field boundaries that were detected.
        field_types: Types inferred for the fields.
        header_size: Estimated size of the header.
        record_size: Estimated record size when records are fixed-size,
            otherwise None.
        delimiters: Delimiters that were detected.
        confidence: Overall confidence in the hypothesis.
    """

    # All fields are required; none has a default.
    field_boundaries: list[int]
    field_types: list[str]
    header_size: int
    record_size: int | None
    delimiters: list[bytes]
    confidence: float
67
+
68
+
69
@dataclass
class NgramModel:
    """Frequency table of fixed-length byte n-grams.

    Part of RE-PAT-004 (n-gram modeling).

    Attributes:
        n: Length of each n-gram.
        counts: Mapping from n-gram to the number of times it was seen.
        total: Total number of n-grams observed.
        vocabulary_size: Number of distinct n-grams.
    """

    n: int
    # A fresh dict per instance, so models never share their counts.
    counts: dict[bytes, int] = field(default_factory=dict)
    total: int = 0
    vocabulary_size: int = 0
86
+
87
+
88
+ class PatternLearner:
89
+ """Learn patterns from binary data samples.
90
+
91
+ Implements RE-PAT-004: Pattern Learning and Discovery.
92
+
93
+ Uses entropy analysis, n-gram frequency, and positional statistics
94
+ to discover recurring patterns without prior knowledge.
95
+
96
+ Example:
97
+ >>> learner = PatternLearner()
98
+ >>> learner.add_sample(data1)
99
+ >>> learner.add_sample(data2)
100
+ >>> patterns = learner.learn_patterns()
101
+ """
102
+
103
+ def __init__(
104
+ self,
105
+ min_pattern_length: int = 2,
106
+ max_pattern_length: int = 16,
107
+ min_frequency: int = 3,
108
+ min_confidence: float = 0.5,
109
+ ) -> None:
110
+ """Initialize pattern learner.
111
+
112
+ Args:
113
+ min_pattern_length: Minimum pattern length to consider.
114
+ max_pattern_length: Maximum pattern length to consider.
115
+ min_frequency: Minimum occurrences to consider pattern.
116
+ min_confidence: Minimum confidence threshold.
117
+ """
118
+ self.min_pattern_length = min_pattern_length
119
+ self.max_pattern_length = max_pattern_length
120
+ self.min_frequency = min_frequency
121
+ self.min_confidence = min_confidence
122
+
123
+ self._samples: list[bytes] = []
124
+ self._ngram_models: dict[int, NgramModel] = {}
125
+ self._position_stats: dict[bytes, list[int]] = defaultdict(list)
126
+
127
def add_sample(self, data: bytes) -> None:
    """Append one binary sample to the set used for learning.

    Args:
        data: The binary sample to store.
    """
    stored = self._samples
    stored.append(data)
134
+
135
def add_samples(self, samples: Sequence[bytes]) -> None:
    """Append several binary samples at once, keeping their order.

    Args:
        samples: The binary samples to store.
    """
    stored = self._samples
    stored.extend(samples)
142
+
143
def learn_patterns(self, top_k: int = 20) -> list[LearnedPattern]:
    """Run the discovery workflow over every stored sample.

    Implements RE-PAT-004: Pattern discovery workflow. The n-gram models
    are rebuilt, frequent n-grams are collected as candidates, and the
    candidates are scored and filtered.

    Args:
        top_k: Upper bound on the number of patterns returned.

    Returns:
        The scored patterns ordered from highest to lowest confidence,
        truncated to ``top_k`` entries. Empty when no samples were added.
    """
    if not self._samples:
        return []

    self._build_ngram_models()

    # A reversed sort is still stable, so equally confident patterns keep
    # the order in which they were scored.
    ranked = sorted(
        self._score_patterns(self._find_candidates()),
        key=lambda learned: learned.confidence,
        reverse=True,
    )
    return ranked[:top_k]
169
+
170
def learn_structure(self) -> StructureHypothesis:
    """Derive a hypothesis about how the stored samples are organised.

    Implements RE-PAT-004: Structure inference.

    Returns:
        A StructureHypothesis assembled from the boundary, type, header,
        record-size and delimiter helpers. With no samples, every list is
        empty, header_size is 0, record_size is None and confidence is 0.0.
    """
    if self._samples:
        # Each helper contributes one facet; field types and header size
        # are both derived from the detected boundaries.
        boundaries = self._detect_field_boundaries()
        field_types = self._infer_field_types(boundaries)
        header_size = self._estimate_header_size(boundaries)
        record_size = self._detect_record_size()
        delimiters = self._find_delimiters()
        confidence = self._calculate_structure_confidence(
            boundaries, field_types, record_size, delimiters
        )
    else:
        # Nothing to analyse: fall back to an empty, zero-confidence result.
        boundaries, field_types, delimiters = [], [], []
        header_size, record_size, confidence = 0, None, 0.0

    return StructureHypothesis(
        field_boundaries=boundaries,
        field_types=field_types,
        header_size=header_size,
        record_size=record_size,
        delimiters=delimiters,
        confidence=confidence,
    )
216
+
217
def predict_next_bytes(
    self, context: bytes, n_predictions: int = 5
) -> list[tuple[bytes, float]]:
    """Rank the bytes most likely to follow ``context``.

    Implements RE-PAT-004: Byte prediction using n-gram models.

    Model sizes are tried from the largest that the context can fill down
    to 1; the first model with any n-gram whose leading bytes equal the
    tail of the context supplies all predictions.

    Args:
        context: Bytes preceding the position to predict.
        n_predictions: Maximum number of predictions returned.

    Returns:
        (next_byte, probability) tuples sorted by descending probability.
        Empty when no available model matches the context.
    """
    largest = min(len(context) + 1, self.max_pattern_length)

    for size in range(largest, 0, -1):
        model = self._ngram_models.get(size)
        if model is None:
            continue

        # An n-gram of this size is predicted from its first size-1 bytes.
        prefix = b"" if size <= 1 else context[-(size - 1) :]

        followers = {
            gram[-1:]: count
            for gram, count in model.counts.items()
            if gram[:-1] == prefix
        }
        if not followers:
            continue

        total = sum(followers.values())
        ranked = [(next_byte, count / total) for next_byte, count in followers.items()]
        ranked.sort(key=lambda item: item[1], reverse=True)
        return ranked[:n_predictions]

    return []
257
+
258
def build_ngram_model(self, n: int) -> NgramModel:
    """Count every length-``n`` window across the stored samples.

    The finished model is also stored in ``self._ngram_models`` under
    ``n``, replacing any model previously stored for that size.

    Args:
        n: N-gram size.

    Returns:
        NgramModel holding the counts, total and vocabulary size.
    """
    model = NgramModel(n=n)
    tally = model.counts

    for sample in self._samples:
        # Slide a window of n bytes over the sample, one byte at a time.
        for start in range(len(sample) - n + 1):
            gram = sample[start : start + n]
            tally[gram] = tally.get(gram, 0) + 1
            model.total += 1

    # Every distinct n-gram has exactly one key in the tally.
    model.vocabulary_size = len(tally)

    self._ngram_models[n] = model
    return model
280
+
281
+ def _build_ngram_models(self) -> None:
282
+ """Build n-gram models for all sizes."""
283
+ for n in range(self.min_pattern_length, self.max_pattern_length + 1):
284
+ self.build_ngram_model(n)
285
+
286
+ def _find_candidates(self) -> dict[bytes, int]:
287
+ """Find candidate patterns based on frequency.
288
+
289
+ Returns:
290
+ Dictionary mapping patterns to frequencies.
291
+ """
292
+ candidates = {}
293
+
294
+ for n in range(self.min_pattern_length, self.max_pattern_length + 1):
295
+ if n not in self._ngram_models:
296
+ continue
297
+
298
+ model = self._ngram_models[n]
299
+ for pattern, count in model.counts.items():
300
+ if count >= self.min_frequency:
301
+ candidates[pattern] = count
302
+
303
+ return candidates
304
+
305
def _score_patterns(self, candidates: dict[bytes, int]) -> list[LearnedPattern]:
    """Turn frequent candidates into scored LearnedPattern objects.

    Args:
        candidates: Mapping of pattern -> frequency.

    Returns:
        One LearnedPattern per candidate whose confidence reaches
        ``min_confidence``, in the iteration order of ``candidates``.
    """
    results = []

    for pattern, frequency in candidates.items():
        # Record every occurrence as (sample index, offset). The search
        # resumes one byte after each hit, so overlapping matches count.
        hits = []
        for sample_idx, sample in enumerate(self._samples):
            offset = sample.find(pattern)
            while offset != -1:
                hits.append((sample_idx, offset))
                offset = sample.find(pattern, offset + 1)

        confidence = self._calculate_pattern_confidence(pattern, hits)
        if confidence < self.min_confidence:
            continue

        before, after = self._get_context(pattern, hits)
        structural = self._is_structural(pattern, hits)
        delimiter = self._is_delimiter(pattern, hits)

        # Only the offsets are kept on the result; sample indices are dropped.
        learned = LearnedPattern(
            pattern=pattern,
            frequency=frequency,
            confidence=confidence,
            positions=[offset for _, offset in hits],
            context_before=before,
            context_after=after,
            is_structural=structural,
            is_delimiter=delimiter,
        )
        results.append(learned)

    return results
357
+
358
+ def _calculate_pattern_confidence(
359
+ self, pattern: bytes, positions: list[tuple[int, int]]
360
+ ) -> float:
361
+ """Calculate confidence score for pattern.
362
+
363
+ Args:
364
+ pattern: The pattern.
365
+ positions: List of (sample_idx, position) tuples.
366
+
367
+ Returns:
368
+ Confidence score (0-1).
369
+ """
370
+ if not positions:
371
+ return 0.0
372
+
373
+ # Factor 1: Frequency across samples
374
+ samples_with_pattern = len({p[0] for p in positions})
375
+ sample_coverage = samples_with_pattern / len(self._samples)
376
+
377
+ # Factor 2: Positional consistency
378
+ position_offsets = [p[1] for p in positions]
379
+ if len(position_offsets) > 1:
380
+ variance = float(np.var(position_offsets))
381
+ max_pos = max(max(len(s) for s in self._samples), 1)
382
+ position_consistency = 1.0 / (1.0 + variance / (max_pos**2))
383
+ else:
384
+ position_consistency = 0.5
385
+
386
+ # Factor 3: Pattern complexity (non-trivial patterns)
387
+ unique_bytes = len(set(pattern))
388
+ complexity = unique_bytes / len(pattern) if pattern else 0
389
+
390
+ # Combined score
391
+ confidence = 0.4 * sample_coverage + 0.3 * position_consistency + 0.3 * complexity
392
+
393
+ return float(min(1.0, confidence))
394
+
395
+ def _get_context(self, pattern: bytes, positions: list[tuple[int, int]]) -> tuple[bytes, bytes]:
396
+ """Get common context before and after pattern.
397
+
398
+ Args:
399
+ pattern: The pattern.
400
+ positions: List of (sample_idx, position) tuples.
401
+
402
+ Returns:
403
+ Tuple of (context_before, context_after).
404
+ """
405
+ before_bytes = []
406
+ after_bytes = []
407
+
408
+ context_len = min(4, self.min_pattern_length)
409
+
410
+ for sample_idx, pos in positions[:100]: # Limit samples
411
+ sample = self._samples[sample_idx]
412
+
413
+ # Bytes before
414
+ if pos >= context_len:
415
+ before_bytes.append(sample[pos - context_len : pos])
416
+
417
+ # Bytes after
418
+ end_pos = pos + len(pattern)
419
+ if end_pos + context_len <= len(sample):
420
+ after_bytes.append(sample[end_pos : end_pos + context_len])
421
+
422
+ # Find most common
423
+ context_before = b""
424
+ context_after = b""
425
+
426
+ if before_bytes:
427
+ counter = Counter(before_bytes)
428
+ most_common = counter.most_common(1)
429
+ if most_common and most_common[0][1] >= 2:
430
+ context_before = most_common[0][0]
431
+
432
+ if after_bytes:
433
+ counter = Counter(after_bytes)
434
+ most_common = counter.most_common(1)
435
+ if most_common and most_common[0][1] >= 2:
436
+ context_after = most_common[0][0]
437
+
438
+ return context_before, context_after
439
+
440
+ def _is_structural(self, pattern: bytes, positions: list[tuple[int, int]]) -> bool:
441
+ """Check if pattern appears structural.
442
+
443
+ Args:
444
+ pattern: The pattern.
445
+ positions: List of positions.
446
+
447
+ Returns:
448
+ True if pattern appears structural.
449
+ """
450
+ if not positions:
451
+ return False
452
+
453
+ # Structural patterns tend to appear at consistent offsets
454
+ offsets = [p[1] for p in positions]
455
+ if len(set(offsets)) == 1:
456
+ return True
457
+
458
+ # Or at regular intervals
459
+ if len(offsets) > 2:
460
+ diffs = [
461
+ offsets[i + 1] - offsets[i]
462
+ for i in range(len(offsets) - 1)
463
+ if offsets[i + 1] > offsets[i]
464
+ ]
465
+ if diffs and len(set(diffs)) == 1:
466
+ return True
467
+
468
+ return False
469
+
470
+ def _is_delimiter(self, pattern: bytes, positions: list[tuple[int, int]]) -> bool:
471
+ """Check if pattern appears to be a delimiter.
472
+
473
+ Args:
474
+ pattern: The pattern.
475
+ positions: List of positions.
476
+
477
+ Returns:
478
+ True if pattern appears to be a delimiter.
479
+ """
480
+ # Delimiters often have regular spacing
481
+ if not positions:
482
+ return False
483
+
484
+ # Group by sample
485
+ by_sample = defaultdict(list)
486
+ for sample_idx, pos in positions:
487
+ by_sample[sample_idx].append(pos)
488
+
489
+ regular_count = 0
490
+ for sample_positions in by_sample.values():
491
+ if len(sample_positions) >= 3:
492
+ diffs = [
493
+ sample_positions[i + 1] - sample_positions[i]
494
+ for i in range(len(sample_positions) - 1)
495
+ ]
496
+ # Check for regular intervals
497
+ if len(set(diffs)) == 1 or (diffs and max(diffs) - min(diffs) < 4):
498
+ regular_count += 1
499
+
500
+ return regular_count >= len(by_sample) * 0.5
501
+
502
+ def _detect_field_boundaries(self) -> list[int]:
503
+ """Detect field boundaries using entropy transitions."""
504
+ if not self._samples:
505
+ return []
506
+
507
+ # Use first sample or combined samples
508
+ combined = b"".join(self._samples[:10])
509
+
510
+ from oscura.analyzers.statistical.entropy import detect_entropy_transitions
511
+
512
+ try:
513
+ transitions = detect_entropy_transitions(combined, window=64, threshold=0.8, min_gap=4)
514
+ return [t.offset for t in transitions]
515
+ except ValueError:
516
+ return []
517
+
518
+ def _infer_field_types(self, boundaries: list[int]) -> list[str]:
519
+ """Infer field types based on content patterns.
520
+
521
+ Args:
522
+ boundaries: Field boundary offsets.
523
+
524
+ Returns:
525
+ List of inferred field types.
526
+ """
527
+ if not boundaries or not self._samples:
528
+ return []
529
+
530
+ field_types = []
531
+ sample = self._samples[0]
532
+ boundaries = [0] + boundaries + [len(sample)]
533
+
534
+ for i in range(len(boundaries) - 1):
535
+ start = boundaries[i]
536
+ end = min(boundaries[i + 1], len(sample))
537
+ field_data = sample[start:end]
538
+
539
+ field_type = self._classify_field(field_data)
540
+ field_types.append(field_type)
541
+
542
+ return field_types
543
+
544
+ def _classify_field(self, data: bytes) -> str:
545
+ """Classify a field based on its content.
546
+
547
+ Args:
548
+ data: Field data.
549
+
550
+ Returns:
551
+ Field type string.
552
+ """
553
+ if not data:
554
+ return "empty"
555
+
556
+ # Check for constant
557
+ if len(set(data)) == 1:
558
+ return "constant"
559
+
560
+ # Check for counter (monotonic)
561
+ if len(data) <= 4:
562
+ values = list(data)
563
+ if all(values[i] <= values[i + 1] for i in range(len(values) - 1)):
564
+ return "counter"
565
+
566
+ # Check for printable text
567
+ printable = sum(1 for b in data if 32 <= b <= 126)
568
+ if printable / len(data) > 0.8:
569
+ return "text"
570
+
571
+ # Check for high entropy (random/encrypted)
572
+ from oscura.analyzers.statistical.entropy import shannon_entropy
573
+
574
+ entropy = shannon_entropy(data)
575
+ if entropy > 7.0:
576
+ return "random"
577
+ elif entropy > 5.0:
578
+ return "binary"
579
+
580
+ return "structured"
581
+
582
+ def _estimate_header_size(self, boundaries: list[int]) -> int:
583
+ """Estimate header size from boundaries.
584
+
585
+ Args:
586
+ boundaries: Field boundary offsets.
587
+
588
+ Returns:
589
+ Estimated header size.
590
+ """
591
+ if not boundaries:
592
+ return 0
593
+
594
+ # Header typically ends at first high-entropy transition
595
+ for b in boundaries:
596
+ if b > 0:
597
+ return b
598
+
599
+ return boundaries[0] if boundaries else 0
600
+
601
+ def _detect_record_size(self) -> int | None:
602
+ """Detect fixed record size if present.
603
+
604
+ Returns:
605
+ Record size or None if variable.
606
+ """
607
+ if len(self._samples) < 2:
608
+ return None
609
+
610
+ # Check if all samples have same length
611
+ lengths = [len(s) for s in self._samples]
612
+ if len(set(lengths)) == 1:
613
+ return lengths[0]
614
+
615
+ # Check for GCD of lengths (might indicate record size)
616
+ from functools import reduce
617
+ from math import gcd
618
+
619
+ if all(length > 0 for length in lengths):
620
+ common_div = reduce(gcd, lengths)
621
+ if common_div > 1 and common_div != min(lengths):
622
+ return common_div
623
+
624
+ return None
625
+
626
+ def _find_delimiters(self) -> list[bytes]:
627
+ """Find delimiter patterns.
628
+
629
+ Returns:
630
+ List of likely delimiter bytes.
631
+ """
632
+ patterns = self.learn_patterns(top_k=50)
633
+ return [p.pattern for p in patterns if p.is_delimiter][:5]
634
+
635
+ def _calculate_structure_confidence(
636
+ self,
637
+ boundaries: list[int],
638
+ field_types: list[str],
639
+ record_size: int | None,
640
+ delimiters: list[bytes],
641
+ ) -> float:
642
+ """Calculate confidence in structure hypothesis.
643
+
644
+ Args:
645
+ boundaries: Detected boundaries.
646
+ field_types: Inferred types.
647
+ record_size: Detected record size.
648
+ delimiters: Found delimiters.
649
+
650
+ Returns:
651
+ Confidence score (0-1).
652
+ """
653
+ score = 0.0
654
+
655
+ # Having boundaries adds confidence
656
+ if boundaries:
657
+ score += 0.3
658
+
659
+ # Having non-unknown field types adds confidence
660
+ known_types = sum(1 for t in field_types if t != "structured")
661
+ if field_types:
662
+ score += 0.2 * (known_types / len(field_types))
663
+
664
+ # Fixed record size adds confidence
665
+ if record_size is not None:
666
+ score += 0.2
667
+
668
+ # Delimiters add confidence
669
+ if delimiters:
670
+ score += 0.2
671
+
672
+ # Multiple samples add confidence
673
+ if len(self._samples) > 5:
674
+ score += 0.1
675
+
676
+ return min(1.0, score)
677
+
678
+
679
def learn_patterns_from_data(
    data: bytes | Sequence[bytes],
    min_length: int = 2,
    max_length: int = 16,
    min_frequency: int = 3,
    top_k: int = 20,
) -> list[LearnedPattern]:
    """Learn patterns from binary data.

    Implements RE-PAT-004: Pattern Learning and Discovery.

    Builds a ``PatternLearner`` with the given limits, feeds it the data,
    and returns the result of ``learn_patterns``.

    Args:
        data: Single data sample or list of samples. A single sample may be
            any bytes-like buffer (``bytes``, ``bytearray`` or
            ``memoryview``); anything else is treated as a collection of
            samples.
        min_length: Minimum pattern length.
        max_length: Maximum pattern length.
        min_frequency: Minimum occurrences.
        top_k: Number of patterns to return.

    Returns:
        List of discovered patterns.

    Example:
        >>> patterns = learn_patterns_from_data(binary_data)
        >>> for p in patterns:
        ...     print(f"Pattern: {p.pattern.hex()}, freq: {p.frequency}")
    """
    learner = PatternLearner(
        min_pattern_length=min_length,
        max_pattern_length=max_length,
        min_frequency=min_frequency,
    )

    if isinstance(data, (bytes, bytearray, memoryview)):
        # A lone buffer is ONE sample. Iterating a bytearray or memoryview
        # yields integers, so it must not be handed to add_samples().
        learner.add_sample(data if isinstance(data, bytes) else bytes(data))
    else:
        learner.add_samples(data)

    return learner.learn_patterns(top_k=top_k)
717
+
718
+
719
def infer_structure(samples: Sequence[bytes]) -> StructureHypothesis:
    """Infer data structure from samples.

    Implements RE-PAT-004: Structure inference.

    Loads the samples into a ``PatternLearner`` created with its default
    settings and returns what ``learn_structure`` produces.

    Args:
        samples: List of binary data samples.

    Returns:
        StructureHypothesis about data organization.

    Example:
        >>> hypothesis = infer_structure(packet_samples)
        >>> print(f"Header size: {hypothesis.header_size}")
    """
    analyzer = PatternLearner()
    analyzer.add_samples(samples)
    hypothesis = analyzer.learn_structure()
    return hypothesis
737
+
738
+
739
+ def find_recurring_structures(
740
+ data: bytes,
741
+ min_size: int = 8,
742
+ max_size: int = 256,
743
+ ) -> list[tuple[int, int, float]]:
744
+ """Find recurring fixed-size structures in data.
745
+
746
+ Implements RE-PAT-004: Structure detection.
747
+
748
+ Args:
749
+ data: Binary data.
750
+ min_size: Minimum structure size.
751
+ max_size: Maximum structure size.
752
+
753
+ Returns:
754
+ List of (size, offset, confidence) tuples for detected structures.
755
+ """
756
+ results = []
757
+
758
+ for size in range(min_size, min(max_size, len(data) // 2) + 1):
759
+ # Check if data divides evenly
760
+ if len(data) % size != 0:
761
+ continue
762
+
763
+ num_records = len(data) // size
764
+ if num_records < 2:
765
+ continue
766
+
767
+ # Compare records for similarity
768
+ records = [data[i * size : (i + 1) * size] for i in range(num_records)]
769
+
770
+ # Calculate similarity between consecutive records
771
+ similarities = []
772
+ for i in range(len(records) - 1):
773
+ matching = sum(a == b for a, b in zip(records[i], records[i + 1], strict=True))
774
+ similarities.append(matching / size)
775
+
776
+ if similarities:
777
+ avg_similarity = sum(similarities) / len(similarities)
778
+ if avg_similarity > 0.3: # Some structural similarity
779
+ results.append((size, 0, avg_similarity))
780
+
781
+ # Sort by confidence
782
+ results.sort(key=lambda x: -x[2])
783
+ return results[:5]
784
+
785
+
786
# Public API of this module; entries are kept in sorted order.
__all__ = [
    # Classes and data classes
    "LearnedPattern",
    "NgramModel",
    "PatternLearner",
    "StructureHypothesis",
    # Functions
    "find_recurring_structures",
    "infer_structure",
    "learn_patterns_from_data",
]