oscura 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (465)
  1. oscura/__init__.py +813 -8
  2. oscura/__main__.py +392 -0
  3. oscura/analyzers/__init__.py +37 -0
  4. oscura/analyzers/digital/__init__.py +177 -0
  5. oscura/analyzers/digital/bus.py +691 -0
  6. oscura/analyzers/digital/clock.py +805 -0
  7. oscura/analyzers/digital/correlation.py +720 -0
  8. oscura/analyzers/digital/edges.py +632 -0
  9. oscura/analyzers/digital/extraction.py +413 -0
  10. oscura/analyzers/digital/quality.py +878 -0
  11. oscura/analyzers/digital/signal_quality.py +877 -0
  12. oscura/analyzers/digital/thresholds.py +708 -0
  13. oscura/analyzers/digital/timing.py +1104 -0
  14. oscura/analyzers/eye/__init__.py +46 -0
  15. oscura/analyzers/eye/diagram.py +434 -0
  16. oscura/analyzers/eye/metrics.py +555 -0
  17. oscura/analyzers/jitter/__init__.py +83 -0
  18. oscura/analyzers/jitter/ber.py +333 -0
  19. oscura/analyzers/jitter/decomposition.py +759 -0
  20. oscura/analyzers/jitter/measurements.py +413 -0
  21. oscura/analyzers/jitter/spectrum.py +220 -0
  22. oscura/analyzers/measurements.py +40 -0
  23. oscura/analyzers/packet/__init__.py +171 -0
  24. oscura/analyzers/packet/daq.py +1077 -0
  25. oscura/analyzers/packet/metrics.py +437 -0
  26. oscura/analyzers/packet/parser.py +327 -0
  27. oscura/analyzers/packet/payload.py +2156 -0
  28. oscura/analyzers/packet/payload_analysis.py +1312 -0
  29. oscura/analyzers/packet/payload_extraction.py +236 -0
  30. oscura/analyzers/packet/payload_patterns.py +670 -0
  31. oscura/analyzers/packet/stream.py +359 -0
  32. oscura/analyzers/patterns/__init__.py +266 -0
  33. oscura/analyzers/patterns/clustering.py +1036 -0
  34. oscura/analyzers/patterns/discovery.py +539 -0
  35. oscura/analyzers/patterns/learning.py +797 -0
  36. oscura/analyzers/patterns/matching.py +1091 -0
  37. oscura/analyzers/patterns/periodic.py +650 -0
  38. oscura/analyzers/patterns/sequences.py +767 -0
  39. oscura/analyzers/power/__init__.py +116 -0
  40. oscura/analyzers/power/ac_power.py +391 -0
  41. oscura/analyzers/power/basic.py +383 -0
  42. oscura/analyzers/power/conduction.py +314 -0
  43. oscura/analyzers/power/efficiency.py +297 -0
  44. oscura/analyzers/power/ripple.py +356 -0
  45. oscura/analyzers/power/soa.py +372 -0
  46. oscura/analyzers/power/switching.py +479 -0
  47. oscura/analyzers/protocol/__init__.py +150 -0
  48. oscura/analyzers/protocols/__init__.py +150 -0
  49. oscura/analyzers/protocols/base.py +500 -0
  50. oscura/analyzers/protocols/can.py +620 -0
  51. oscura/analyzers/protocols/can_fd.py +448 -0
  52. oscura/analyzers/protocols/flexray.py +405 -0
  53. oscura/analyzers/protocols/hdlc.py +399 -0
  54. oscura/analyzers/protocols/i2c.py +368 -0
  55. oscura/analyzers/protocols/i2s.py +296 -0
  56. oscura/analyzers/protocols/jtag.py +393 -0
  57. oscura/analyzers/protocols/lin.py +445 -0
  58. oscura/analyzers/protocols/manchester.py +333 -0
  59. oscura/analyzers/protocols/onewire.py +501 -0
  60. oscura/analyzers/protocols/spi.py +334 -0
  61. oscura/analyzers/protocols/swd.py +325 -0
  62. oscura/analyzers/protocols/uart.py +393 -0
  63. oscura/analyzers/protocols/usb.py +495 -0
  64. oscura/analyzers/signal_integrity/__init__.py +63 -0
  65. oscura/analyzers/signal_integrity/embedding.py +294 -0
  66. oscura/analyzers/signal_integrity/equalization.py +370 -0
  67. oscura/analyzers/signal_integrity/sparams.py +484 -0
  68. oscura/analyzers/spectral/__init__.py +53 -0
  69. oscura/analyzers/spectral/chunked.py +273 -0
  70. oscura/analyzers/spectral/chunked_fft.py +571 -0
  71. oscura/analyzers/spectral/chunked_wavelet.py +391 -0
  72. oscura/analyzers/spectral/fft.py +92 -0
  73. oscura/analyzers/statistical/__init__.py +250 -0
  74. oscura/analyzers/statistical/checksum.py +923 -0
  75. oscura/analyzers/statistical/chunked_corr.py +228 -0
  76. oscura/analyzers/statistical/classification.py +778 -0
  77. oscura/analyzers/statistical/entropy.py +1113 -0
  78. oscura/analyzers/statistical/ngrams.py +614 -0
  79. oscura/analyzers/statistics/__init__.py +119 -0
  80. oscura/analyzers/statistics/advanced.py +885 -0
  81. oscura/analyzers/statistics/basic.py +263 -0
  82. oscura/analyzers/statistics/correlation.py +630 -0
  83. oscura/analyzers/statistics/distribution.py +298 -0
  84. oscura/analyzers/statistics/outliers.py +463 -0
  85. oscura/analyzers/statistics/streaming.py +93 -0
  86. oscura/analyzers/statistics/trend.py +520 -0
  87. oscura/analyzers/validation.py +598 -0
  88. oscura/analyzers/waveform/__init__.py +36 -0
  89. oscura/analyzers/waveform/measurements.py +943 -0
  90. oscura/analyzers/waveform/measurements_with_uncertainty.py +371 -0
  91. oscura/analyzers/waveform/spectral.py +1689 -0
  92. oscura/analyzers/waveform/wavelets.py +298 -0
  93. oscura/api/__init__.py +62 -0
  94. oscura/api/dsl.py +538 -0
  95. oscura/api/fluent.py +571 -0
  96. oscura/api/operators.py +498 -0
  97. oscura/api/optimization.py +392 -0
  98. oscura/api/profiling.py +396 -0
  99. oscura/automotive/__init__.py +73 -0
  100. oscura/automotive/can/__init__.py +52 -0
  101. oscura/automotive/can/analysis.py +356 -0
  102. oscura/automotive/can/checksum.py +250 -0
  103. oscura/automotive/can/correlation.py +212 -0
  104. oscura/automotive/can/discovery.py +355 -0
  105. oscura/automotive/can/message_wrapper.py +375 -0
  106. oscura/automotive/can/models.py +385 -0
  107. oscura/automotive/can/patterns.py +381 -0
  108. oscura/automotive/can/session.py +452 -0
  109. oscura/automotive/can/state_machine.py +300 -0
  110. oscura/automotive/can/stimulus_response.py +461 -0
  111. oscura/automotive/dbc/__init__.py +15 -0
  112. oscura/automotive/dbc/generator.py +156 -0
  113. oscura/automotive/dbc/parser.py +146 -0
  114. oscura/automotive/dtc/__init__.py +30 -0
  115. oscura/automotive/dtc/database.py +3036 -0
  116. oscura/automotive/j1939/__init__.py +14 -0
  117. oscura/automotive/j1939/decoder.py +745 -0
  118. oscura/automotive/loaders/__init__.py +35 -0
  119. oscura/automotive/loaders/asc.py +98 -0
  120. oscura/automotive/loaders/blf.py +77 -0
  121. oscura/automotive/loaders/csv_can.py +136 -0
  122. oscura/automotive/loaders/dispatcher.py +136 -0
  123. oscura/automotive/loaders/mdf.py +331 -0
  124. oscura/automotive/loaders/pcap.py +132 -0
  125. oscura/automotive/obd/__init__.py +14 -0
  126. oscura/automotive/obd/decoder.py +707 -0
  127. oscura/automotive/uds/__init__.py +48 -0
  128. oscura/automotive/uds/decoder.py +265 -0
  129. oscura/automotive/uds/models.py +64 -0
  130. oscura/automotive/visualization.py +369 -0
  131. oscura/batch/__init__.py +55 -0
  132. oscura/batch/advanced.py +627 -0
  133. oscura/batch/aggregate.py +300 -0
  134. oscura/batch/analyze.py +139 -0
  135. oscura/batch/logging.py +487 -0
  136. oscura/batch/metrics.py +556 -0
  137. oscura/builders/__init__.py +41 -0
  138. oscura/builders/signal_builder.py +1131 -0
  139. oscura/cli/__init__.py +14 -0
  140. oscura/cli/batch.py +339 -0
  141. oscura/cli/characterize.py +273 -0
  142. oscura/cli/compare.py +775 -0
  143. oscura/cli/decode.py +551 -0
  144. oscura/cli/main.py +247 -0
  145. oscura/cli/shell.py +350 -0
  146. oscura/comparison/__init__.py +66 -0
  147. oscura/comparison/compare.py +397 -0
  148. oscura/comparison/golden.py +487 -0
  149. oscura/comparison/limits.py +391 -0
  150. oscura/comparison/mask.py +434 -0
  151. oscura/comparison/trace_diff.py +30 -0
  152. oscura/comparison/visualization.py +481 -0
  153. oscura/compliance/__init__.py +70 -0
  154. oscura/compliance/advanced.py +756 -0
  155. oscura/compliance/masks.py +363 -0
  156. oscura/compliance/reporting.py +483 -0
  157. oscura/compliance/testing.py +298 -0
  158. oscura/component/__init__.py +38 -0
  159. oscura/component/impedance.py +365 -0
  160. oscura/component/reactive.py +598 -0
  161. oscura/component/transmission_line.py +312 -0
  162. oscura/config/__init__.py +191 -0
  163. oscura/config/defaults.py +254 -0
  164. oscura/config/loader.py +348 -0
  165. oscura/config/memory.py +271 -0
  166. oscura/config/migration.py +458 -0
  167. oscura/config/pipeline.py +1077 -0
  168. oscura/config/preferences.py +530 -0
  169. oscura/config/protocol.py +875 -0
  170. oscura/config/schema.py +713 -0
  171. oscura/config/settings.py +420 -0
  172. oscura/config/thresholds.py +599 -0
  173. oscura/convenience.py +457 -0
  174. oscura/core/__init__.py +299 -0
  175. oscura/core/audit.py +457 -0
  176. oscura/core/backend_selector.py +405 -0
  177. oscura/core/cache.py +590 -0
  178. oscura/core/cancellation.py +439 -0
  179. oscura/core/confidence.py +225 -0
  180. oscura/core/config.py +506 -0
  181. oscura/core/correlation.py +216 -0
  182. oscura/core/cross_domain.py +422 -0
  183. oscura/core/debug.py +301 -0
  184. oscura/core/edge_cases.py +541 -0
  185. oscura/core/exceptions.py +535 -0
  186. oscura/core/gpu_backend.py +523 -0
  187. oscura/core/lazy.py +832 -0
  188. oscura/core/log_query.py +540 -0
  189. oscura/core/logging.py +931 -0
  190. oscura/core/logging_advanced.py +952 -0
  191. oscura/core/memoize.py +171 -0
  192. oscura/core/memory_check.py +274 -0
  193. oscura/core/memory_guard.py +290 -0
  194. oscura/core/memory_limits.py +336 -0
  195. oscura/core/memory_monitor.py +453 -0
  196. oscura/core/memory_progress.py +465 -0
  197. oscura/core/memory_warnings.py +315 -0
  198. oscura/core/numba_backend.py +362 -0
  199. oscura/core/performance.py +352 -0
  200. oscura/core/progress.py +524 -0
  201. oscura/core/provenance.py +358 -0
  202. oscura/core/results.py +331 -0
  203. oscura/core/types.py +504 -0
  204. oscura/core/uncertainty.py +383 -0
  205. oscura/discovery/__init__.py +52 -0
  206. oscura/discovery/anomaly_detector.py +672 -0
  207. oscura/discovery/auto_decoder.py +415 -0
  208. oscura/discovery/comparison.py +497 -0
  209. oscura/discovery/quality_validator.py +528 -0
  210. oscura/discovery/signal_detector.py +769 -0
  211. oscura/dsl/__init__.py +73 -0
  212. oscura/dsl/commands.py +246 -0
  213. oscura/dsl/interpreter.py +455 -0
  214. oscura/dsl/parser.py +689 -0
  215. oscura/dsl/repl.py +172 -0
  216. oscura/exceptions.py +59 -0
  217. oscura/exploratory/__init__.py +111 -0
  218. oscura/exploratory/error_recovery.py +642 -0
  219. oscura/exploratory/fuzzy.py +513 -0
  220. oscura/exploratory/fuzzy_advanced.py +786 -0
  221. oscura/exploratory/legacy.py +831 -0
  222. oscura/exploratory/parse.py +358 -0
  223. oscura/exploratory/recovery.py +275 -0
  224. oscura/exploratory/sync.py +382 -0
  225. oscura/exploratory/unknown.py +707 -0
  226. oscura/export/__init__.py +25 -0
  227. oscura/export/wireshark/README.md +265 -0
  228. oscura/export/wireshark/__init__.py +47 -0
  229. oscura/export/wireshark/generator.py +312 -0
  230. oscura/export/wireshark/lua_builder.py +159 -0
  231. oscura/export/wireshark/templates/dissector.lua.j2 +92 -0
  232. oscura/export/wireshark/type_mapping.py +165 -0
  233. oscura/export/wireshark/validator.py +105 -0
  234. oscura/exporters/__init__.py +94 -0
  235. oscura/exporters/csv.py +303 -0
  236. oscura/exporters/exporters.py +44 -0
  237. oscura/exporters/hdf5.py +219 -0
  238. oscura/exporters/html_export.py +701 -0
  239. oscura/exporters/json_export.py +291 -0
  240. oscura/exporters/markdown_export.py +367 -0
  241. oscura/exporters/matlab_export.py +354 -0
  242. oscura/exporters/npz_export.py +219 -0
  243. oscura/exporters/spice_export.py +210 -0
  244. oscura/extensibility/__init__.py +131 -0
  245. oscura/extensibility/docs.py +752 -0
  246. oscura/extensibility/extensions.py +1125 -0
  247. oscura/extensibility/logging.py +259 -0
  248. oscura/extensibility/measurements.py +485 -0
  249. oscura/extensibility/plugins.py +414 -0
  250. oscura/extensibility/registry.py +346 -0
  251. oscura/extensibility/templates.py +913 -0
  252. oscura/extensibility/validation.py +651 -0
  253. oscura/filtering/__init__.py +89 -0
  254. oscura/filtering/base.py +563 -0
  255. oscura/filtering/convenience.py +564 -0
  256. oscura/filtering/design.py +725 -0
  257. oscura/filtering/filters.py +32 -0
  258. oscura/filtering/introspection.py +605 -0
  259. oscura/guidance/__init__.py +24 -0
  260. oscura/guidance/recommender.py +429 -0
  261. oscura/guidance/wizard.py +518 -0
  262. oscura/inference/__init__.py +251 -0
  263. oscura/inference/active_learning/README.md +153 -0
  264. oscura/inference/active_learning/__init__.py +38 -0
  265. oscura/inference/active_learning/lstar.py +257 -0
  266. oscura/inference/active_learning/observation_table.py +230 -0
  267. oscura/inference/active_learning/oracle.py +78 -0
  268. oscura/inference/active_learning/teachers/__init__.py +15 -0
  269. oscura/inference/active_learning/teachers/simulator.py +192 -0
  270. oscura/inference/adaptive_tuning.py +453 -0
  271. oscura/inference/alignment.py +653 -0
  272. oscura/inference/bayesian.py +943 -0
  273. oscura/inference/binary.py +1016 -0
  274. oscura/inference/crc_reverse.py +711 -0
  275. oscura/inference/logic.py +288 -0
  276. oscura/inference/message_format.py +1305 -0
  277. oscura/inference/protocol.py +417 -0
  278. oscura/inference/protocol_dsl.py +1084 -0
  279. oscura/inference/protocol_library.py +1230 -0
  280. oscura/inference/sequences.py +809 -0
  281. oscura/inference/signal_intelligence.py +1509 -0
  282. oscura/inference/spectral.py +215 -0
  283. oscura/inference/state_machine.py +634 -0
  284. oscura/inference/stream.py +918 -0
  285. oscura/integrations/__init__.py +59 -0
  286. oscura/integrations/llm.py +1827 -0
  287. oscura/jupyter/__init__.py +32 -0
  288. oscura/jupyter/display.py +268 -0
  289. oscura/jupyter/magic.py +334 -0
  290. oscura/loaders/__init__.py +526 -0
  291. oscura/loaders/binary.py +69 -0
  292. oscura/loaders/configurable.py +1255 -0
  293. oscura/loaders/csv.py +26 -0
  294. oscura/loaders/csv_loader.py +473 -0
  295. oscura/loaders/hdf5.py +9 -0
  296. oscura/loaders/hdf5_loader.py +510 -0
  297. oscura/loaders/lazy.py +370 -0
  298. oscura/loaders/mmap_loader.py +583 -0
  299. oscura/loaders/numpy_loader.py +436 -0
  300. oscura/loaders/pcap.py +432 -0
  301. oscura/loaders/preprocessing.py +368 -0
  302. oscura/loaders/rigol.py +287 -0
  303. oscura/loaders/sigrok.py +321 -0
  304. oscura/loaders/tdms.py +367 -0
  305. oscura/loaders/tektronix.py +711 -0
  306. oscura/loaders/validation.py +584 -0
  307. oscura/loaders/vcd.py +464 -0
  308. oscura/loaders/wav.py +233 -0
  309. oscura/math/__init__.py +45 -0
  310. oscura/math/arithmetic.py +824 -0
  311. oscura/math/interpolation.py +413 -0
  312. oscura/onboarding/__init__.py +39 -0
  313. oscura/onboarding/help.py +498 -0
  314. oscura/onboarding/tutorials.py +405 -0
  315. oscura/onboarding/wizard.py +466 -0
  316. oscura/optimization/__init__.py +19 -0
  317. oscura/optimization/parallel.py +440 -0
  318. oscura/optimization/search.py +532 -0
  319. oscura/pipeline/__init__.py +43 -0
  320. oscura/pipeline/base.py +338 -0
  321. oscura/pipeline/composition.py +242 -0
  322. oscura/pipeline/parallel.py +448 -0
  323. oscura/pipeline/pipeline.py +375 -0
  324. oscura/pipeline/reverse_engineering.py +1119 -0
  325. oscura/plugins/__init__.py +122 -0
  326. oscura/plugins/base.py +272 -0
  327. oscura/plugins/cli.py +497 -0
  328. oscura/plugins/discovery.py +411 -0
  329. oscura/plugins/isolation.py +418 -0
  330. oscura/plugins/lifecycle.py +959 -0
  331. oscura/plugins/manager.py +493 -0
  332. oscura/plugins/registry.py +421 -0
  333. oscura/plugins/versioning.py +372 -0
  334. oscura/py.typed +0 -0
  335. oscura/quality/__init__.py +65 -0
  336. oscura/quality/ensemble.py +740 -0
  337. oscura/quality/explainer.py +338 -0
  338. oscura/quality/scoring.py +616 -0
  339. oscura/quality/warnings.py +456 -0
  340. oscura/reporting/__init__.py +248 -0
  341. oscura/reporting/advanced.py +1234 -0
  342. oscura/reporting/analyze.py +448 -0
  343. oscura/reporting/argument_preparer.py +596 -0
  344. oscura/reporting/auto_report.py +507 -0
  345. oscura/reporting/batch.py +615 -0
  346. oscura/reporting/chart_selection.py +223 -0
  347. oscura/reporting/comparison.py +330 -0
  348. oscura/reporting/config.py +615 -0
  349. oscura/reporting/content/__init__.py +39 -0
  350. oscura/reporting/content/executive.py +127 -0
  351. oscura/reporting/content/filtering.py +191 -0
  352. oscura/reporting/content/minimal.py +257 -0
  353. oscura/reporting/content/verbosity.py +162 -0
  354. oscura/reporting/core.py +508 -0
  355. oscura/reporting/core_formats/__init__.py +17 -0
  356. oscura/reporting/core_formats/multi_format.py +210 -0
  357. oscura/reporting/engine.py +836 -0
  358. oscura/reporting/export.py +366 -0
  359. oscura/reporting/formatting/__init__.py +129 -0
  360. oscura/reporting/formatting/emphasis.py +81 -0
  361. oscura/reporting/formatting/numbers.py +403 -0
  362. oscura/reporting/formatting/standards.py +55 -0
  363. oscura/reporting/formatting.py +466 -0
  364. oscura/reporting/html.py +578 -0
  365. oscura/reporting/index.py +590 -0
  366. oscura/reporting/multichannel.py +296 -0
  367. oscura/reporting/output.py +379 -0
  368. oscura/reporting/pdf.py +373 -0
  369. oscura/reporting/plots.py +731 -0
  370. oscura/reporting/pptx_export.py +360 -0
  371. oscura/reporting/renderers/__init__.py +11 -0
  372. oscura/reporting/renderers/pdf.py +94 -0
  373. oscura/reporting/sections.py +471 -0
  374. oscura/reporting/standards.py +680 -0
  375. oscura/reporting/summary_generator.py +368 -0
  376. oscura/reporting/tables.py +397 -0
  377. oscura/reporting/template_system.py +724 -0
  378. oscura/reporting/templates/__init__.py +15 -0
  379. oscura/reporting/templates/definition.py +205 -0
  380. oscura/reporting/templates/index.html +649 -0
  381. oscura/reporting/templates/index.md +173 -0
  382. oscura/schemas/__init__.py +158 -0
  383. oscura/schemas/bus_configuration.json +322 -0
  384. oscura/schemas/device_mapping.json +182 -0
  385. oscura/schemas/packet_format.json +418 -0
  386. oscura/schemas/protocol_definition.json +363 -0
  387. oscura/search/__init__.py +16 -0
  388. oscura/search/anomaly.py +292 -0
  389. oscura/search/context.py +149 -0
  390. oscura/search/pattern.py +160 -0
  391. oscura/session/__init__.py +34 -0
  392. oscura/session/annotations.py +289 -0
  393. oscura/session/history.py +313 -0
  394. oscura/session/session.py +445 -0
  395. oscura/streaming/__init__.py +43 -0
  396. oscura/streaming/chunked.py +611 -0
  397. oscura/streaming/progressive.py +393 -0
  398. oscura/streaming/realtime.py +622 -0
  399. oscura/testing/__init__.py +54 -0
  400. oscura/testing/synthetic.py +808 -0
  401. oscura/triggering/__init__.py +68 -0
  402. oscura/triggering/base.py +229 -0
  403. oscura/triggering/edge.py +353 -0
  404. oscura/triggering/pattern.py +344 -0
  405. oscura/triggering/pulse.py +581 -0
  406. oscura/triggering/window.py +453 -0
  407. oscura/ui/__init__.py +48 -0
  408. oscura/ui/formatters.py +526 -0
  409. oscura/ui/progressive_display.py +340 -0
  410. oscura/utils/__init__.py +99 -0
  411. oscura/utils/autodetect.py +338 -0
  412. oscura/utils/buffer.py +389 -0
  413. oscura/utils/lazy.py +407 -0
  414. oscura/utils/lazy_imports.py +147 -0
  415. oscura/utils/memory.py +836 -0
  416. oscura/utils/memory_advanced.py +1326 -0
  417. oscura/utils/memory_extensions.py +465 -0
  418. oscura/utils/progressive.py +352 -0
  419. oscura/utils/windowing.py +362 -0
  420. oscura/visualization/__init__.py +321 -0
  421. oscura/visualization/accessibility.py +526 -0
  422. oscura/visualization/annotations.py +374 -0
  423. oscura/visualization/axis_scaling.py +305 -0
  424. oscura/visualization/colors.py +453 -0
  425. oscura/visualization/digital.py +337 -0
  426. oscura/visualization/eye.py +420 -0
  427. oscura/visualization/histogram.py +281 -0
  428. oscura/visualization/interactive.py +858 -0
  429. oscura/visualization/jitter.py +702 -0
  430. oscura/visualization/keyboard.py +394 -0
  431. oscura/visualization/layout.py +365 -0
  432. oscura/visualization/optimization.py +1028 -0
  433. oscura/visualization/palettes.py +446 -0
  434. oscura/visualization/plot.py +92 -0
  435. oscura/visualization/power.py +290 -0
  436. oscura/visualization/power_extended.py +626 -0
  437. oscura/visualization/presets.py +467 -0
  438. oscura/visualization/protocols.py +932 -0
  439. oscura/visualization/render.py +207 -0
  440. oscura/visualization/rendering.py +444 -0
  441. oscura/visualization/reverse_engineering.py +791 -0
  442. oscura/visualization/signal_integrity.py +808 -0
  443. oscura/visualization/specialized.py +553 -0
  444. oscura/visualization/spectral.py +811 -0
  445. oscura/visualization/styles.py +381 -0
  446. oscura/visualization/thumbnails.py +311 -0
  447. oscura/visualization/time_axis.py +351 -0
  448. oscura/visualization/waveform.py +367 -0
  449. oscura/workflow/__init__.py +13 -0
  450. oscura/workflow/dag.py +377 -0
  451. oscura/workflows/__init__.py +58 -0
  452. oscura/workflows/compliance.py +280 -0
  453. oscura/workflows/digital.py +272 -0
  454. oscura/workflows/multi_trace.py +502 -0
  455. oscura/workflows/power.py +178 -0
  456. oscura/workflows/protocol.py +492 -0
  457. oscura/workflows/reverse_engineering.py +639 -0
  458. oscura/workflows/signal_integrity.py +227 -0
  459. oscura-0.1.0.dist-info/METADATA +300 -0
  460. oscura-0.1.0.dist-info/RECORD +463 -0
  461. oscura-0.1.0.dist-info/entry_points.txt +2 -0
  462. {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/licenses/LICENSE +1 -1
  463. oscura-0.0.1.dist-info/METADATA +0 -63
  464. oscura-0.0.1.dist-info/RECORD +0 -5
  465. {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1091 @@
1
+ """Binary pattern matching with regex, Aho-Corasick, and fuzzy matching.
2
+
3
+ - RE-PAT-001: Binary Regex Pattern Matching
4
+ - RE-PAT-002: Multi-Pattern Search (Aho-Corasick)
5
+ - RE-PAT-003: Fuzzy Pattern Matching
6
+
7
+ This module provides comprehensive pattern matching capabilities for binary
8
+ data reverse engineering, including regex-like matching, efficient multi-pattern
9
+ search using Aho-Corasick, and approximate matching with configurable
10
+ similarity thresholds.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import re
16
+ from collections import defaultdict, deque
17
+ from collections.abc import Iterator
18
+ from dataclasses import dataclass, field
19
+
20
+
@dataclass
class PatternMatchResult:
    """A single hit produced by one of the pattern matchers.

    Implements RE-PAT-001: Pattern match result.

    Attributes:
        pattern_name: Label identifying which pattern produced the hit.
        offset: Position (in bytes) of the hit inside the searched data.
        length: Number of bytes covered by the hit.
        matched_data: The bytes that were actually matched.
        pattern: The pattern (bytes or pattern string) responsible for the hit.
        similarity: Score for approximate matches; defaults to 1.0, which
            denotes an exact match.
    """

    # Identification of the pattern that fired.
    pattern_name: str

    # Location and extent of the hit within the input.
    offset: int
    length: int

    # What was found, and what was being looked for.
    matched_data: bytes
    pattern: bytes | str

    # Exact matches keep the default score.
    similarity: float = 1.0
42
+
43
+
@dataclass
class BinaryRegex:
    """Binary regex pattern for matching.

    Implements RE-PAT-001: Binary Regex specification.

    Supported syntax:
    - Literal bytes: \\xAA\\xBB (``\\x`` followed by exactly two hex digits)
    - Wildcards: ?? or ? (any single byte)
    - Ranges: [\\x00-\\x1F] (copied verbatim into the Python regex)
    - Repetition: {n} {n,m}, as well as * and +
    - Alternation: (\\x00|\\xFF) (either byte)
    - Anchors: ^ (start), $ (end)

    Limitation: nibble wildcards are not implemented. A lone ``?`` matches
    any byte and whatever character follows it (for example the ``0`` in
    ``?0``) is matched as a separate literal byte.

    Attributes:
        pattern: The pattern string.
        compiled: Compiled regex object. It is always recomputed from
            ``pattern`` in ``__post_init__`` and is None when the converted
            pattern is not a valid regular expression.
        name: Optional pattern name.
    """

    # Bytes that must be backslash-escaped to be taken literally by ``re``.
    _REGEX_SPECIAL = frozenset(b".^$*+?{}[]\\|()")
    # The only characters accepted as digits of a ``\xHH`` escape.
    _HEX_DIGITS = frozenset(b"0123456789abcdefABCDEF")
    # Operators whose meaning is the same in the binary syntax and in ``re``.
    _PASSTHROUGH = b"^$()|*+"
    # Bracketed constructs that are copied through up to their closing byte.
    _CLOSERS = {b"[": b"]", b"{": b"}"}

    pattern: str
    compiled: re.Pattern[bytes] | None = None
    name: str = ""

    def __post_init__(self) -> None:
        """Compile the pattern.

        An invalid pattern is deliberately not an error: ``compiled`` is set
        to None and every matching method then reports "no match".
        """
        try:
            regex_pattern = self._convert_to_regex(self.pattern)
            self.compiled = re.compile(regex_pattern, re.DOTALL)
        except re.error:
            self.compiled = None

    def _escape_byte(self, value: int) -> bytes:
        """Return ``value`` as a one-byte regex literal, escaped if special."""
        raw = bytes([value])
        return b"\\" + raw if value in self._REGEX_SPECIAL else raw

    def _convert_to_regex(self, pattern: str) -> bytes:
        """Convert binary pattern syntax to Python regex.

        Args:
            pattern: Binary pattern string.

        Returns:
            Python regex pattern as bytes.
        """
        source = pattern.encode() if isinstance(pattern, str) else bytes(pattern)
        pieces: list[bytes] = []
        pos = 0
        size = len(source)

        while pos < size:
            token = source[pos : pos + 1]

            if token == b"\\":
                if pos + 1 >= size:
                    # Trailing backslash: emitted as-is, which makes the
                    # resulting regex invalid (compiled becomes None).
                    pieces.append(b"\\")
                    pos += 1
                    continue

                hex_pair = source[pos + 2 : pos + 4]
                if (
                    source[pos + 1 : pos + 2] == b"x"
                    and len(hex_pair) == 2
                    and all(digit in self._HEX_DIGITS for digit in hex_pair)
                ):
                    # Checking the digits explicitly (instead of relying on
                    # int() raising) rejects forms such as "\x+1" or "\x 1",
                    # which int() would otherwise accept as the value 1.
                    pieces.append(self._escape_byte(int(hex_pair, 16)))
                    pos += 4
                else:
                    # Any other escape (or a malformed \x) is handed to the
                    # regex engine unchanged.
                    pieces.append(source[pos : pos + 2])
                    pos += 2

            elif token == b"?":
                # "??" and "?" both become "any byte"; only the number of
                # consumed pattern characters differs.
                pieces.append(b".")
                pos += 2 if source[pos + 1 : pos + 2] == b"?" else 1

            elif token in self._CLOSERS:
                # Character class or repetition: copy through to the closer,
                # or emit the lone opener when no closer exists.
                close = source.find(self._CLOSERS[token], pos)
                if close == -1:
                    pieces.append(token)
                    pos += 1
                else:
                    pieces.append(source[pos : close + 1])
                    pos = close + 1

            elif token in self._PASSTHROUGH:
                pieces.append(token)
                pos += 1

            else:
                # Literal byte - escape if special
                pieces.append(self._escape_byte(source[pos]))
                pos += 1

        return b"".join(pieces)

    def _to_result(self, found: re.Match[bytes]) -> PatternMatchResult:
        """Wrap a regex match object in a PatternMatchResult."""
        return PatternMatchResult(
            pattern_name=self.name,
            offset=found.start(),
            length=found.end() - found.start(),
            matched_data=found.group(),
            pattern=self.pattern,
        )

    def match(self, data: bytes, start: int = 0) -> PatternMatchResult | None:
        """Try to match pattern at the given position of data.

        Args:
            data: Data to match against.
            start: Starting offset, passed as ``pos`` to the compiled regex.

        Returns:
            PatternMatchResult if matched, None otherwise (also when the
            pattern failed to compile).
        """
        if self.compiled is None:
            return None

        found = self.compiled.match(data, start)
        return self._to_result(found) if found else None

    def search(self, data: bytes, start: int = 0) -> PatternMatchResult | None:
        """Search for pattern anywhere in data.

        Args:
            data: Data to search.
            start: Starting offset, passed as ``pos`` to the compiled regex.

        Returns:
            PatternMatchResult for the first occurrence, None if there is
            none or the pattern failed to compile.
        """
        if self.compiled is None:
            return None

        found = self.compiled.search(data, start)
        return self._to_result(found) if found else None

    def findall(self, data: bytes) -> list[PatternMatchResult]:
        """Find all occurrences of pattern in data.

        Args:
            data: Data to search.

        Returns:
            List of all matches, in the order ``finditer`` reports them;
            empty when the pattern failed to compile.
        """
        if self.compiled is None:
            return []

        return [self._to_result(found) for found in self.compiled.finditer(data)]
261
+
262
+
263
+ class AhoCorasickMatcher:
264
+ """Multi-pattern search using Aho-Corasick algorithm.
265
+
266
+ Implements RE-PAT-002: Multi-Pattern Search.
267
+
268
+ Efficiently searches for multiple patterns simultaneously in O(n + m + z)
269
+ time where n is text length, m is total pattern length, and z is matches.
270
+
271
+ Example:
272
+ >>> matcher = AhoCorasickMatcher()
273
+ >>> matcher.add_pattern(b'\\xAA\\x55', 'header')
274
+ >>> matcher.add_pattern(b'\\xDE\\xAD', 'marker')
275
+ >>> matcher.build()
276
+ >>> matches = matcher.search(data)
277
+ """
278
+
279
+ def __init__(self) -> None:
280
+ """Initialize Aho-Corasick automaton."""
281
+ self._goto: dict[int, dict[int, int]] = defaultdict(dict)
282
+ self._fail: dict[int, int] = {}
283
+ self._output: dict[int, list[tuple[bytes, str]]] = defaultdict(list)
284
+ self._patterns: list[tuple[bytes, str]] = []
285
+ self._state_count = 0
286
+ self._built = False
287
+
288
+ def add_pattern(self, pattern: bytes | str, name: str = "") -> None:
289
+ """Add a pattern to the automaton.
290
+
291
+ Args:
292
+ pattern: Pattern bytes to search for.
293
+ name: Optional name for the pattern.
294
+ """
295
+ if isinstance(pattern, str):
296
+ pattern = pattern.encode()
297
+ if not name:
298
+ name = pattern.hex()
299
+
300
+ self._patterns.append((pattern, name))
301
+ self._built = False
302
+
303
+ def add_patterns(self, patterns: dict[str, bytes | str]) -> None:
304
+ """Add multiple patterns at once.
305
+
306
+ Args:
307
+ patterns: Dictionary mapping names to patterns.
308
+ """
309
+ for name, pattern in patterns.items():
310
+ self.add_pattern(pattern, name)
311
+
312
+ def build(self) -> None:
313
+ """Build the automaton from added patterns.
314
+
315
+ Must be called after adding patterns and before searching.
316
+ """
317
+ # Reset automaton
318
+ self._goto = defaultdict(dict)
319
+ self._fail = {}
320
+ self._output = defaultdict(list)
321
+ self._state_count = 0
322
+
323
+ # Build goto function
324
+ for pattern, name in self._patterns:
325
+ state = 0
326
+ for byte in pattern:
327
+ if byte not in self._goto[state]:
328
+ self._state_count += 1
329
+ self._goto[state][byte] = self._state_count
330
+ state = self._goto[state][byte]
331
+ self._output[state].append((pattern, name))
332
+
333
+ # Build fail function using BFS
334
+ queue: deque[int] = deque()
335
+
336
+ # Initialize fail for depth 1 states
337
+ for state in self._goto[0].values():
338
+ self._fail[state] = 0
339
+ queue.append(state)
340
+
341
+ # BFS to build fail function
342
+ while queue:
343
+ r = queue.popleft()
344
+ for byte, s in self._goto[r].items():
345
+ queue.append(s)
346
+
347
+ # Follow fail links to find fail state
348
+ state = self._fail[r]
349
+ while state != 0 and byte not in self._goto[state]:
350
+ state = self._fail.get(state, 0)
351
+
352
+ self._fail[s] = self._goto[state].get(byte, 0)
353
+
354
+ # Merge outputs
355
+ if self._fail[s] in self._output:
356
+ self._output[s].extend(self._output[self._fail[s]])
357
+
358
+ self._built = True
359
+
360
def search(self, data: bytes) -> list[PatternMatchResult]:
    """Collect every occurrence of every registered pattern in ``data``.

    Args:
        data: Data to search.

    Returns:
        List of all pattern matches, in the order they are encountered
        while scanning ``data`` from left to right.

    Raises:
        RuntimeError: If automaton not built.
    """
    if not self._built:
        raise RuntimeError("Must call build() before search()")

    found = []
    node = 0

    for end, symbol in enumerate(data):
        # Fall back along failure links until a transition exists or the
        # root is reached.
        while node != 0 and symbol not in self._goto[node]:
            node = self._fail.get(node, 0)

        node = self._goto[node].get(symbol, 0)

        if node not in self._output:
            continue

        # Every pattern listed for this state ends at index ``end``.
        for pattern, name in self._output[node]:
            size = len(pattern)
            begin = end - size + 1
            found.append(
                PatternMatchResult(
                    pattern_name=name,
                    offset=begin,
                    length=size,
                    matched_data=data[begin : begin + size],
                    pattern=pattern,
                )
            )

    return found
400
+
401
def iter_search(self, data: bytes) -> Iterator[PatternMatchResult]:
    """Lazily yield pattern matches instead of building a list.

    Args:
        data: Data to search.

    Yields:
        PatternMatchResult for each match, in scan order.

    Raises:
        RuntimeError: If automaton not built. Because this is a generator,
            the error surfaces when iteration starts.
    """
    if not self._built:
        raise RuntimeError("Must call build() before search()")

    node = 0

    for end, symbol in enumerate(data):
        # Fall back along failure links until a transition exists or the
        # root is reached.
        while node != 0 and symbol not in self._goto[node]:
            node = self._fail.get(node, 0)

        node = self._goto[node].get(symbol, 0)

        if node not in self._output:
            continue

        # Every pattern listed for this state ends at index ``end``.
        for pattern, name in self._output[node]:
            size = len(pattern)
            begin = end - size + 1
            yield PatternMatchResult(
                pattern_name=name,
                offset=begin,
                length=size,
                matched_data=data[begin : begin + size],
                pattern=pattern,
            )
434
+
435
+
436
@dataclass
class FuzzyMatchResult:
    """One approximate occurrence of a pattern inside the searched data.

    Implements RE-PAT-003: Fuzzy match result.

    Attributes:
        pattern_name: Name of the pattern.
        offset: Byte offset at which the matched region starts.
        length: Number of bytes in the matched region.
        matched_data: The bytes that were matched.
        pattern: The pattern that was searched for.
        similarity: Similarity score (0-1).
        edit_distance: Levenshtein edit distance.
        substitutions: List of (position, expected, actual) substitutions;
            empty unless the producer fills it in.
    """

    # Identification and location of the match.
    pattern_name: str
    offset: int
    length: int

    # Matched bytes alongside the pattern they were compared with.
    matched_data: bytes
    pattern: bytes

    # Quality of the match.
    similarity: float
    edit_distance: int

    # A fresh list per instance, so results never share substitution lists.
    substitutions: list[tuple[int, int, int]] = field(default_factory=list)
461
+
462
+
463
class FuzzyMatcher:
    """Fuzzy pattern matching with configurable similarity.

    Implements RE-PAT-003: Fuzzy Pattern Matching.

    Supports approximate matching with edit distance thresholds and
    flexible match criteria (each edit operation can be switched off).

    Example:
        >>> matcher = FuzzyMatcher(max_edit_distance=2)
        >>> matches = matcher.search(data, pattern=b'\\xAA\\x55\\x00')
    """

    def __init__(
        self,
        max_edit_distance: int = 2,
        min_similarity: float | None = None,
        allow_substitutions: bool = True,
        allow_insertions: bool = True,
        allow_deletions: bool = True,
    ) -> None:
        """Initialize fuzzy matcher.

        Args:
            max_edit_distance: Maximum allowed edit distance.
            min_similarity: Minimum similarity threshold (0-1). If None, no
                similarity filtering is applied (see ``min_similarity``).
            allow_substitutions: Allow byte substitutions.
            allow_insertions: Allow byte insertions.
            allow_deletions: Allow byte deletions.
        """
        self.max_edit_distance = max_edit_distance
        self._min_similarity = min_similarity  # Store original value
        self.allow_substitutions = allow_substitutions
        self.allow_insertions = allow_insertions
        self.allow_deletions = allow_deletions

    @property
    def min_similarity(self) -> float:
        """Get minimum similarity (explicit value, or 0.0 when unset)."""
        if self._min_similarity is not None:
            return self._min_similarity
        # Default: no similarity filtering when using edit distance
        return 0.0

    def search(
        self,
        data: bytes,
        pattern: bytes | str,
        pattern_name: str = "",
    ) -> list[FuzzyMatchResult]:
        """Search for fuzzy matches of pattern in data.

        Every start offset is tried with every window length between
        ``len(pattern) - max_edit_distance`` (at least 1) and
        ``len(pattern) + max_edit_distance``. Windows within the edit
        distance and similarity limits are collected, then overlapping
        candidates are reduced to the best ones.

        Args:
            data: Data to search.
            pattern: Pattern to match; a str is encoded with ``str.encode()``.
            pattern_name: Optional pattern name; defaults to the pattern's
                hex representation.

        Returns:
            List of non-overlapping fuzzy matches meeting criteria, ordered
            by offset.
        """
        if isinstance(pattern, str):
            pattern = pattern.encode()

        if not pattern_name:
            pattern_name = pattern.hex()

        results = []
        pattern_len = len(pattern)
        data_len = len(data)
        shortest = max(1, pattern_len - self.max_edit_distance)
        longest = pattern_len + self.max_edit_distance

        # Sliding window search; start offsets never run past the data.
        last_start = min(data_len, data_len - pattern_len + 1 + self.max_edit_distance)
        for i in range(last_start):
            # Check windows of varying sizes, clipped to the remaining data.
            for window_len in range(shortest, min(data_len - i, longest) + 1):
                window = data[i : i + window_len]
                distance, substitutions = self._edit_distance_detailed(pattern, window)

                if distance > self.max_edit_distance:
                    continue

                similarity = 1.0 - (distance / max(pattern_len, window_len))
                if similarity >= self.min_similarity:
                    results.append(
                        FuzzyMatchResult(
                            pattern_name=pattern_name,
                            offset=i,
                            length=window_len,
                            matched_data=window,
                            pattern=pattern,
                            similarity=similarity,
                            edit_distance=distance,
                            substitutions=substitutions,
                        )
                    )

        # Remove overlapping matches, keeping best
        return self._remove_overlapping(results)

    def match_with_wildcards(
        self,
        data: bytes,
        pattern: bytes,
        wildcard: int = 0xFF,
        pattern_name: str = "",
    ) -> list[FuzzyMatchResult]:
        """Match pattern with wildcard bytes.

        Only same-length windows are compared. A window matches when at most
        ``max_edit_distance`` non-wildcard positions differ.

        Args:
            data: Data to search.
            pattern: Pattern with wildcards.
            wildcard: Byte value treated as wildcard.
            pattern_name: Optional pattern name; defaults to the pattern's
                hex representation.

        Returns:
            List of matches (overlapping matches are all reported).
        """
        if not pattern_name:
            pattern_name = pattern.hex()

        results = []
        pattern_len = len(pattern)
        # Loop-invariant: number of positions that actually constrain a match.
        non_wildcard_count = sum(1 for b in pattern if b != wildcard)

        for i in range(len(data) - pattern_len + 1):
            window = data[i : i + pattern_len]
            mismatches = 0

            for expected, actual in zip(pattern, window):
                if expected != wildcard and expected != actual:
                    mismatches += 1
                    if mismatches > self.max_edit_distance:
                        break
            else:
                # Loop finished without exceeding the mismatch budget.
                similarity = (
                    (non_wildcard_count - mismatches) / non_wildcard_count
                    if non_wildcard_count > 0
                    else 1.0
                )

                if similarity >= self.min_similarity:
                    results.append(
                        FuzzyMatchResult(
                            pattern_name=pattern_name,
                            offset=i,
                            length=pattern_len,
                            matched_data=window,
                            pattern=pattern,
                            similarity=similarity,
                            edit_distance=mismatches,
                        )
                    )

        return results

    def _edit_distance_detailed(
        self, pattern: bytes, text: bytes
    ) -> tuple[int, list[tuple[int, int, int]]]:
        """Calculate edit distance with substitution details.

        Only the operations enabled on this matcher are considered. When no
        sequence of enabled operations turns ``pattern`` into ``text`` (for
        example insertions are disabled and ``text`` is longer), the distance
        is reported as ``max(len(pattern), len(text)) + max_edit_distance + 1``.
        That value is larger than any real edit distance and larger than the
        matcher's threshold, so callers comparing against the threshold
        simply reject the pair.

        Args:
            pattern: Pattern bytes.
            text: Text to compare.

        Returns:
            Tuple of (distance, substitutions). Substitutions are
            (pattern_index, expected, actual) tuples listed from the end of
            the pattern towards the start; the list is empty when no
            alignment exists.
        """
        m, n = len(pattern), len(text)
        unreachable = float("inf")

        # Create DP table (using float to accommodate inf values)
        dp: list[list[float]] = [[0.0] * (n + 1) for _ in range(m + 1)]

        # Initialize base cases
        for i in range(m + 1):
            dp[i][0] = float(i) if self.allow_deletions else unreachable
        for j in range(n + 1):
            dp[0][j] = float(j) if self.allow_insertions else unreachable
        dp[0][0] = 0.0

        # Fill DP table
        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if pattern[i - 1] == text[j - 1]:
                    dp[i][j] = dp[i - 1][j - 1]
                else:
                    candidates = [unreachable]
                    if self.allow_substitutions:
                        candidates.append(dp[i - 1][j - 1] + 1)
                    if self.allow_insertions:
                        candidates.append(dp[i][j - 1] + 1)
                    if self.allow_deletions:
                        candidates.append(dp[i - 1][j] + 1)
                    dp[i][j] = min(candidates)

        if dp[m][n] == unreachable:
            # int(inf) raises OverflowError, so report a finite sentinel that
            # is guaranteed to exceed the matcher's threshold instead.
            return max(m, n) + max(self.max_edit_distance, 0) + 1, []

        # Backtrack to find substitutions
        substitutions = []
        i, j = m, n
        while i > 0 and j > 0:
            if pattern[i - 1] == text[j - 1]:
                i -= 1
                j -= 1
            elif dp[i][j] == dp[i - 1][j - 1] + 1 and self.allow_substitutions:
                substitutions.append((i - 1, pattern[i - 1], text[j - 1]))
                i -= 1
                j -= 1
            elif dp[i][j] == dp[i - 1][j] + 1 and self.allow_deletions:
                i -= 1
            elif dp[i][j] == dp[i][j - 1] + 1 and self.allow_insertions:
                j -= 1
            else:
                break

        return int(dp[m][n]), substitutions

    def _remove_overlapping(self, results: list[FuzzyMatchResult]) -> list[FuzzyMatchResult]:
        """Remove overlapping matches, keeping highest similarity.

        Candidates are visited from most to least similar (ties broken by
        lower offset); a candidate is kept only if none of its byte positions
        is already claimed by a kept match.

        Args:
            results: List of fuzzy match results.

        Returns:
            Non-overlapping results, ordered by offset.
        """
        if not results:
            return []

        # Sort by similarity (descending) then offset
        sorted_results = sorted(results, key=lambda r: (-r.similarity, r.offset))

        kept = []
        covered: set[int] = set()

        for result in sorted_results:
            # Check if any position is already covered
            positions = set(range(result.offset, result.offset + result.length))
            if not positions & covered:
                kept.append(result)
                covered.update(positions)

        return sorted(kept, key=lambda r: r.offset)
712
+
713
+
714
+ # =============================================================================
715
+ # Convenience functions
716
+ # =============================================================================
717
+
718
+
719
def binary_regex_search(
    data: bytes,
    pattern: str,
    name: str = "",
) -> list[PatternMatchResult]:
    """Run a one-off binary regex search over ``data``.

    Implements RE-PAT-001: Binary Regex Pattern Matching.

    Convenience wrapper that builds a ``BinaryRegex`` for ``pattern`` and
    returns the result of its ``findall``.

    Args:
        data: Data to search.
        pattern: Binary regex pattern.
        name: Optional pattern name.

    Returns:
        List of all matches.

    Example:
        >>> matches = binary_regex_search(data, r'\\xAA.{4}\\x55')
    """
    return BinaryRegex(pattern=pattern, name=name).findall(data)
741
+
742
+
743
def multi_pattern_search(
    data: bytes,
    patterns: dict[str, bytes | str],
) -> dict[str, list[PatternMatchResult]]:
    """Search for multiple patterns simultaneously.

    Implements RE-PAT-002: Multi-Pattern Search.

    Args:
        data: Data to search.
        patterns: Dictionary mapping names to patterns.

    Returns:
        Dictionary mapping pattern names to match lists. Every key of
        ``patterns`` is present, with an empty list when nothing matched.

    Example:
        >>> patterns = {'header': b'\\xAA\\x55', 'footer': b'\\x00\\x00'}
        >>> results = multi_pattern_search(data, patterns)
    """
    matcher = AhoCorasickMatcher()
    matcher.add_patterns(patterns)
    matcher.build()

    # Group by pattern name
    grouped: dict[str, list[PatternMatchResult]] = {name: [] for name in patterns}
    for match in matcher.search(data):
        key = match.pattern_name
        # The matcher substitutes the pattern's hex string for an empty name,
        # so a match reported under an unknown name belongs to the "" key.
        # Indexing by the reported name directly would raise KeyError.
        if key not in grouped and "" in grouped:
            key = ""
        grouped.setdefault(key, []).append(match)

    return grouped
774
+
775
+
776
def fuzzy_search(
    data: bytes,
    pattern: bytes | str,
    max_distance: int = 2,
    min_similarity: float | None = None,
    name: str = "",
) -> list[FuzzyMatchResult]:
    """Run a one-off approximate search for ``pattern`` in ``data``.

    Implements RE-PAT-003: Fuzzy Pattern Matching.

    Convenience wrapper that configures a ``FuzzyMatcher`` with the given
    limits and returns the result of its ``search``.

    Args:
        data: Data to search.
        pattern: Pattern to match.
        max_distance: Maximum edit distance.
        min_similarity: Minimum similarity threshold (None = no filtering).
        name: Optional pattern name.

    Returns:
        List of fuzzy matches.

    Example:
        >>> matches = fuzzy_search(data, b'\\xAA\\x55\\x00', max_distance=1)
    """
    return FuzzyMatcher(
        max_edit_distance=max_distance,
        min_similarity=min_similarity,
    ).search(data, pattern, pattern_name=name)
805
+
806
+
807
def find_similar_sequences(
    data: bytes,
    min_length: int = 4,
    max_distance: int = 1,
) -> list[tuple[int, int, float]]:
    """Find similar byte sequences within data.

    Implements RE-PAT-003: Fuzzy Pattern Matching.

    Samples fixed-length windows of ``min_length`` bytes every
    ``max(1, min_length // 2)`` bytes, including the final full window, and
    compares every pair of non-overlapping windows by edit distance.

    All sampled windows have the same length, so no length-based
    pre-filtering is needed; the per-pair cost is bounded by the
    thresholded edit-distance helper.

    Args:
        data: Data to analyze.
        min_length: Length of the compared windows. Values below 1 yield
            no results.
        max_distance: Maximum edit distance.

    Returns:
        List of (offset1, offset2, similarity) tuples with
        ``offset1 < offset2`` and
        ``similarity = 1.0 - distance / min_length``.
    """
    results: list[tuple[int, int, float]] = []
    data_len = len(data)

    # A non-positive window length has no meaningful windows (and would
    # divide by zero below); data shorter than one window has none either.
    if min_length < 1 or data_len < min_length:
        return results

    matcher = FuzzyMatcher(max_edit_distance=max_distance)

    # Sample windows; the "+ 1" keeps the last full-length window, which
    # starts at data_len - min_length.
    step = max(1, min_length // 2)
    sequences = [
        (start, data[start : start + min_length])
        for start in range(0, data_len - min_length + 1, step)
    ]

    for first in range(len(sequences)):
        offset1, seq1 = sequences[first]
        for second in range(first + 1, len(sequences)):
            offset2, seq2 = sequences[second]

            # Skip overlapping sequences (offsets are in ascending order).
            if offset2 - offset1 < min_length:
                continue

            distance, _ = _edit_distance_with_threshold(seq1, seq2, max_distance, matcher)

            if distance <= max_distance:
                similarity = 1.0 - (distance / min_length)
                results.append((offset1, offset2, similarity))

    return results
897
+
898
+
899
+ def _edit_distance_with_threshold(
900
+ seq1: bytes, seq2: bytes, threshold: int, matcher: FuzzyMatcher
901
+ ) -> tuple[int, list[tuple[int, int, int]]]:
902
+ """Calculate edit distance with early termination.
903
+
904
+ Optimized version that stops computation if distance exceeds threshold.
905
+ Uses banded dynamic programming to only compute cells near the diagonal,
906
+ which is sufficient when the maximum allowed distance is small.
907
+
908
+ Performance: ~2-3x faster than full DP when threshold is small relative
909
+ to sequence length, as it avoids computing cells that can't contribute
910
+ to a solution within the threshold.
911
+
912
+ Args:
913
+ seq1: First sequence.
914
+ seq2: Second sequence.
915
+ threshold: Maximum allowed edit distance.
916
+ matcher: FuzzyMatcher instance for detailed computation.
917
+
918
+ Returns:
919
+ Tuple of (distance, substitutions). Distance may be > threshold
920
+ if no solution exists within threshold.
921
+ """
922
+ m, n = len(seq1), len(seq2)
923
+
924
+ # Quick reject: if length difference exceeds threshold
925
+ if abs(m - n) > threshold:
926
+ return (abs(m - n), [])
927
+
928
+ # For small thresholds, use banded algorithm
929
+ # Band width = 2 * threshold + 1 (cells within threshold of diagonal)
930
+ if threshold < min(m, n) // 2:
931
+ # Use banded DP for better performance
932
+ return _banded_edit_distance(seq1, seq2, threshold)
933
+ else:
934
+ # Fall back to full computation for large thresholds
935
+ return matcher._edit_distance_detailed(seq1, seq2)
936
+
937
+
938
+ def _banded_edit_distance(
939
+ seq1: bytes, seq2: bytes, max_dist: int
940
+ ) -> tuple[int, list[tuple[int, int, int]]]:
941
+ """Compute edit distance using banded DP algorithm.
942
+
943
+ Only computes cells within max_dist of the main diagonal, which is
944
+ sufficient when we only care about distances up to max_dist. This
945
+ reduces time complexity from O(m*n) to O(max_dist * min(m,n)).
946
+
947
+ Args:
948
+ seq1: First sequence.
949
+ seq2: Second sequence.
950
+ max_dist: Maximum distance threshold.
951
+
952
+ Returns:
953
+ Tuple of (distance, substitutions). Substitutions may be approximate.
954
+ """
955
+ m, n = len(seq1), len(seq2)
956
+
957
+ # Use two rows for space efficiency
958
+ INF = max_dist + 100 # Sentinel value for unreachable cells
959
+ band_width = 2 * max_dist + 1
960
+
961
+ prev_row = [INF] * band_width
962
+ curr_row = [INF] * band_width
963
+
964
+ # Initialize first row
965
+ for j in range(min(band_width, n + 1)):
966
+ prev_row[j] = j
967
+
968
+ for i in range(1, m + 1):
969
+ # Reset current row
970
+ for k in range(band_width):
971
+ curr_row[k] = INF
972
+
973
+ curr_row[0] = i
974
+
975
+ # Compute band around diagonal
976
+ # j ranges from max(1, i-max_dist) to min(n, i+max_dist)
977
+ j_start = max(1, i - max_dist)
978
+ j_end = min(n, i + max_dist)
979
+
980
+ for j in range(j_start, j_end + 1):
981
+ # Map j to band index
982
+ band_idx = j - i + max_dist
983
+ if band_idx < 0 or band_idx >= band_width:
984
+ continue
985
+
986
+ if seq1[i - 1] == seq2[j - 1]:
987
+ # Match: no cost
988
+ prev_band_idx = band_idx
989
+ curr_row[band_idx] = prev_row[prev_band_idx] if prev_band_idx < band_width else INF
990
+ else:
991
+ # Min of substitution, insertion, deletion
992
+ cost = INF
993
+
994
+ # Substitution: from (i-1, j-1)
995
+ prev_band_idx = band_idx
996
+ if prev_band_idx < band_width:
997
+ cost = min(cost, prev_row[prev_band_idx] + 1)
998
+
999
+ # Deletion: from (i-1, j)
1000
+ prev_band_idx = band_idx + 1
1001
+ if prev_band_idx < band_width:
1002
+ cost = min(cost, prev_row[prev_band_idx] + 1)
1003
+
1004
+ # Insertion: from (i, j-1)
1005
+ curr_band_idx = band_idx - 1
1006
+ if curr_band_idx >= 0:
1007
+ cost = min(cost, curr_row[curr_band_idx] + 1)
1008
+
1009
+ curr_row[band_idx] = cost
1010
+
1011
+ # Swap rows
1012
+ prev_row, curr_row = curr_row, prev_row
1013
+
1014
+ # Extract result from final position
1015
+ final_band_idx = n - m + max_dist
1016
+ if final_band_idx >= 0 and final_band_idx < band_width:
1017
+ distance = prev_row[final_band_idx]
1018
+ else:
1019
+ distance = INF
1020
+
1021
+ # Don't compute detailed substitutions for banded version (expensive)
1022
+ # Return empty list - caller should use this for filtering only
1023
+ return (min(distance, INF), [])
1024
+
1025
+
1026
def count_pattern_occurrences(
    data: bytes,
    patterns: dict[str, bytes | str],
) -> dict[str, int]:
    """Tally how often each named pattern occurs in ``data``.

    Implements RE-PAT-002: Multi-Pattern Search.

    Delegates the search to ``multi_pattern_search`` and reduces each match
    list to its length.

    Args:
        data: Data to search.
        patterns: Dictionary mapping names to patterns.

    Returns:
        Dictionary mapping pattern names to counts.
    """
    counts: dict[str, int] = {}
    for name, matches in multi_pattern_search(data, patterns).items():
        counts[name] = len(matches)
    return counts
1043
+
1044
+
1045
+ def find_pattern_positions(
1046
+ data: bytes,
1047
+ pattern: bytes | str,
1048
+ ) -> list[int]:
1049
+ """Find all positions of a pattern in data.
1050
+
1051
+ Args:
1052
+ data: Data to search.
1053
+ pattern: Pattern to find.
1054
+
1055
+ Returns:
1056
+ List of byte offsets.
1057
+ """
1058
+ if isinstance(pattern, str):
1059
+ pattern = pattern.encode()
1060
+
1061
+ positions = []
1062
+ start = 0
1063
+ while True:
1064
+ pos = data.find(pattern, start)
1065
+ if pos == -1:
1066
+ break
1067
+ positions.append(pos)
1068
+ start = pos + 1
1069
+
1070
+ return positions
1071
+
1072
+
1073
__all__ = [
    # Public API in ASCII-sorted order. The trailing comment on each entry
    # gives its kind and, for functions, the requirement tag named in its
    # docstring.
    "AhoCorasickMatcher",  # class
    "BinaryRegex",  # class
    "FuzzyMatchResult",  # data class
    "FuzzyMatcher",  # class
    "PatternMatchResult",  # data class
    "binary_regex_search",  # RE-PAT-001: Binary Regex
    "count_pattern_occurrences",  # RE-PAT-002: Multi-Pattern Search
    "find_pattern_positions",  # utility
    "find_similar_sequences",  # RE-PAT-003: Fuzzy Matching
    "fuzzy_search",  # RE-PAT-003: Fuzzy Matching
    "multi_pattern_search",  # RE-PAT-002: Multi-Pattern Search
]