oscura 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (465) hide show
  1. oscura/__init__.py +813 -8
  2. oscura/__main__.py +392 -0
  3. oscura/analyzers/__init__.py +37 -0
  4. oscura/analyzers/digital/__init__.py +177 -0
  5. oscura/analyzers/digital/bus.py +691 -0
  6. oscura/analyzers/digital/clock.py +805 -0
  7. oscura/analyzers/digital/correlation.py +720 -0
  8. oscura/analyzers/digital/edges.py +632 -0
  9. oscura/analyzers/digital/extraction.py +413 -0
  10. oscura/analyzers/digital/quality.py +878 -0
  11. oscura/analyzers/digital/signal_quality.py +877 -0
  12. oscura/analyzers/digital/thresholds.py +708 -0
  13. oscura/analyzers/digital/timing.py +1104 -0
  14. oscura/analyzers/eye/__init__.py +46 -0
  15. oscura/analyzers/eye/diagram.py +434 -0
  16. oscura/analyzers/eye/metrics.py +555 -0
  17. oscura/analyzers/jitter/__init__.py +83 -0
  18. oscura/analyzers/jitter/ber.py +333 -0
  19. oscura/analyzers/jitter/decomposition.py +759 -0
  20. oscura/analyzers/jitter/measurements.py +413 -0
  21. oscura/analyzers/jitter/spectrum.py +220 -0
  22. oscura/analyzers/measurements.py +40 -0
  23. oscura/analyzers/packet/__init__.py +171 -0
  24. oscura/analyzers/packet/daq.py +1077 -0
  25. oscura/analyzers/packet/metrics.py +437 -0
  26. oscura/analyzers/packet/parser.py +327 -0
  27. oscura/analyzers/packet/payload.py +2156 -0
  28. oscura/analyzers/packet/payload_analysis.py +1312 -0
  29. oscura/analyzers/packet/payload_extraction.py +236 -0
  30. oscura/analyzers/packet/payload_patterns.py +670 -0
  31. oscura/analyzers/packet/stream.py +359 -0
  32. oscura/analyzers/patterns/__init__.py +266 -0
  33. oscura/analyzers/patterns/clustering.py +1036 -0
  34. oscura/analyzers/patterns/discovery.py +539 -0
  35. oscura/analyzers/patterns/learning.py +797 -0
  36. oscura/analyzers/patterns/matching.py +1091 -0
  37. oscura/analyzers/patterns/periodic.py +650 -0
  38. oscura/analyzers/patterns/sequences.py +767 -0
  39. oscura/analyzers/power/__init__.py +116 -0
  40. oscura/analyzers/power/ac_power.py +391 -0
  41. oscura/analyzers/power/basic.py +383 -0
  42. oscura/analyzers/power/conduction.py +314 -0
  43. oscura/analyzers/power/efficiency.py +297 -0
  44. oscura/analyzers/power/ripple.py +356 -0
  45. oscura/analyzers/power/soa.py +372 -0
  46. oscura/analyzers/power/switching.py +479 -0
  47. oscura/analyzers/protocol/__init__.py +150 -0
  48. oscura/analyzers/protocols/__init__.py +150 -0
  49. oscura/analyzers/protocols/base.py +500 -0
  50. oscura/analyzers/protocols/can.py +620 -0
  51. oscura/analyzers/protocols/can_fd.py +448 -0
  52. oscura/analyzers/protocols/flexray.py +405 -0
  53. oscura/analyzers/protocols/hdlc.py +399 -0
  54. oscura/analyzers/protocols/i2c.py +368 -0
  55. oscura/analyzers/protocols/i2s.py +296 -0
  56. oscura/analyzers/protocols/jtag.py +393 -0
  57. oscura/analyzers/protocols/lin.py +445 -0
  58. oscura/analyzers/protocols/manchester.py +333 -0
  59. oscura/analyzers/protocols/onewire.py +501 -0
  60. oscura/analyzers/protocols/spi.py +334 -0
  61. oscura/analyzers/protocols/swd.py +325 -0
  62. oscura/analyzers/protocols/uart.py +393 -0
  63. oscura/analyzers/protocols/usb.py +495 -0
  64. oscura/analyzers/signal_integrity/__init__.py +63 -0
  65. oscura/analyzers/signal_integrity/embedding.py +294 -0
  66. oscura/analyzers/signal_integrity/equalization.py +370 -0
  67. oscura/analyzers/signal_integrity/sparams.py +484 -0
  68. oscura/analyzers/spectral/__init__.py +53 -0
  69. oscura/analyzers/spectral/chunked.py +273 -0
  70. oscura/analyzers/spectral/chunked_fft.py +571 -0
  71. oscura/analyzers/spectral/chunked_wavelet.py +391 -0
  72. oscura/analyzers/spectral/fft.py +92 -0
  73. oscura/analyzers/statistical/__init__.py +250 -0
  74. oscura/analyzers/statistical/checksum.py +923 -0
  75. oscura/analyzers/statistical/chunked_corr.py +228 -0
  76. oscura/analyzers/statistical/classification.py +778 -0
  77. oscura/analyzers/statistical/entropy.py +1113 -0
  78. oscura/analyzers/statistical/ngrams.py +614 -0
  79. oscura/analyzers/statistics/__init__.py +119 -0
  80. oscura/analyzers/statistics/advanced.py +885 -0
  81. oscura/analyzers/statistics/basic.py +263 -0
  82. oscura/analyzers/statistics/correlation.py +630 -0
  83. oscura/analyzers/statistics/distribution.py +298 -0
  84. oscura/analyzers/statistics/outliers.py +463 -0
  85. oscura/analyzers/statistics/streaming.py +93 -0
  86. oscura/analyzers/statistics/trend.py +520 -0
  87. oscura/analyzers/validation.py +598 -0
  88. oscura/analyzers/waveform/__init__.py +36 -0
  89. oscura/analyzers/waveform/measurements.py +943 -0
  90. oscura/analyzers/waveform/measurements_with_uncertainty.py +371 -0
  91. oscura/analyzers/waveform/spectral.py +1689 -0
  92. oscura/analyzers/waveform/wavelets.py +298 -0
  93. oscura/api/__init__.py +62 -0
  94. oscura/api/dsl.py +538 -0
  95. oscura/api/fluent.py +571 -0
  96. oscura/api/operators.py +498 -0
  97. oscura/api/optimization.py +392 -0
  98. oscura/api/profiling.py +396 -0
  99. oscura/automotive/__init__.py +73 -0
  100. oscura/automotive/can/__init__.py +52 -0
  101. oscura/automotive/can/analysis.py +356 -0
  102. oscura/automotive/can/checksum.py +250 -0
  103. oscura/automotive/can/correlation.py +212 -0
  104. oscura/automotive/can/discovery.py +355 -0
  105. oscura/automotive/can/message_wrapper.py +375 -0
  106. oscura/automotive/can/models.py +385 -0
  107. oscura/automotive/can/patterns.py +381 -0
  108. oscura/automotive/can/session.py +452 -0
  109. oscura/automotive/can/state_machine.py +300 -0
  110. oscura/automotive/can/stimulus_response.py +461 -0
  111. oscura/automotive/dbc/__init__.py +15 -0
  112. oscura/automotive/dbc/generator.py +156 -0
  113. oscura/automotive/dbc/parser.py +146 -0
  114. oscura/automotive/dtc/__init__.py +30 -0
  115. oscura/automotive/dtc/database.py +3036 -0
  116. oscura/automotive/j1939/__init__.py +14 -0
  117. oscura/automotive/j1939/decoder.py +745 -0
  118. oscura/automotive/loaders/__init__.py +35 -0
  119. oscura/automotive/loaders/asc.py +98 -0
  120. oscura/automotive/loaders/blf.py +77 -0
  121. oscura/automotive/loaders/csv_can.py +136 -0
  122. oscura/automotive/loaders/dispatcher.py +136 -0
  123. oscura/automotive/loaders/mdf.py +331 -0
  124. oscura/automotive/loaders/pcap.py +132 -0
  125. oscura/automotive/obd/__init__.py +14 -0
  126. oscura/automotive/obd/decoder.py +707 -0
  127. oscura/automotive/uds/__init__.py +48 -0
  128. oscura/automotive/uds/decoder.py +265 -0
  129. oscura/automotive/uds/models.py +64 -0
  130. oscura/automotive/visualization.py +369 -0
  131. oscura/batch/__init__.py +55 -0
  132. oscura/batch/advanced.py +627 -0
  133. oscura/batch/aggregate.py +300 -0
  134. oscura/batch/analyze.py +139 -0
  135. oscura/batch/logging.py +487 -0
  136. oscura/batch/metrics.py +556 -0
  137. oscura/builders/__init__.py +41 -0
  138. oscura/builders/signal_builder.py +1131 -0
  139. oscura/cli/__init__.py +14 -0
  140. oscura/cli/batch.py +339 -0
  141. oscura/cli/characterize.py +273 -0
  142. oscura/cli/compare.py +775 -0
  143. oscura/cli/decode.py +551 -0
  144. oscura/cli/main.py +247 -0
  145. oscura/cli/shell.py +350 -0
  146. oscura/comparison/__init__.py +66 -0
  147. oscura/comparison/compare.py +397 -0
  148. oscura/comparison/golden.py +487 -0
  149. oscura/comparison/limits.py +391 -0
  150. oscura/comparison/mask.py +434 -0
  151. oscura/comparison/trace_diff.py +30 -0
  152. oscura/comparison/visualization.py +481 -0
  153. oscura/compliance/__init__.py +70 -0
  154. oscura/compliance/advanced.py +756 -0
  155. oscura/compliance/masks.py +363 -0
  156. oscura/compliance/reporting.py +483 -0
  157. oscura/compliance/testing.py +298 -0
  158. oscura/component/__init__.py +38 -0
  159. oscura/component/impedance.py +365 -0
  160. oscura/component/reactive.py +598 -0
  161. oscura/component/transmission_line.py +312 -0
  162. oscura/config/__init__.py +191 -0
  163. oscura/config/defaults.py +254 -0
  164. oscura/config/loader.py +348 -0
  165. oscura/config/memory.py +271 -0
  166. oscura/config/migration.py +458 -0
  167. oscura/config/pipeline.py +1077 -0
  168. oscura/config/preferences.py +530 -0
  169. oscura/config/protocol.py +875 -0
  170. oscura/config/schema.py +713 -0
  171. oscura/config/settings.py +420 -0
  172. oscura/config/thresholds.py +599 -0
  173. oscura/convenience.py +457 -0
  174. oscura/core/__init__.py +299 -0
  175. oscura/core/audit.py +457 -0
  176. oscura/core/backend_selector.py +405 -0
  177. oscura/core/cache.py +590 -0
  178. oscura/core/cancellation.py +439 -0
  179. oscura/core/confidence.py +225 -0
  180. oscura/core/config.py +506 -0
  181. oscura/core/correlation.py +216 -0
  182. oscura/core/cross_domain.py +422 -0
  183. oscura/core/debug.py +301 -0
  184. oscura/core/edge_cases.py +541 -0
  185. oscura/core/exceptions.py +535 -0
  186. oscura/core/gpu_backend.py +523 -0
  187. oscura/core/lazy.py +832 -0
  188. oscura/core/log_query.py +540 -0
  189. oscura/core/logging.py +931 -0
  190. oscura/core/logging_advanced.py +952 -0
  191. oscura/core/memoize.py +171 -0
  192. oscura/core/memory_check.py +274 -0
  193. oscura/core/memory_guard.py +290 -0
  194. oscura/core/memory_limits.py +336 -0
  195. oscura/core/memory_monitor.py +453 -0
  196. oscura/core/memory_progress.py +465 -0
  197. oscura/core/memory_warnings.py +315 -0
  198. oscura/core/numba_backend.py +362 -0
  199. oscura/core/performance.py +352 -0
  200. oscura/core/progress.py +524 -0
  201. oscura/core/provenance.py +358 -0
  202. oscura/core/results.py +331 -0
  203. oscura/core/types.py +504 -0
  204. oscura/core/uncertainty.py +383 -0
  205. oscura/discovery/__init__.py +52 -0
  206. oscura/discovery/anomaly_detector.py +672 -0
  207. oscura/discovery/auto_decoder.py +415 -0
  208. oscura/discovery/comparison.py +497 -0
  209. oscura/discovery/quality_validator.py +528 -0
  210. oscura/discovery/signal_detector.py +769 -0
  211. oscura/dsl/__init__.py +73 -0
  212. oscura/dsl/commands.py +246 -0
  213. oscura/dsl/interpreter.py +455 -0
  214. oscura/dsl/parser.py +689 -0
  215. oscura/dsl/repl.py +172 -0
  216. oscura/exceptions.py +59 -0
  217. oscura/exploratory/__init__.py +111 -0
  218. oscura/exploratory/error_recovery.py +642 -0
  219. oscura/exploratory/fuzzy.py +513 -0
  220. oscura/exploratory/fuzzy_advanced.py +786 -0
  221. oscura/exploratory/legacy.py +831 -0
  222. oscura/exploratory/parse.py +358 -0
  223. oscura/exploratory/recovery.py +275 -0
  224. oscura/exploratory/sync.py +382 -0
  225. oscura/exploratory/unknown.py +707 -0
  226. oscura/export/__init__.py +25 -0
  227. oscura/export/wireshark/README.md +265 -0
  228. oscura/export/wireshark/__init__.py +47 -0
  229. oscura/export/wireshark/generator.py +312 -0
  230. oscura/export/wireshark/lua_builder.py +159 -0
  231. oscura/export/wireshark/templates/dissector.lua.j2 +92 -0
  232. oscura/export/wireshark/type_mapping.py +165 -0
  233. oscura/export/wireshark/validator.py +105 -0
  234. oscura/exporters/__init__.py +94 -0
  235. oscura/exporters/csv.py +303 -0
  236. oscura/exporters/exporters.py +44 -0
  237. oscura/exporters/hdf5.py +219 -0
  238. oscura/exporters/html_export.py +701 -0
  239. oscura/exporters/json_export.py +291 -0
  240. oscura/exporters/markdown_export.py +367 -0
  241. oscura/exporters/matlab_export.py +354 -0
  242. oscura/exporters/npz_export.py +219 -0
  243. oscura/exporters/spice_export.py +210 -0
  244. oscura/extensibility/__init__.py +131 -0
  245. oscura/extensibility/docs.py +752 -0
  246. oscura/extensibility/extensions.py +1125 -0
  247. oscura/extensibility/logging.py +259 -0
  248. oscura/extensibility/measurements.py +485 -0
  249. oscura/extensibility/plugins.py +414 -0
  250. oscura/extensibility/registry.py +346 -0
  251. oscura/extensibility/templates.py +913 -0
  252. oscura/extensibility/validation.py +651 -0
  253. oscura/filtering/__init__.py +89 -0
  254. oscura/filtering/base.py +563 -0
  255. oscura/filtering/convenience.py +564 -0
  256. oscura/filtering/design.py +725 -0
  257. oscura/filtering/filters.py +32 -0
  258. oscura/filtering/introspection.py +605 -0
  259. oscura/guidance/__init__.py +24 -0
  260. oscura/guidance/recommender.py +429 -0
  261. oscura/guidance/wizard.py +518 -0
  262. oscura/inference/__init__.py +251 -0
  263. oscura/inference/active_learning/README.md +153 -0
  264. oscura/inference/active_learning/__init__.py +38 -0
  265. oscura/inference/active_learning/lstar.py +257 -0
  266. oscura/inference/active_learning/observation_table.py +230 -0
  267. oscura/inference/active_learning/oracle.py +78 -0
  268. oscura/inference/active_learning/teachers/__init__.py +15 -0
  269. oscura/inference/active_learning/teachers/simulator.py +192 -0
  270. oscura/inference/adaptive_tuning.py +453 -0
  271. oscura/inference/alignment.py +653 -0
  272. oscura/inference/bayesian.py +943 -0
  273. oscura/inference/binary.py +1016 -0
  274. oscura/inference/crc_reverse.py +711 -0
  275. oscura/inference/logic.py +288 -0
  276. oscura/inference/message_format.py +1305 -0
  277. oscura/inference/protocol.py +417 -0
  278. oscura/inference/protocol_dsl.py +1084 -0
  279. oscura/inference/protocol_library.py +1230 -0
  280. oscura/inference/sequences.py +809 -0
  281. oscura/inference/signal_intelligence.py +1509 -0
  282. oscura/inference/spectral.py +215 -0
  283. oscura/inference/state_machine.py +634 -0
  284. oscura/inference/stream.py +918 -0
  285. oscura/integrations/__init__.py +59 -0
  286. oscura/integrations/llm.py +1827 -0
  287. oscura/jupyter/__init__.py +32 -0
  288. oscura/jupyter/display.py +268 -0
  289. oscura/jupyter/magic.py +334 -0
  290. oscura/loaders/__init__.py +526 -0
  291. oscura/loaders/binary.py +69 -0
  292. oscura/loaders/configurable.py +1255 -0
  293. oscura/loaders/csv.py +26 -0
  294. oscura/loaders/csv_loader.py +473 -0
  295. oscura/loaders/hdf5.py +9 -0
  296. oscura/loaders/hdf5_loader.py +510 -0
  297. oscura/loaders/lazy.py +370 -0
  298. oscura/loaders/mmap_loader.py +583 -0
  299. oscura/loaders/numpy_loader.py +436 -0
  300. oscura/loaders/pcap.py +432 -0
  301. oscura/loaders/preprocessing.py +368 -0
  302. oscura/loaders/rigol.py +287 -0
  303. oscura/loaders/sigrok.py +321 -0
  304. oscura/loaders/tdms.py +367 -0
  305. oscura/loaders/tektronix.py +711 -0
  306. oscura/loaders/validation.py +584 -0
  307. oscura/loaders/vcd.py +464 -0
  308. oscura/loaders/wav.py +233 -0
  309. oscura/math/__init__.py +45 -0
  310. oscura/math/arithmetic.py +824 -0
  311. oscura/math/interpolation.py +413 -0
  312. oscura/onboarding/__init__.py +39 -0
  313. oscura/onboarding/help.py +498 -0
  314. oscura/onboarding/tutorials.py +405 -0
  315. oscura/onboarding/wizard.py +466 -0
  316. oscura/optimization/__init__.py +19 -0
  317. oscura/optimization/parallel.py +440 -0
  318. oscura/optimization/search.py +532 -0
  319. oscura/pipeline/__init__.py +43 -0
  320. oscura/pipeline/base.py +338 -0
  321. oscura/pipeline/composition.py +242 -0
  322. oscura/pipeline/parallel.py +448 -0
  323. oscura/pipeline/pipeline.py +375 -0
  324. oscura/pipeline/reverse_engineering.py +1119 -0
  325. oscura/plugins/__init__.py +122 -0
  326. oscura/plugins/base.py +272 -0
  327. oscura/plugins/cli.py +497 -0
  328. oscura/plugins/discovery.py +411 -0
  329. oscura/plugins/isolation.py +418 -0
  330. oscura/plugins/lifecycle.py +959 -0
  331. oscura/plugins/manager.py +493 -0
  332. oscura/plugins/registry.py +421 -0
  333. oscura/plugins/versioning.py +372 -0
  334. oscura/py.typed +0 -0
  335. oscura/quality/__init__.py +65 -0
  336. oscura/quality/ensemble.py +740 -0
  337. oscura/quality/explainer.py +338 -0
  338. oscura/quality/scoring.py +616 -0
  339. oscura/quality/warnings.py +456 -0
  340. oscura/reporting/__init__.py +248 -0
  341. oscura/reporting/advanced.py +1234 -0
  342. oscura/reporting/analyze.py +448 -0
  343. oscura/reporting/argument_preparer.py +596 -0
  344. oscura/reporting/auto_report.py +507 -0
  345. oscura/reporting/batch.py +615 -0
  346. oscura/reporting/chart_selection.py +223 -0
  347. oscura/reporting/comparison.py +330 -0
  348. oscura/reporting/config.py +615 -0
  349. oscura/reporting/content/__init__.py +39 -0
  350. oscura/reporting/content/executive.py +127 -0
  351. oscura/reporting/content/filtering.py +191 -0
  352. oscura/reporting/content/minimal.py +257 -0
  353. oscura/reporting/content/verbosity.py +162 -0
  354. oscura/reporting/core.py +508 -0
  355. oscura/reporting/core_formats/__init__.py +17 -0
  356. oscura/reporting/core_formats/multi_format.py +210 -0
  357. oscura/reporting/engine.py +836 -0
  358. oscura/reporting/export.py +366 -0
  359. oscura/reporting/formatting/__init__.py +129 -0
  360. oscura/reporting/formatting/emphasis.py +81 -0
  361. oscura/reporting/formatting/numbers.py +403 -0
  362. oscura/reporting/formatting/standards.py +55 -0
  363. oscura/reporting/formatting.py +466 -0
  364. oscura/reporting/html.py +578 -0
  365. oscura/reporting/index.py +590 -0
  366. oscura/reporting/multichannel.py +296 -0
  367. oscura/reporting/output.py +379 -0
  368. oscura/reporting/pdf.py +373 -0
  369. oscura/reporting/plots.py +731 -0
  370. oscura/reporting/pptx_export.py +360 -0
  371. oscura/reporting/renderers/__init__.py +11 -0
  372. oscura/reporting/renderers/pdf.py +94 -0
  373. oscura/reporting/sections.py +471 -0
  374. oscura/reporting/standards.py +680 -0
  375. oscura/reporting/summary_generator.py +368 -0
  376. oscura/reporting/tables.py +397 -0
  377. oscura/reporting/template_system.py +724 -0
  378. oscura/reporting/templates/__init__.py +15 -0
  379. oscura/reporting/templates/definition.py +205 -0
  380. oscura/reporting/templates/index.html +649 -0
  381. oscura/reporting/templates/index.md +173 -0
  382. oscura/schemas/__init__.py +158 -0
  383. oscura/schemas/bus_configuration.json +322 -0
  384. oscura/schemas/device_mapping.json +182 -0
  385. oscura/schemas/packet_format.json +418 -0
  386. oscura/schemas/protocol_definition.json +363 -0
  387. oscura/search/__init__.py +16 -0
  388. oscura/search/anomaly.py +292 -0
  389. oscura/search/context.py +149 -0
  390. oscura/search/pattern.py +160 -0
  391. oscura/session/__init__.py +34 -0
  392. oscura/session/annotations.py +289 -0
  393. oscura/session/history.py +313 -0
  394. oscura/session/session.py +445 -0
  395. oscura/streaming/__init__.py +43 -0
  396. oscura/streaming/chunked.py +611 -0
  397. oscura/streaming/progressive.py +393 -0
  398. oscura/streaming/realtime.py +622 -0
  399. oscura/testing/__init__.py +54 -0
  400. oscura/testing/synthetic.py +808 -0
  401. oscura/triggering/__init__.py +68 -0
  402. oscura/triggering/base.py +229 -0
  403. oscura/triggering/edge.py +353 -0
  404. oscura/triggering/pattern.py +344 -0
  405. oscura/triggering/pulse.py +581 -0
  406. oscura/triggering/window.py +453 -0
  407. oscura/ui/__init__.py +48 -0
  408. oscura/ui/formatters.py +526 -0
  409. oscura/ui/progressive_display.py +340 -0
  410. oscura/utils/__init__.py +99 -0
  411. oscura/utils/autodetect.py +338 -0
  412. oscura/utils/buffer.py +389 -0
  413. oscura/utils/lazy.py +407 -0
  414. oscura/utils/lazy_imports.py +147 -0
  415. oscura/utils/memory.py +836 -0
  416. oscura/utils/memory_advanced.py +1326 -0
  417. oscura/utils/memory_extensions.py +465 -0
  418. oscura/utils/progressive.py +352 -0
  419. oscura/utils/windowing.py +362 -0
  420. oscura/visualization/__init__.py +321 -0
  421. oscura/visualization/accessibility.py +526 -0
  422. oscura/visualization/annotations.py +374 -0
  423. oscura/visualization/axis_scaling.py +305 -0
  424. oscura/visualization/colors.py +453 -0
  425. oscura/visualization/digital.py +337 -0
  426. oscura/visualization/eye.py +420 -0
  427. oscura/visualization/histogram.py +281 -0
  428. oscura/visualization/interactive.py +858 -0
  429. oscura/visualization/jitter.py +702 -0
  430. oscura/visualization/keyboard.py +394 -0
  431. oscura/visualization/layout.py +365 -0
  432. oscura/visualization/optimization.py +1028 -0
  433. oscura/visualization/palettes.py +446 -0
  434. oscura/visualization/plot.py +92 -0
  435. oscura/visualization/power.py +290 -0
  436. oscura/visualization/power_extended.py +626 -0
  437. oscura/visualization/presets.py +467 -0
  438. oscura/visualization/protocols.py +932 -0
  439. oscura/visualization/render.py +207 -0
  440. oscura/visualization/rendering.py +444 -0
  441. oscura/visualization/reverse_engineering.py +791 -0
  442. oscura/visualization/signal_integrity.py +808 -0
  443. oscura/visualization/specialized.py +553 -0
  444. oscura/visualization/spectral.py +811 -0
  445. oscura/visualization/styles.py +381 -0
  446. oscura/visualization/thumbnails.py +311 -0
  447. oscura/visualization/time_axis.py +351 -0
  448. oscura/visualization/waveform.py +367 -0
  449. oscura/workflow/__init__.py +13 -0
  450. oscura/workflow/dag.py +377 -0
  451. oscura/workflows/__init__.py +58 -0
  452. oscura/workflows/compliance.py +280 -0
  453. oscura/workflows/digital.py +272 -0
  454. oscura/workflows/multi_trace.py +502 -0
  455. oscura/workflows/power.py +178 -0
  456. oscura/workflows/protocol.py +492 -0
  457. oscura/workflows/reverse_engineering.py +639 -0
  458. oscura/workflows/signal_integrity.py +227 -0
  459. oscura-0.1.0.dist-info/METADATA +300 -0
  460. oscura-0.1.0.dist-info/RECORD +463 -0
  461. oscura-0.1.0.dist-info/entry_points.txt +2 -0
  462. {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/licenses/LICENSE +1 -1
  463. oscura-0.0.1.dist-info/METADATA +0 -63
  464. oscura-0.0.1.dist-info/RECORD +0 -5
  465. {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1036 @@
1
+ """Pattern clustering by similarity.
2
+
3
+ This module implements algorithms for clustering similar patterns/messages
4
+ using various distance metrics and clustering approaches.
5
+
6
+
7
+ Author: TraceKit Development Team
8
+ """
9
+
10
+ from dataclasses import dataclass
11
+ from typing import Literal
12
+
13
+ import numpy as np
14
+
15
+
16
+ def cluster_messages(
17
+ data: np.ndarray[tuple[int, int], np.dtype[np.float64]],
18
+ n_clusters: int = 3,
19
+ method: str = "kmeans",
20
+ random_state: int | None = None,
21
+ ) -> np.ndarray[tuple[int], np.dtype[np.int_]]:
22
+ """Cluster data points using K-means algorithm.
23
+
24
+ Groups data points into n_clusters clusters using K-means clustering.
25
+ Supports deterministic clustering with random_state.
26
+
27
+ Args:
28
+ data: Data points as (n_points, dimensions) array
29
+ n_clusters: Number of clusters to create
30
+ method: Clustering method (default: 'kmeans')
31
+ random_state: Random seed for deterministic results
32
+
33
+ Returns:
34
+ Array of cluster labels (one per data point), in range [0, n_clusters)
35
+
36
+ Raises:
37
+ ValueError: If n_clusters is invalid or data shape is incorrect
38
+
39
+ Examples:
40
+ >>> data = np.random.randn(20, 2)
41
+ >>> labels = cluster_messages(data, n_clusters=3, random_state=42)
42
+ >>> assert len(labels) == 20
43
+ >>> assert np.all((labels >= 0) & (labels < 3))
44
+ """
45
+ if data.ndim != 2:
46
+ raise ValueError(f"Expected 2D data array, got shape {data.shape}")
47
+
48
+ if n_clusters < 1:
49
+ raise ValueError(f"n_clusters must be >= 1, got {n_clusters}")
50
+
51
+ n_points = data.shape[0]
52
+ if n_clusters > n_points:
53
+ raise ValueError(f"n_clusters ({n_clusters}) cannot exceed n_points ({n_points})")
54
+
55
+ # Use K-means clustering
56
+ return _kmeans_clustering(data, n_clusters=n_clusters, random_state=random_state)
57
+
58
+
59
+ def _kmeans_clustering(
60
+ data: np.ndarray[tuple[int, int], np.dtype[np.float64]],
61
+ n_clusters: int,
62
+ random_state: int | None = None,
63
+ max_iterations: int = 100,
64
+ tolerance: float = 1e-4,
65
+ ) -> np.ndarray[tuple[int], np.dtype[np.int_]]:
66
+ """K-means clustering implementation.
67
+
68
+ Args:
69
+ data: Input data points (n_points, dimensions)
70
+ n_clusters: Number of clusters
71
+ random_state: Random seed
72
+ max_iterations: Maximum iterations
73
+ tolerance: Convergence tolerance
74
+
75
+ Returns:
76
+ Cluster labels for each point
77
+ """
78
+ if random_state is not None:
79
+ np.random.seed(random_state)
80
+
81
+ n_points = data.shape[0]
82
+
83
+ # Initialize centroids randomly from data points
84
+ initial_indices = np.random.choice(n_points, size=n_clusters, replace=False)
85
+ centroids = data[initial_indices].copy()
86
+
87
+ labels = np.zeros(n_points, dtype=int)
88
+
89
+ for _iteration in range(max_iterations):
90
+ # Assign points to nearest centroid
91
+ distances = np.zeros((n_points, n_clusters))
92
+ for k in range(n_clusters):
93
+ distances[:, k] = np.linalg.norm(data - centroids[k], axis=1)
94
+
95
+ new_labels = np.argmin(distances, axis=1)
96
+
97
+ # Check for convergence
98
+ if np.array_equal(new_labels, labels):
99
+ break
100
+
101
+ labels = new_labels
102
+
103
+ # Update centroids
104
+ for k in range(n_clusters):
105
+ cluster_points = data[labels == k]
106
+ if len(cluster_points) > 0:
107
+ centroids[k] = np.mean(cluster_points, axis=0)
108
+
109
+ return labels
110
+
111
+
112
+ @dataclass
113
+ class ClusterResult:
114
+ """Result of pattern clustering.
115
+
116
+ Attributes:
117
+ cluster_id: Unique cluster identifier
118
+ patterns: List of patterns in this cluster
119
+ centroid: Representative pattern (centroid)
120
+ size: Number of patterns in cluster
121
+ variance: Within-cluster variance
122
+ common_bytes: Byte positions that are constant across all patterns
123
+ variable_bytes: Byte positions that vary across patterns
124
+ """
125
+
126
+ cluster_id: int
127
+ patterns: list[bytes | np.ndarray[tuple[int], np.dtype[np.uint8]]]
128
+ centroid: bytes | np.ndarray[tuple[int], np.dtype[np.uint8]]
129
+ size: int
130
+ variance: float
131
+ common_bytes: list[int]
132
+ variable_bytes: list[int]
133
+
134
+ def __post_init__(self) -> None:
135
+ """Validate cluster result."""
136
+ if self.cluster_id < 0:
137
+ raise ValueError("cluster_id must be non-negative")
138
+ if self.size < 0:
139
+ raise ValueError("size must be non-negative")
140
+ if len(self.patterns) != self.size:
141
+ raise ValueError("patterns length must match size")
142
+
143
+
144
@dataclass
class ClusteringResult:
    """Full output of a clustering run.

    Attributes:
        clusters: List of ClusterResult objects
        labels: Cluster assignment for each input pattern
        num_clusters: Total number of clusters
        silhouette_score: Clustering quality metric (-1 to 1, higher = better)
    """

    clusters: list[ClusterResult]
    labels: np.ndarray[tuple[int], np.dtype[np.int_]]
    num_clusters: int
    silhouette_score: float

    def __post_init__(self) -> None:
        """Ensure the declared cluster count agrees with the cluster list."""
        if len(self.clusters) != self.num_clusters:
            raise ValueError("num_clusters must match clusters length")
164
+
165
+
166
def cluster_by_hamming(
    patterns: list[bytes | np.ndarray[tuple[int], np.dtype[np.uint8]]],
    threshold: float = 0.2,
    min_cluster_size: int = 2,
) -> ClusteringResult:
    """Cluster fixed-length patterns by Hamming distance.

    Groups patterns that differ by at most threshold * pattern_length bits.
    Efficient for fixed-length binary patterns. Uses a greedy single-pass,
    complete-linkage scheme: a pattern joins a cluster only if it is within
    threshold of every current member. Patterns left in clusters smaller
    than min_cluster_size are marked as noise (label -1).

    Args:
        patterns: List of patterns (all must have same length)
        threshold: Maximum normalized Hamming distance within cluster (0-1)
        min_cluster_size: Minimum patterns per cluster

    Returns:
        ClusteringResult with cluster assignments

    Raises:
        ValueError: If patterns have different lengths or invalid parameters

    Examples:
        >>> patterns = [b"ABCD", b"ABCE", b"ABCF", b"XYZA"]
        >>> result = cluster_by_hamming(patterns, threshold=0.3)
        >>> assert result.num_clusters >= 1
    """
    if not patterns:
        # Explicit int dtype: a bare np.array([]) would be float64, while the
        # non-empty path (and the declared return type) uses integer labels.
        return ClusteringResult(
            clusters=[], labels=np.array([], dtype=int), num_clusters=0, silhouette_score=0.0
        )

    # Validate all patterns have same length
    pattern_length = len(patterns[0])
    for i, p in enumerate(patterns):
        if len(p) != pattern_length:
            raise ValueError(f"Pattern {i} has length {len(p)}, expected {pattern_length}")

    # Convert to numpy arrays for efficient computation
    pattern_arrays = [_to_array(p) for p in patterns]
    n = len(pattern_arrays)

    # Compute distance matrix
    dist_matrix = compute_distance_matrix(patterns, metric="hamming")

    # Perform clustering using simple threshold-based approach
    labels = np.full(n, -1, dtype=int)
    cluster_id = 0

    for i in range(n):
        if labels[i] != -1:
            continue  # Already assigned

        # Start new cluster
        cluster_members = [i]
        labels[i] = cluster_id

        # Find all patterns within threshold of every existing member
        for j in range(i + 1, n):
            if labels[j] != -1:
                continue

            # Complete linkage: j must be close to ALL members of the cluster
            max_dist = max(dist_matrix[j, m] for m in cluster_members)
            if max_dist <= threshold:
                cluster_members.append(j)
                labels[j] = cluster_id

        # Dissolve undersized clusters back to noise (-1)
        if len(cluster_members) < min_cluster_size:
            for m in cluster_members:
                labels[m] = -1
        else:
            cluster_id += 1

    # Singleton/undersized patterns remain in the noise cluster (-1)
    num_clusters = cluster_id

    # Build cluster results
    clusters = []
    for cid in range(num_clusters):
        cluster_indices = np.where(labels == cid)[0]
        cluster_patterns = [patterns[i] for i in cluster_indices]
        member_arrays = [pattern_arrays[i] for i in cluster_indices]

        # Compute centroid (majority vote per byte)
        centroid = _compute_centroid_hamming(member_arrays)

        # Analyze common vs variable bytes
        common, variable = _analyze_pattern_variance(member_arrays)

        # Within-cluster variance = mean pairwise distance over unique pairs
        variance = (
            np.mean([dist_matrix[i, j] for i in cluster_indices for j in cluster_indices if i < j])
            if len(cluster_indices) > 1
            else 0.0
        )

        clusters.append(
            ClusterResult(
                cluster_id=cid,
                patterns=cluster_patterns,
                centroid=bytes(centroid) if isinstance(patterns[0], bytes) else centroid,
                size=len(cluster_patterns),
                variance=float(variance),
                common_bytes=common,
                variable_bytes=variable,
            )
        )

    # Silhouette is undefined for a single cluster; report 0.0 in that case
    silhouette = _compute_silhouette_score(dist_matrix, labels) if num_clusters > 1 else 0.0

    return ClusteringResult(
        clusters=clusters, labels=labels, num_clusters=num_clusters, silhouette_score=silhouette
    )
282
+
283
+
284
def cluster_by_edit_distance(
    patterns: list[bytes | np.ndarray[tuple[int], np.dtype[np.uint8]]],
    threshold: float = 0.3,
    min_cluster_size: int = 2,
) -> ClusteringResult:
    """Cluster variable-length patterns by normalized Levenshtein distance.

    Greedily grows clusters: each unassigned pattern seeds a new cluster, and
    a candidate joins only if its distance to *every* current member is within
    ``threshold`` (complete-linkage style growth). Clusters smaller than
    ``min_cluster_size`` are dissolved and their members labelled -1 (noise).

    Args:
        patterns: List of patterns (lengths may differ)
        threshold: Maximum normalized edit distance (0-1)
        min_cluster_size: Minimum patterns per cluster

    Returns:
        ClusteringResult with cluster assignments

    Examples:
        >>> patterns = [b"ABCD", b"ABCDE", b"ABCDF", b"XYZ"]
        >>> result = cluster_by_edit_distance(patterns, threshold=0.4)
    """
    if not patterns:
        return ClusteringResult(
            clusters=[], labels=np.array([]), num_clusters=0, silhouette_score=0.0
        )

    count = len(patterns)
    dist_matrix = compute_distance_matrix(patterns, metric="levenshtein")

    labels = np.full(count, -1, dtype=int)
    next_cluster = 0

    for seed in range(count):
        if labels[seed] != -1:
            continue

        # Grow a new cluster from this seed.
        members = [seed]
        labels[seed] = next_cluster

        for candidate in range(seed + 1, count):
            if labels[candidate] != -1:
                continue
            # Complete-linkage admission: must be close to every member.
            if all(dist_matrix[candidate, m] <= threshold for m in members):
                members.append(candidate)
                labels[candidate] = next_cluster

        if len(members) >= min_cluster_size:
            next_cluster += 1
        else:
            # Dissolve undersized clusters back into noise.
            for m in members:
                labels[m] = -1

    num_clusters = next_cluster

    clusters = []
    for cid in range(num_clusters):
        idx = np.where(labels == cid)[0]
        member_patterns = [patterns[i] for i in idx]

        # Representative (most common) pattern serves as the centroid.
        centroid = _compute_centroid_edit(member_patterns)

        # Pad to a common width so per-position analysis is possible.
        width = max(len(p) for p in member_patterns)
        padded = [_to_array(p, target_length=width) for p in member_patterns]
        common, variable = _analyze_pattern_variance(padded)

        # Within-cluster variance = mean pairwise distance.
        if len(idx) > 1:
            pair_dists = [dist_matrix[i, j] for i in idx for j in idx if i < j]
            variance = np.mean(pair_dists)
        else:
            variance = 0.0

        clusters.append(
            ClusterResult(
                cluster_id=cid,
                patterns=member_patterns,
                centroid=centroid,
                size=len(member_patterns),
                variance=float(variance),
                common_bytes=common,
                variable_bytes=variable,
            )
        )

    # Silhouette only makes sense with at least two clusters.
    silhouette = _compute_silhouette_score(dist_matrix, labels) if num_clusters > 1 else 0.0

    return ClusteringResult(
        clusters=clusters, labels=labels, num_clusters=num_clusters, silhouette_score=silhouette
    )
390
+
391
+
392
def cluster_hierarchical(
    patterns: list[bytes | np.ndarray[tuple[int], np.dtype[np.uint8]]],
    method: Literal["single", "complete", "average", "upgma"] = "upgma",
    num_clusters: int | None = None,
    distance_threshold: float | None = None,
) -> ClusteringResult:
    """Agglomerative hierarchical clustering of patterns.

    Computes Hamming distances and merges clusters with the requested linkage;
    the dendrogram is cut either at a fixed number of clusters or at a
    distance threshold. 'upgma' is an alias for average linkage.

    Args:
        patterns: List of patterns
        method: Linkage method ('single', 'complete', 'average', 'upgma')
        num_clusters: Desired number of clusters (if None, use distance_threshold)
        distance_threshold: Distance threshold for cutting dendrogram

    Returns:
        ClusteringResult with cluster assignments

    Raises:
        ValueError: If neither num_clusters nor distance_threshold is specified

    Examples:
        >>> patterns = [b"AAA", b"AAB", b"BBB", b"BBC"]
        >>> result = cluster_hierarchical(patterns, method='average', num_clusters=2)
    """
    if num_clusters is None and distance_threshold is None:
        raise ValueError("Must specify either num_clusters or distance_threshold")

    if not patterns:
        return ClusteringResult(
            clusters=[], labels=np.array([]), num_clusters=0, silhouette_score=0.0
        )

    # UPGMA is simply average linkage under another name.
    linkage = "average" if method == "upgma" else method

    dist_matrix = compute_distance_matrix(patterns, metric="hamming")

    labels = _hierarchical_clustering(
        dist_matrix,
        method=linkage,
        num_clusters=num_clusters,
        distance_threshold=distance_threshold,
    )

    cluster_ids = sorted(set(labels[labels >= 0]))

    clusters = []
    for cid in cluster_ids:
        idx = np.where(labels == cid)[0]
        members = [patterns[i] for i in idx]
        arrays = [_to_array(p) for p in members]

        if len({len(a) for a in arrays}) == 1:
            # Equal lengths: per-position majority vote.
            voted = _compute_centroid_hamming(arrays)
            centroid = bytes(voted) if isinstance(patterns[0], bytes) else voted
        else:
            # Mixed lengths: fall back to the most common pattern.
            centroid = _compute_centroid_edit(members)

        # Pad to a common width for per-position variance analysis.
        width = max(len(a) for a in arrays)
        padded = [_to_array(a, target_length=width) for a in arrays]
        common, variable = _analyze_pattern_variance(padded)

        # Within-cluster variance = mean pairwise distance.
        if len(idx) > 1:
            pair_dists = [dist_matrix[i, j] for i in idx for j in idx if i < j]
            variance = float(np.mean(pair_dists))
        else:
            variance = 0.0

        clusters.append(
            ClusterResult(
                cluster_id=cid,
                patterns=members,
                centroid=centroid,
                size=len(members),
                variance=variance,
                common_bytes=common,
                variable_bytes=variable,
            )
        )

    num_found = len(cluster_ids)
    silhouette = _compute_silhouette_score(dist_matrix, labels) if num_found > 1 else 0.0

    return ClusteringResult(
        clusters=clusters,
        labels=labels,
        num_clusters=num_found,
        silhouette_score=silhouette,
    )
495
+
496
+
497
def analyze_cluster(cluster: ClusterResult) -> dict[str, list[int] | list[float] | bytes]:
    """Analyze a cluster to find common vs variable byte positions.

    Computes per-position Shannon entropy over the cluster's (zero-padded)
    member patterns, classifies positions as constant (entropy < 0.1 bits)
    or variable, and builds a majority-vote consensus pattern.

    Args:
        cluster: ClusterResult to analyze

    Returns:
        Dictionary with analysis results including:
        - common_bytes: List of byte positions that are constant
        - variable_bytes: List of byte positions that vary
        - entropy_per_byte: Entropy at each byte position
        - consensus: Consensus pattern with variable bytes marked

    Examples:
        >>> # Assume we have a cluster
        >>> analysis = analyze_cluster(cluster)
        >>> print(f"Common positions: {analysis['common_bytes']}")
    """
    if cluster.size == 0:
        return {"common_bytes": [], "variable_bytes": [], "entropy_per_byte": [], "consensus": b""}

    # Zero-pad all member patterns to a common width.
    arrays = [_to_array(p) for p in cluster.patterns]
    width = max(len(a) for a in arrays)
    padded = [_to_array(a, target_length=width) for a in arrays]

    # Per-position Shannon entropy across members.
    entropy_per_byte = [
        _compute_byte_entropy([p[pos] for p in padded]) for pos in range(width)
    ]

    # Positions with near-zero entropy are treated as constant.
    common_threshold = 0.1
    common_bytes = [pos for pos, e in enumerate(entropy_per_byte) if e < common_threshold]
    variable_bytes = [pos for pos, e in enumerate(entropy_per_byte) if e >= common_threshold]

    # Majority vote per position yields the consensus pattern.
    consensus = _compute_centroid_hamming(padded)

    return {
        "common_bytes": common_bytes,
        "variable_bytes": variable_bytes,
        "entropy_per_byte": entropy_per_byte,
        "consensus": bytes(consensus),
    }
555
+
556
+
557
def compute_distance_matrix(
    patterns: list[bytes | np.ndarray[tuple[int], np.dtype[np.uint8]]],
    metric: Literal["hamming", "levenshtein", "jaccard"] = "hamming",
) -> np.ndarray[tuple[int, int], np.dtype[np.float64]]:
    """Compute pairwise distance matrix.

    Computes all pairwise distances between patterns using the specified
    metric. The metric is validated and dispatched once up front, so an
    unknown metric raises even for 0 or 1 patterns (previously the check
    only happened inside the pair loop).

    Args:
        patterns: List of patterns
        metric: Distance metric ('hamming', 'levenshtein', 'jaccard')

    Returns:
        Symmetric distance matrix (n x n)

    Raises:
        ValueError: If unknown metric is specified

    Examples:
        >>> patterns = [b"ABC", b"ABD", b"XYZ"]
        >>> dist = compute_distance_matrix(patterns, metric='hamming')
        >>> assert dist.shape == (3, 3)
    """
    # Dispatch table: resolve the metric once instead of per pair.
    metric_funcs = {
        "hamming": _hamming_distance,
        "levenshtein": _edit_distance,
        "jaccard": _jaccard_distance,
    }
    try:
        distance = metric_funcs[metric]
    except KeyError:
        raise ValueError(f"Unknown metric: {metric}") from None

    n = len(patterns)
    dist_matrix = np.zeros((n, n), dtype=float)

    # Fill upper triangle and mirror; diagonal stays 0.
    for i in range(n):
        for j in range(i + 1, n):
            dist = distance(patterns[i], patterns[j])
            dist_matrix[i, j] = dist
            dist_matrix[j, i] = dist

    return dist_matrix
600
+
601
+
602
+ # Helper functions
603
+
604
+
605
+ def _to_array(
606
+ data: bytes | np.ndarray[tuple[int], np.dtype[np.uint8]] | memoryview | bytearray,
607
+ target_length: int | None = None,
608
+ ) -> np.ndarray[tuple[int], np.dtype[np.uint8]]:
609
+ """Convert to numpy array, optionally padding to target length.
610
+
611
+ Args:
612
+ data: Input data (bytes, bytearray, memoryview, or numpy array)
613
+ target_length: If specified, pad to this length
614
+
615
+ Returns:
616
+ Numpy array of uint8
617
+
618
+ Raises:
619
+ TypeError: If data type is not supported
620
+ """
621
+ if isinstance(data, bytes):
622
+ arr = np.frombuffer(data, dtype=np.uint8)
623
+ elif isinstance(data, bytearray | memoryview):
624
+ arr = np.frombuffer(bytes(data), dtype=np.uint8)
625
+ elif isinstance(data, np.ndarray):
626
+ arr = data.astype(np.uint8)
627
+ else:
628
+ raise TypeError(f"Unsupported type: {type(data)}")
629
+
630
+ if target_length is not None and len(arr) < target_length:
631
+ # Pad with zeros
632
+ padded = np.zeros(target_length, dtype=np.uint8)
633
+ padded[: len(arr)] = arr
634
+ return padded
635
+
636
+ return arr
637
+
638
+
639
def _hamming_distance(
    a: bytes | np.ndarray[tuple[int], np.dtype[np.uint8]],
    b: bytes | np.ndarray[tuple[int], np.dtype[np.uint8]],
) -> float:
    """Compute normalized Hamming distance.

    Unequal-length inputs are zero-padded to the longer length before
    comparison. Two empty patterns are identical, so the distance is 0.0
    (previously this case divided by zero).

    Args:
        a: First pattern
        b: Second pattern

    Returns:
        Fraction of differing byte positions, in [0, 1]
    """
    arr_a = _to_array(a)
    arr_b = _to_array(b)

    if len(arr_a) != len(arr_b):
        # Pad shorter to match longer
        max_len = max(len(arr_a), len(arr_b))
        arr_a = _to_array(a, target_length=max_len)
        arr_b = _to_array(b, target_length=max_len)

    if len(arr_a) == 0:
        # Both inputs empty: identical, and avoids ZeroDivisionError.
        return 0.0

    # Count differences
    differences = np.sum(arr_a != arr_b)
    return float(differences) / len(arr_a)
656
+
657
+
658
+ def _edit_distance(
659
+ a: bytes | np.ndarray[tuple[int], np.dtype[np.uint8]],
660
+ b: bytes | np.ndarray[tuple[int], np.dtype[np.uint8]],
661
+ ) -> float:
662
+ """Compute normalized Levenshtein edit distance."""
663
+ bytes_a = bytes(a) if isinstance(a, np.ndarray) else a
664
+ bytes_b = bytes(b) if isinstance(b, np.ndarray) else b
665
+
666
+ m, n = len(bytes_a), len(bytes_b)
667
+
668
+ if m == 0 and n == 0:
669
+ return 0.0
670
+ if m == 0:
671
+ return 1.0
672
+ if n == 0:
673
+ return 1.0
674
+
675
+ # DP table
676
+ prev_row = list(range(n + 1))
677
+ curr_row = [0] * (n + 1)
678
+
679
+ for i in range(1, m + 1):
680
+ curr_row[0] = i
681
+ for j in range(1, n + 1):
682
+ if bytes_a[i - 1] == bytes_b[j - 1]:
683
+ curr_row[j] = prev_row[j - 1]
684
+ else:
685
+ curr_row[j] = 1 + min(prev_row[j], curr_row[j - 1], prev_row[j - 1])
686
+ prev_row, curr_row = curr_row, prev_row
687
+
688
+ # Normalize by max length
689
+ return prev_row[n] / max(m, n)
690
+
691
+
692
def _jaccard_distance(
    a: bytes | np.ndarray[tuple[int], np.dtype[np.uint8]],
    b: bytes | np.ndarray[tuple[int], np.dtype[np.uint8]],
) -> float:
    """Compute Jaccard distance (1 - Jaccard similarity) over byte-value sets."""
    values_a = set(_to_array(a))
    values_b = set(_to_array(b))

    union = values_a | values_b
    if not union:
        # Both patterns empty: treat as identical.
        return 0.0

    overlap = values_a & values_b
    return 1.0 - len(overlap) / len(union)
711
+
712
+
713
+ def _compute_centroid_hamming(
714
+ patterns: list[np.ndarray[tuple[int], np.dtype[np.uint8]]],
715
+ ) -> np.ndarray[tuple[int], np.dtype[np.uint8]]:
716
+ """Compute centroid using majority vote (for fixed-length patterns)."""
717
+ if not patterns:
718
+ return np.array([], dtype=np.uint8)
719
+
720
+ _n = len(patterns)
721
+ length = len(patterns[0])
722
+
723
+ centroid = np.zeros(length, dtype=np.uint8)
724
+ for pos in range(length):
725
+ bytes_at_pos = [p[pos] for p in patterns]
726
+ # Most common byte
727
+ centroid[pos] = max(set(bytes_at_pos), key=bytes_at_pos.count)
728
+
729
+ return centroid
730
+
731
+
732
+ def _compute_centroid_edit(
733
+ patterns: list[bytes | np.ndarray[tuple[int], np.dtype[np.uint8]]],
734
+ ) -> bytes | np.ndarray[tuple[int], np.dtype[np.uint8]]:
735
+ """Compute centroid for variable-length patterns (most central pattern)."""
736
+ if not patterns:
737
+ return b"" if isinstance(patterns[0], bytes) else np.array([])
738
+
739
+ # Use most common pattern as centroid
740
+ from collections import Counter
741
+
742
+ pattern_counts = Counter(bytes(p) if isinstance(p, np.ndarray) else p for p in patterns)
743
+ most_common = pattern_counts.most_common(1)[0][0]
744
+
745
+ # Return in original type
746
+ if isinstance(patterns[0], bytes):
747
+ return most_common
748
+ else:
749
+ return np.frombuffer(most_common, dtype=np.uint8)
750
+
751
+
752
+ def _analyze_pattern_variance(
753
+ patterns: list[np.ndarray[tuple[int], np.dtype[np.uint8]]],
754
+ ) -> tuple[list[int], list[int]]:
755
+ """Analyze which byte positions are common vs variable."""
756
+ if not patterns or len(patterns) == 0:
757
+ return [], []
758
+
759
+ length = len(patterns[0])
760
+ common_bytes = []
761
+ variable_bytes = []
762
+
763
+ for pos in range(length):
764
+ bytes_at_pos = [p[pos] for p in patterns]
765
+ unique_values = len(set(bytes_at_pos))
766
+
767
+ if unique_values == 1:
768
+ common_bytes.append(pos)
769
+ else:
770
+ variable_bytes.append(pos)
771
+
772
+ return common_bytes, variable_bytes
773
+
774
+
775
+ def _compute_byte_entropy(byte_values: list[int]) -> float:
776
+ """Compute Shannon entropy of byte values."""
777
+ if not byte_values:
778
+ return 0.0
779
+
780
+ from collections import Counter
781
+
782
+ counts = Counter(byte_values)
783
+ n = len(byte_values)
784
+
785
+ entropy = 0.0
786
+ for count in counts.values():
787
+ if count > 0:
788
+ prob = count / n
789
+ entropy -= prob * np.log2(prob)
790
+
791
+ return entropy
792
+
793
+
794
+ def _compute_silhouette_score(
795
+ dist_matrix: np.ndarray[tuple[int, int], np.dtype[np.float64]],
796
+ labels: np.ndarray[tuple[int], np.dtype[np.int_]],
797
+ ) -> float:
798
+ """Compute average silhouette score for clustering quality."""
799
+ n = len(labels)
800
+ if n <= 1:
801
+ return 0.0
802
+
803
+ # Filter out noise points (-1 labels)
804
+ valid_mask = labels >= 0
805
+ if np.sum(valid_mask) <= 1:
806
+ return 0.0
807
+
808
+ unique_labels = set(labels[valid_mask])
809
+ if len(unique_labels) <= 1:
810
+ return 0.0
811
+
812
+ silhouette_scores = []
813
+
814
+ for i in range(n):
815
+ if labels[i] == -1:
816
+ continue
817
+
818
+ # a(i): average distance to points in same cluster
819
+ same_cluster = (labels == labels[i]) & (np.arange(n) != i)
820
+ if np.sum(same_cluster) == 0:
821
+ continue
822
+
823
+ a_i = np.mean(dist_matrix[i, same_cluster])
824
+
825
+ # b(i): minimum average distance to points in other clusters
826
+ b_i = float("inf")
827
+ for other_label in unique_labels:
828
+ if other_label == labels[i]:
829
+ continue
830
+
831
+ other_cluster = labels == other_label
832
+ if np.sum(other_cluster) > 0:
833
+ avg_dist = np.mean(dist_matrix[i, other_cluster])
834
+ b_i = min(b_i, avg_dist)
835
+
836
+ # Silhouette coefficient
837
+ if b_i == float("inf"):
838
+ s_i = 0.0
839
+ else:
840
+ s_i = (b_i - a_i) / max(a_i, b_i)
841
+
842
+ silhouette_scores.append(s_i)
843
+
844
+ return float(np.mean(silhouette_scores)) if silhouette_scores else 0.0
845
+
846
+
847
def _hierarchical_clustering(
    dist_matrix: np.ndarray[tuple[int, int], np.dtype[np.float64]],
    method: str,
    num_clusters: int | None,
    distance_threshold: float | None,
) -> np.ndarray[tuple[int], np.dtype[np.int_]]:
    """Perform agglomerative hierarchical clustering.

    Repeatedly merges the closest pair of clusters (per the linkage
    *method*) until either ``num_clusters`` remain or the closest pair is
    farther apart than ``distance_threshold``.

    The original allocated an unused O(n^2) copy of the distance matrix
    (`_cluster_distances`); that dead allocation has been removed.

    Args:
        dist_matrix: Precomputed symmetric pairwise distance matrix
        method: Linkage method name passed to _linkage_distance
        num_clusters: Stop when this many clusters remain (None = ignore)
        distance_threshold: Stop when the closest merge exceeds this

    Returns:
        Integer label per point
    """
    n = dist_matrix.shape[0]

    # Initialize: each point is its own cluster
    clusters = [[i] for i in range(n)]

    # Merge until desired number of clusters
    while len(clusters) > 1:
        if num_clusters is not None and len(clusters) <= num_clusters:
            break

        # Find closest pair of clusters under the chosen linkage.
        min_dist = float("inf")
        merge_i, merge_j = -1, -1
        for i in range(len(clusters)):
            for j in range(i + 1, len(clusters)):
                dist = _linkage_distance(clusters[i], clusters[j], dist_matrix, method)
                if dist < min_dist:
                    min_dist = dist
                    merge_i, merge_j = i, j

        # Stop once the best available merge exceeds the threshold.
        if distance_threshold is not None and min_dist > distance_threshold:
            break

        # Merge the winning pair.
        if merge_i >= 0 and merge_j >= 0:
            clusters[merge_i].extend(clusters[merge_j])
            del clusters[merge_j]

    # Assign labels
    labels = np.full(n, -1, dtype=int)
    for cid, members in enumerate(clusters):
        for idx in members:
            labels[idx] = cid

    return labels
894
+
895
+
896
+ def _linkage_distance(
897
+ cluster_a: list[int],
898
+ cluster_b: list[int],
899
+ dist_matrix: np.ndarray[tuple[int, int], np.dtype[np.float64]],
900
+ method: str,
901
+ ) -> float:
902
+ """Compute distance between two clusters using linkage method."""
903
+ distances = [dist_matrix[i, j] for i in cluster_a for j in cluster_b]
904
+
905
+ if not distances:
906
+ return 0.0
907
+
908
+ if method == "single":
909
+ return float(min(distances))
910
+ elif method == "complete":
911
+ return float(max(distances))
912
+ elif method == "average":
913
+ return float(np.mean(distances))
914
+ else:
915
+ return float(np.mean(distances)) # Default to average
916
+
917
+
918
class PatternClusterer:
    """Class-based facade over the functional clustering API.

    Mirrors the sklearn estimator interface (``fit`` / ``fit_predict``)
    while delegating the actual work to ``cluster_by_hamming``,
    ``cluster_by_edit_distance`` and ``cluster_hierarchical``.

    Example:
        >>> clusterer = PatternClusterer(n_clusters=3)
        >>> labels = clusterer.cluster(messages)
    """

    def __init__(
        self,
        n_clusters: int = 3,
        method: Literal["hamming", "edit", "hierarchical"] = "hamming",
        distance_metric: Literal["hamming", "levenshtein", "jaccard"] = "hamming",
        threshold: float = 0.3,
        min_cluster_size: int = 2,
    ):
        """Initialize pattern clusterer.

        Args:
            n_clusters: Desired number of clusters.
            method: Clustering method ('hamming', 'edit', or 'hierarchical').
            distance_metric: Distance metric to use.
            threshold: Distance threshold for clustering.
            min_cluster_size: Minimum patterns per cluster.
        """
        self.n_clusters = n_clusters
        self.method = method
        self.distance_metric = distance_metric
        self.threshold = threshold
        self.min_cluster_size = min_cluster_size
        # Populated by cluster(); None until then.
        self.result_: ClusteringResult | None = None

    def cluster(
        self, patterns: list[bytes | np.ndarray[tuple[int], np.dtype[np.uint8]]]
    ) -> np.ndarray[tuple[int], np.dtype[np.int_]]:
        """Cluster patterns, store the full result, and return the labels.

        Args:
            patterns: List of patterns to cluster.

        Returns:
            Array of cluster labels (one per pattern).

        Example:
            >>> clusterer = PatternClusterer(n_clusters=3)
            >>> labels = clusterer.cluster(messages)
        """
        if self.method == "hamming":
            result = cluster_by_hamming(
                patterns, threshold=self.threshold, min_cluster_size=self.min_cluster_size
            )
        elif self.method == "edit":
            result = cluster_by_edit_distance(
                patterns, threshold=self.threshold, min_cluster_size=self.min_cluster_size
            )
        else:
            # 'hierarchical' (and any other value) uses average linkage.
            result = cluster_hierarchical(
                patterns, method="average", num_clusters=self.n_clusters
            )

        self.result_ = result
        return result.labels

    def fit(
        self, patterns: list[bytes | np.ndarray[tuple[int], np.dtype[np.uint8]]]
    ) -> "PatternClusterer":
        """Fit the clusterer to patterns (sklearn-style interface).

        Args:
            patterns: List of patterns to cluster.

        Returns:
            Self (for method chaining).
        """
        self.cluster(patterns)
        return self

    def fit_predict(
        self, patterns: list[bytes | np.ndarray[tuple[int], np.dtype[np.uint8]]]
    ) -> np.ndarray[tuple[int], np.dtype[np.int_]]:
        """Fit and return cluster labels (sklearn-style interface).

        Args:
            patterns: List of patterns to cluster.

        Returns:
            Array of cluster labels.
        """
        return self.cluster(patterns)

    def get_clusters(self) -> list[ClusterResult]:
        """Get detailed cluster results.

        Returns:
            List of ClusterResult objects with full cluster analysis.

        Raises:
            ValueError: If cluster() hasn't been called yet.
        """
        if self.result_ is None:
            raise ValueError("Must call cluster() before get_clusters()")
        return self.result_.clusters

    def get_silhouette_score(self) -> float:
        """Get silhouette score for clustering quality.

        Returns:
            Silhouette score (-1 to 1, higher is better).

        Raises:
            ValueError: If cluster() hasn't been called yet.
        """
        if self.result_ is None:
            raise ValueError("Must call cluster() before get_silhouette_score()")
        return self.result_.silhouette_score