oscura-0.5.1-py3-none-any.whl → oscura-0.7.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (497)
  1. oscura/__init__.py +169 -167
  2. oscura/analyzers/__init__.py +3 -0
  3. oscura/analyzers/classification.py +659 -0
  4. oscura/analyzers/digital/edges.py +325 -65
  5. oscura/analyzers/digital/quality.py +293 -166
  6. oscura/analyzers/digital/timing.py +260 -115
  7. oscura/analyzers/digital/timing_numba.py +334 -0
  8. oscura/analyzers/entropy.py +605 -0
  9. oscura/analyzers/eye/diagram.py +176 -109
  10. oscura/analyzers/eye/metrics.py +5 -5
  11. oscura/analyzers/jitter/__init__.py +6 -4
  12. oscura/analyzers/jitter/ber.py +52 -52
  13. oscura/analyzers/jitter/classification.py +156 -0
  14. oscura/analyzers/jitter/decomposition.py +163 -113
  15. oscura/analyzers/jitter/spectrum.py +80 -64
  16. oscura/analyzers/ml/__init__.py +39 -0
  17. oscura/analyzers/ml/features.py +600 -0
  18. oscura/analyzers/ml/signal_classifier.py +604 -0
  19. oscura/analyzers/packet/daq.py +246 -158
  20. oscura/analyzers/packet/parser.py +12 -1
  21. oscura/analyzers/packet/payload.py +50 -2110
  22. oscura/analyzers/packet/payload_analysis.py +361 -181
  23. oscura/analyzers/packet/payload_patterns.py +133 -70
  24. oscura/analyzers/packet/stream.py +84 -23
  25. oscura/analyzers/patterns/__init__.py +26 -5
  26. oscura/analyzers/patterns/anomaly_detection.py +908 -0
  27. oscura/analyzers/patterns/clustering.py +169 -108
  28. oscura/analyzers/patterns/clustering_optimized.py +227 -0
  29. oscura/analyzers/patterns/discovery.py +1 -1
  30. oscura/analyzers/patterns/matching.py +581 -197
  31. oscura/analyzers/patterns/pattern_mining.py +778 -0
  32. oscura/analyzers/patterns/periodic.py +121 -38
  33. oscura/analyzers/patterns/sequences.py +175 -78
  34. oscura/analyzers/power/conduction.py +1 -1
  35. oscura/analyzers/power/soa.py +6 -6
  36. oscura/analyzers/power/switching.py +250 -110
  37. oscura/analyzers/protocol/__init__.py +17 -1
  38. oscura/analyzers/protocols/base.py +6 -6
  39. oscura/analyzers/protocols/ble/__init__.py +38 -0
  40. oscura/analyzers/protocols/ble/analyzer.py +809 -0
  41. oscura/analyzers/protocols/ble/uuids.py +288 -0
  42. oscura/analyzers/protocols/can.py +257 -127
  43. oscura/analyzers/protocols/can_fd.py +107 -80
  44. oscura/analyzers/protocols/flexray.py +139 -80
  45. oscura/analyzers/protocols/hdlc.py +93 -58
  46. oscura/analyzers/protocols/i2c.py +247 -106
  47. oscura/analyzers/protocols/i2s.py +138 -86
  48. oscura/analyzers/protocols/industrial/__init__.py +40 -0
  49. oscura/analyzers/protocols/industrial/bacnet/__init__.py +33 -0
  50. oscura/analyzers/protocols/industrial/bacnet/analyzer.py +708 -0
  51. oscura/analyzers/protocols/industrial/bacnet/encoding.py +412 -0
  52. oscura/analyzers/protocols/industrial/bacnet/services.py +622 -0
  53. oscura/analyzers/protocols/industrial/ethercat/__init__.py +30 -0
  54. oscura/analyzers/protocols/industrial/ethercat/analyzer.py +474 -0
  55. oscura/analyzers/protocols/industrial/ethercat/mailbox.py +339 -0
  56. oscura/analyzers/protocols/industrial/ethercat/topology.py +166 -0
  57. oscura/analyzers/protocols/industrial/modbus/__init__.py +31 -0
  58. oscura/analyzers/protocols/industrial/modbus/analyzer.py +525 -0
  59. oscura/analyzers/protocols/industrial/modbus/crc.py +79 -0
  60. oscura/analyzers/protocols/industrial/modbus/functions.py +436 -0
  61. oscura/analyzers/protocols/industrial/opcua/__init__.py +21 -0
  62. oscura/analyzers/protocols/industrial/opcua/analyzer.py +552 -0
  63. oscura/analyzers/protocols/industrial/opcua/datatypes.py +446 -0
  64. oscura/analyzers/protocols/industrial/opcua/services.py +264 -0
  65. oscura/analyzers/protocols/industrial/profinet/__init__.py +23 -0
  66. oscura/analyzers/protocols/industrial/profinet/analyzer.py +441 -0
  67. oscura/analyzers/protocols/industrial/profinet/dcp.py +263 -0
  68. oscura/analyzers/protocols/industrial/profinet/ptcp.py +200 -0
  69. oscura/analyzers/protocols/jtag.py +180 -98
  70. oscura/analyzers/protocols/lin.py +219 -114
  71. oscura/analyzers/protocols/manchester.py +4 -4
  72. oscura/analyzers/protocols/onewire.py +253 -149
  73. oscura/analyzers/protocols/parallel_bus/__init__.py +20 -0
  74. oscura/analyzers/protocols/parallel_bus/centronics.py +92 -0
  75. oscura/analyzers/protocols/parallel_bus/gpib.py +137 -0
  76. oscura/analyzers/protocols/spi.py +192 -95
  77. oscura/analyzers/protocols/swd.py +321 -167
  78. oscura/analyzers/protocols/uart.py +267 -125
  79. oscura/analyzers/protocols/usb.py +235 -131
  80. oscura/analyzers/side_channel/power.py +17 -12
  81. oscura/analyzers/signal/__init__.py +15 -0
  82. oscura/analyzers/signal/timing_analysis.py +1086 -0
  83. oscura/analyzers/signal_integrity/__init__.py +4 -1
  84. oscura/analyzers/signal_integrity/sparams.py +2 -19
  85. oscura/analyzers/spectral/chunked.py +129 -60
  86. oscura/analyzers/spectral/chunked_fft.py +300 -94
  87. oscura/analyzers/spectral/chunked_wavelet.py +100 -80
  88. oscura/analyzers/statistical/checksum.py +376 -217
  89. oscura/analyzers/statistical/classification.py +229 -107
  90. oscura/analyzers/statistical/entropy.py +78 -53
  91. oscura/analyzers/statistics/correlation.py +407 -211
  92. oscura/analyzers/statistics/outliers.py +2 -2
  93. oscura/analyzers/statistics/streaming.py +30 -5
  94. oscura/analyzers/validation.py +216 -101
  95. oscura/analyzers/waveform/measurements.py +9 -0
  96. oscura/analyzers/waveform/measurements_with_uncertainty.py +31 -15
  97. oscura/analyzers/waveform/spectral.py +500 -228
  98. oscura/api/__init__.py +31 -5
  99. oscura/api/dsl/__init__.py +582 -0
  100. oscura/{dsl → api/dsl}/commands.py +43 -76
  101. oscura/{dsl → api/dsl}/interpreter.py +26 -51
  102. oscura/{dsl → api/dsl}/parser.py +107 -77
  103. oscura/{dsl → api/dsl}/repl.py +2 -2
  104. oscura/api/dsl.py +1 -1
  105. oscura/{integrations → api/integrations}/__init__.py +1 -1
  106. oscura/{integrations → api/integrations}/llm.py +201 -102
  107. oscura/api/operators.py +3 -3
  108. oscura/api/optimization.py +144 -30
  109. oscura/api/rest_server.py +921 -0
  110. oscura/api/server/__init__.py +17 -0
  111. oscura/api/server/dashboard.py +850 -0
  112. oscura/api/server/static/README.md +34 -0
  113. oscura/api/server/templates/base.html +181 -0
  114. oscura/api/server/templates/export.html +120 -0
  115. oscura/api/server/templates/home.html +284 -0
  116. oscura/api/server/templates/protocols.html +58 -0
  117. oscura/api/server/templates/reports.html +43 -0
  118. oscura/api/server/templates/session_detail.html +89 -0
  119. oscura/api/server/templates/sessions.html +83 -0
  120. oscura/api/server/templates/waveforms.html +73 -0
  121. oscura/automotive/__init__.py +8 -1
  122. oscura/automotive/can/__init__.py +10 -0
  123. oscura/automotive/can/checksum.py +3 -1
  124. oscura/automotive/can/dbc_generator.py +590 -0
  125. oscura/automotive/can/message_wrapper.py +121 -74
  126. oscura/automotive/can/patterns.py +98 -21
  127. oscura/automotive/can/session.py +292 -56
  128. oscura/automotive/can/state_machine.py +6 -3
  129. oscura/automotive/can/stimulus_response.py +97 -75
  130. oscura/automotive/dbc/__init__.py +10 -2
  131. oscura/automotive/dbc/generator.py +84 -56
  132. oscura/automotive/dbc/parser.py +6 -6
  133. oscura/automotive/dtc/data.json +17 -102
  134. oscura/automotive/dtc/database.py +2 -2
  135. oscura/automotive/flexray/__init__.py +31 -0
  136. oscura/automotive/flexray/analyzer.py +504 -0
  137. oscura/automotive/flexray/crc.py +185 -0
  138. oscura/automotive/flexray/fibex.py +449 -0
  139. oscura/automotive/j1939/__init__.py +45 -8
  140. oscura/automotive/j1939/analyzer.py +605 -0
  141. oscura/automotive/j1939/spns.py +326 -0
  142. oscura/automotive/j1939/transport.py +306 -0
  143. oscura/automotive/lin/__init__.py +47 -0
  144. oscura/automotive/lin/analyzer.py +612 -0
  145. oscura/automotive/loaders/blf.py +13 -2
  146. oscura/automotive/loaders/csv_can.py +143 -72
  147. oscura/automotive/loaders/dispatcher.py +50 -2
  148. oscura/automotive/loaders/mdf.py +86 -45
  149. oscura/automotive/loaders/pcap.py +111 -61
  150. oscura/automotive/uds/__init__.py +4 -0
  151. oscura/automotive/uds/analyzer.py +725 -0
  152. oscura/automotive/uds/decoder.py +140 -58
  153. oscura/automotive/uds/models.py +7 -1
  154. oscura/automotive/visualization.py +1 -1
  155. oscura/cli/analyze.py +348 -0
  156. oscura/cli/batch.py +142 -122
  157. oscura/cli/benchmark.py +275 -0
  158. oscura/cli/characterize.py +137 -82
  159. oscura/cli/compare.py +224 -131
  160. oscura/cli/completion.py +250 -0
  161. oscura/cli/config_cmd.py +361 -0
  162. oscura/cli/decode.py +164 -87
  163. oscura/cli/export.py +286 -0
  164. oscura/cli/main.py +115 -31
  165. oscura/{onboarding → cli/onboarding}/__init__.py +3 -3
  166. oscura/{onboarding → cli/onboarding}/help.py +80 -58
  167. oscura/{onboarding → cli/onboarding}/tutorials.py +97 -72
  168. oscura/{onboarding → cli/onboarding}/wizard.py +55 -36
  169. oscura/cli/progress.py +147 -0
  170. oscura/cli/shell.py +157 -135
  171. oscura/cli/validate_cmd.py +204 -0
  172. oscura/cli/visualize.py +158 -0
  173. oscura/convenience.py +125 -79
  174. oscura/core/__init__.py +4 -2
  175. oscura/core/backend_selector.py +3 -3
  176. oscura/core/cache.py +126 -15
  177. oscura/core/cancellation.py +1 -1
  178. oscura/{config → core/config}/__init__.py +20 -11
  179. oscura/{config → core/config}/defaults.py +1 -1
  180. oscura/{config → core/config}/loader.py +7 -5
  181. oscura/{config → core/config}/memory.py +5 -5
  182. oscura/{config → core/config}/migration.py +1 -1
  183. oscura/{config → core/config}/pipeline.py +99 -23
  184. oscura/{config → core/config}/preferences.py +1 -1
  185. oscura/{config → core/config}/protocol.py +3 -3
  186. oscura/{config → core/config}/schema.py +426 -272
  187. oscura/{config → core/config}/settings.py +1 -1
  188. oscura/{config → core/config}/thresholds.py +195 -153
  189. oscura/core/correlation.py +5 -6
  190. oscura/core/cross_domain.py +0 -2
  191. oscura/core/debug.py +9 -5
  192. oscura/{extensibility → core/extensibility}/docs.py +158 -70
  193. oscura/{extensibility → core/extensibility}/extensions.py +160 -76
  194. oscura/{extensibility → core/extensibility}/logging.py +1 -1
  195. oscura/{extensibility → core/extensibility}/measurements.py +1 -1
  196. oscura/{extensibility → core/extensibility}/plugins.py +1 -1
  197. oscura/{extensibility → core/extensibility}/templates.py +73 -3
  198. oscura/{extensibility → core/extensibility}/validation.py +1 -1
  199. oscura/core/gpu_backend.py +11 -7
  200. oscura/core/log_query.py +101 -11
  201. oscura/core/logging.py +126 -54
  202. oscura/core/logging_advanced.py +5 -5
  203. oscura/core/memory_limits.py +108 -70
  204. oscura/core/memory_monitor.py +2 -2
  205. oscura/core/memory_progress.py +7 -7
  206. oscura/core/memory_warnings.py +1 -1
  207. oscura/core/numba_backend.py +13 -13
  208. oscura/{plugins → core/plugins}/__init__.py +9 -9
  209. oscura/{plugins → core/plugins}/base.py +7 -7
  210. oscura/{plugins → core/plugins}/cli.py +3 -3
  211. oscura/{plugins → core/plugins}/discovery.py +186 -106
  212. oscura/{plugins → core/plugins}/lifecycle.py +1 -1
  213. oscura/{plugins → core/plugins}/manager.py +7 -7
  214. oscura/{plugins → core/plugins}/registry.py +3 -3
  215. oscura/{plugins → core/plugins}/versioning.py +1 -1
  216. oscura/core/progress.py +16 -1
  217. oscura/core/provenance.py +8 -2
  218. oscura/{schemas → core/schemas}/__init__.py +2 -2
  219. oscura/{schemas → core/schemas}/device_mapping.json +2 -8
  220. oscura/{schemas → core/schemas}/packet_format.json +4 -24
  221. oscura/{schemas → core/schemas}/protocol_definition.json +2 -12
  222. oscura/core/types.py +4 -0
  223. oscura/core/uncertainty.py +3 -3
  224. oscura/correlation/__init__.py +52 -0
  225. oscura/correlation/multi_protocol.py +811 -0
  226. oscura/discovery/auto_decoder.py +117 -35
  227. oscura/discovery/comparison.py +191 -86
  228. oscura/discovery/quality_validator.py +155 -68
  229. oscura/discovery/signal_detector.py +196 -79
  230. oscura/export/__init__.py +18 -8
  231. oscura/export/kaitai_struct.py +513 -0
  232. oscura/export/scapy_layer.py +801 -0
  233. oscura/export/wireshark/generator.py +1 -1
  234. oscura/export/wireshark/templates/dissector.lua.j2 +2 -2
  235. oscura/export/wireshark_dissector.py +746 -0
  236. oscura/guidance/wizard.py +207 -111
  237. oscura/hardware/__init__.py +19 -0
  238. oscura/{acquisition → hardware/acquisition}/__init__.py +4 -4
  239. oscura/{acquisition → hardware/acquisition}/file.py +2 -2
  240. oscura/{acquisition → hardware/acquisition}/hardware.py +7 -7
  241. oscura/{acquisition → hardware/acquisition}/saleae.py +15 -12
  242. oscura/{acquisition → hardware/acquisition}/socketcan.py +1 -1
  243. oscura/{acquisition → hardware/acquisition}/streaming.py +2 -2
  244. oscura/{acquisition → hardware/acquisition}/synthetic.py +3 -3
  245. oscura/{acquisition → hardware/acquisition}/visa.py +33 -11
  246. oscura/hardware/firmware/__init__.py +29 -0
  247. oscura/hardware/firmware/pattern_recognition.py +874 -0
  248. oscura/hardware/hal_detector.py +736 -0
  249. oscura/hardware/security/__init__.py +37 -0
  250. oscura/hardware/security/side_channel_detector.py +1126 -0
  251. oscura/inference/__init__.py +4 -0
  252. oscura/inference/active_learning/observation_table.py +4 -1
  253. oscura/inference/alignment.py +216 -123
  254. oscura/inference/bayesian.py +113 -33
  255. oscura/inference/crc_reverse.py +101 -55
  256. oscura/inference/logic.py +6 -2
  257. oscura/inference/message_format.py +342 -183
  258. oscura/inference/protocol.py +95 -44
  259. oscura/inference/protocol_dsl.py +180 -82
  260. oscura/inference/signal_intelligence.py +1439 -706
  261. oscura/inference/spectral.py +99 -57
  262. oscura/inference/state_machine.py +810 -158
  263. oscura/inference/stream.py +270 -110
  264. oscura/iot/__init__.py +34 -0
  265. oscura/iot/coap/__init__.py +32 -0
  266. oscura/iot/coap/analyzer.py +668 -0
  267. oscura/iot/coap/options.py +212 -0
  268. oscura/iot/lorawan/__init__.py +21 -0
  269. oscura/iot/lorawan/crypto.py +206 -0
  270. oscura/iot/lorawan/decoder.py +801 -0
  271. oscura/iot/lorawan/mac_commands.py +341 -0
  272. oscura/iot/mqtt/__init__.py +27 -0
  273. oscura/iot/mqtt/analyzer.py +999 -0
  274. oscura/iot/mqtt/properties.py +315 -0
  275. oscura/iot/zigbee/__init__.py +31 -0
  276. oscura/iot/zigbee/analyzer.py +615 -0
  277. oscura/iot/zigbee/security.py +153 -0
  278. oscura/iot/zigbee/zcl.py +349 -0
  279. oscura/jupyter/display.py +125 -45
  280. oscura/{exploratory → jupyter/exploratory}/__init__.py +8 -8
  281. oscura/{exploratory → jupyter/exploratory}/error_recovery.py +298 -141
  282. oscura/jupyter/exploratory/fuzzy.py +746 -0
  283. oscura/{exploratory → jupyter/exploratory}/fuzzy_advanced.py +258 -100
  284. oscura/{exploratory → jupyter/exploratory}/legacy.py +464 -242
  285. oscura/{exploratory → jupyter/exploratory}/parse.py +167 -145
  286. oscura/{exploratory → jupyter/exploratory}/recovery.py +119 -87
  287. oscura/jupyter/exploratory/sync.py +612 -0
  288. oscura/{exploratory → jupyter/exploratory}/unknown.py +299 -176
  289. oscura/jupyter/magic.py +4 -4
  290. oscura/{ui → jupyter/ui}/__init__.py +2 -2
  291. oscura/{ui → jupyter/ui}/formatters.py +3 -3
  292. oscura/{ui → jupyter/ui}/progressive_display.py +153 -82
  293. oscura/loaders/__init__.py +183 -67
  294. oscura/loaders/binary.py +88 -1
  295. oscura/loaders/chipwhisperer.py +153 -137
  296. oscura/loaders/configurable.py +208 -86
  297. oscura/loaders/csv_loader.py +458 -215
  298. oscura/loaders/hdf5_loader.py +278 -119
  299. oscura/loaders/lazy.py +87 -54
  300. oscura/loaders/mmap_loader.py +1 -1
  301. oscura/loaders/numpy_loader.py +253 -116
  302. oscura/loaders/pcap.py +226 -151
  303. oscura/loaders/rigol.py +110 -49
  304. oscura/loaders/sigrok.py +201 -78
  305. oscura/loaders/tdms.py +81 -58
  306. oscura/loaders/tektronix.py +291 -174
  307. oscura/loaders/touchstone.py +182 -87
  308. oscura/loaders/tss.py +456 -0
  309. oscura/loaders/vcd.py +215 -117
  310. oscura/loaders/wav.py +155 -68
  311. oscura/reporting/__init__.py +9 -0
  312. oscura/reporting/analyze.py +352 -146
  313. oscura/reporting/argument_preparer.py +69 -14
  314. oscura/reporting/auto_report.py +97 -61
  315. oscura/reporting/batch.py +131 -58
  316. oscura/reporting/chart_selection.py +57 -45
  317. oscura/reporting/comparison.py +63 -17
  318. oscura/reporting/content/executive.py +76 -24
  319. oscura/reporting/core_formats/multi_format.py +11 -8
  320. oscura/reporting/engine.py +312 -158
  321. oscura/reporting/enhanced_reports.py +949 -0
  322. oscura/reporting/export.py +86 -43
  323. oscura/reporting/formatting/numbers.py +69 -42
  324. oscura/reporting/html.py +139 -58
  325. oscura/reporting/index.py +137 -65
  326. oscura/reporting/output.py +158 -67
  327. oscura/reporting/pdf.py +67 -102
  328. oscura/reporting/plots.py +191 -112
  329. oscura/reporting/sections.py +88 -47
  330. oscura/reporting/standards.py +104 -61
  331. oscura/reporting/summary_generator.py +75 -55
  332. oscura/reporting/tables.py +138 -54
  333. oscura/reporting/templates/enhanced/protocol_re.html +525 -0
  334. oscura/sessions/__init__.py +14 -23
  335. oscura/sessions/base.py +3 -3
  336. oscura/sessions/blackbox.py +106 -10
  337. oscura/sessions/generic.py +2 -2
  338. oscura/sessions/legacy.py +783 -0
  339. oscura/side_channel/__init__.py +63 -0
  340. oscura/side_channel/dpa.py +1025 -0
  341. oscura/utils/__init__.py +15 -1
  342. oscura/utils/bitwise.py +118 -0
  343. oscura/{builders → utils/builders}/__init__.py +1 -1
  344. oscura/{comparison → utils/comparison}/__init__.py +6 -6
  345. oscura/{comparison → utils/comparison}/compare.py +202 -101
  346. oscura/{comparison → utils/comparison}/golden.py +83 -63
  347. oscura/{comparison → utils/comparison}/limits.py +313 -89
  348. oscura/{comparison → utils/comparison}/mask.py +151 -45
  349. oscura/{comparison → utils/comparison}/trace_diff.py +1 -1
  350. oscura/{comparison → utils/comparison}/visualization.py +147 -89
  351. oscura/{component → utils/component}/__init__.py +3 -3
  352. oscura/{component → utils/component}/impedance.py +122 -58
  353. oscura/{component → utils/component}/reactive.py +165 -168
  354. oscura/{component → utils/component}/transmission_line.py +3 -3
  355. oscura/{filtering → utils/filtering}/__init__.py +6 -6
  356. oscura/{filtering → utils/filtering}/base.py +1 -1
  357. oscura/{filtering → utils/filtering}/convenience.py +2 -2
  358. oscura/{filtering → utils/filtering}/design.py +169 -93
  359. oscura/{filtering → utils/filtering}/filters.py +2 -2
  360. oscura/{filtering → utils/filtering}/introspection.py +2 -2
  361. oscura/utils/geometry.py +31 -0
  362. oscura/utils/imports.py +184 -0
  363. oscura/utils/lazy.py +1 -1
  364. oscura/{math → utils/math}/__init__.py +2 -2
  365. oscura/{math → utils/math}/arithmetic.py +114 -48
  366. oscura/{math → utils/math}/interpolation.py +139 -106
  367. oscura/utils/memory.py +129 -66
  368. oscura/utils/memory_advanced.py +92 -9
  369. oscura/utils/memory_extensions.py +10 -8
  370. oscura/{optimization → utils/optimization}/__init__.py +1 -1
  371. oscura/{optimization → utils/optimization}/search.py +2 -2
  372. oscura/utils/performance/__init__.py +58 -0
  373. oscura/utils/performance/caching.py +889 -0
  374. oscura/utils/performance/lsh_clustering.py +333 -0
  375. oscura/utils/performance/memory_optimizer.py +699 -0
  376. oscura/utils/performance/optimizations.py +675 -0
  377. oscura/utils/performance/parallel.py +654 -0
  378. oscura/utils/performance/profiling.py +661 -0
  379. oscura/{pipeline → utils/pipeline}/base.py +1 -1
  380. oscura/{pipeline → utils/pipeline}/composition.py +1 -1
  381. oscura/{pipeline → utils/pipeline}/parallel.py +3 -2
  382. oscura/{pipeline → utils/pipeline}/pipeline.py +1 -1
  383. oscura/{pipeline → utils/pipeline}/reverse_engineering.py +412 -221
  384. oscura/{search → utils/search}/__init__.py +3 -3
  385. oscura/{search → utils/search}/anomaly.py +188 -58
  386. oscura/utils/search/context.py +294 -0
  387. oscura/{search → utils/search}/pattern.py +138 -10
  388. oscura/utils/serial.py +51 -0
  389. oscura/utils/storage/__init__.py +61 -0
  390. oscura/utils/storage/database.py +1166 -0
  391. oscura/{streaming → utils/streaming}/chunked.py +302 -143
  392. oscura/{streaming → utils/streaming}/progressive.py +1 -1
  393. oscura/{streaming → utils/streaming}/realtime.py +3 -2
  394. oscura/{triggering → utils/triggering}/__init__.py +6 -6
  395. oscura/{triggering → utils/triggering}/base.py +6 -6
  396. oscura/{triggering → utils/triggering}/edge.py +2 -2
  397. oscura/{triggering → utils/triggering}/pattern.py +2 -2
  398. oscura/{triggering → utils/triggering}/pulse.py +115 -74
  399. oscura/{triggering → utils/triggering}/window.py +2 -2
  400. oscura/utils/validation.py +32 -0
  401. oscura/validation/__init__.py +121 -0
  402. oscura/{compliance → validation/compliance}/__init__.py +5 -5
  403. oscura/{compliance → validation/compliance}/advanced.py +5 -5
  404. oscura/{compliance → validation/compliance}/masks.py +1 -1
  405. oscura/{compliance → validation/compliance}/reporting.py +127 -53
  406. oscura/{compliance → validation/compliance}/testing.py +114 -52
  407. oscura/validation/compliance_tests.py +915 -0
  408. oscura/validation/fuzzer.py +990 -0
  409. oscura/validation/grammar_tests.py +596 -0
  410. oscura/validation/grammar_validator.py +904 -0
  411. oscura/validation/hil_testing.py +977 -0
  412. oscura/{quality → validation/quality}/__init__.py +4 -4
  413. oscura/{quality → validation/quality}/ensemble.py +251 -171
  414. oscura/{quality → validation/quality}/explainer.py +3 -3
  415. oscura/{quality → validation/quality}/scoring.py +1 -1
  416. oscura/{quality → validation/quality}/warnings.py +4 -4
  417. oscura/validation/regression_suite.py +808 -0
  418. oscura/validation/replay.py +788 -0
  419. oscura/{testing → validation/testing}/__init__.py +2 -2
  420. oscura/{testing → validation/testing}/synthetic.py +5 -5
  421. oscura/visualization/__init__.py +9 -0
  422. oscura/visualization/accessibility.py +1 -1
  423. oscura/visualization/annotations.py +64 -67
  424. oscura/visualization/colors.py +7 -7
  425. oscura/visualization/digital.py +180 -81
  426. oscura/visualization/eye.py +236 -85
  427. oscura/visualization/interactive.py +320 -143
  428. oscura/visualization/jitter.py +587 -247
  429. oscura/visualization/layout.py +169 -134
  430. oscura/visualization/optimization.py +103 -52
  431. oscura/visualization/palettes.py +1 -1
  432. oscura/visualization/power.py +427 -211
  433. oscura/visualization/power_extended.py +626 -297
  434. oscura/visualization/presets.py +2 -0
  435. oscura/visualization/protocols.py +495 -181
  436. oscura/visualization/render.py +79 -63
  437. oscura/visualization/reverse_engineering.py +171 -124
  438. oscura/visualization/signal_integrity.py +460 -279
  439. oscura/visualization/specialized.py +190 -100
  440. oscura/visualization/spectral.py +670 -255
  441. oscura/visualization/thumbnails.py +166 -137
  442. oscura/visualization/waveform.py +150 -63
  443. oscura/workflows/__init__.py +3 -0
  444. oscura/{batch → workflows/batch}/__init__.py +5 -5
  445. oscura/{batch → workflows/batch}/advanced.py +150 -75
  446. oscura/workflows/batch/aggregate.py +531 -0
  447. oscura/workflows/batch/analyze.py +236 -0
  448. oscura/{batch → workflows/batch}/logging.py +2 -2
  449. oscura/{batch → workflows/batch}/metrics.py +1 -1
  450. oscura/workflows/complete_re.py +1144 -0
  451. oscura/workflows/compliance.py +44 -54
  452. oscura/workflows/digital.py +197 -51
  453. oscura/workflows/legacy/__init__.py +12 -0
  454. oscura/{workflow → workflows/legacy}/dag.py +4 -1
  455. oscura/workflows/multi_trace.py +9 -9
  456. oscura/workflows/power.py +42 -62
  457. oscura/workflows/protocol.py +82 -49
  458. oscura/workflows/reverse_engineering.py +351 -150
  459. oscura/workflows/signal_integrity.py +157 -82
  460. oscura-0.7.0.dist-info/METADATA +661 -0
  461. oscura-0.7.0.dist-info/RECORD +591 -0
  462. oscura/batch/aggregate.py +0 -300
  463. oscura/batch/analyze.py +0 -139
  464. oscura/dsl/__init__.py +0 -73
  465. oscura/exceptions.py +0 -59
  466. oscura/exploratory/fuzzy.py +0 -513
  467. oscura/exploratory/sync.py +0 -384
  468. oscura/exporters/__init__.py +0 -94
  469. oscura/exporters/csv.py +0 -303
  470. oscura/exporters/exporters.py +0 -44
  471. oscura/exporters/hdf5.py +0 -217
  472. oscura/exporters/html_export.py +0 -701
  473. oscura/exporters/json_export.py +0 -291
  474. oscura/exporters/markdown_export.py +0 -367
  475. oscura/exporters/matlab_export.py +0 -354
  476. oscura/exporters/npz_export.py +0 -219
  477. oscura/exporters/spice_export.py +0 -210
  478. oscura/search/context.py +0 -149
  479. oscura/session/__init__.py +0 -34
  480. oscura/session/annotations.py +0 -289
  481. oscura/session/history.py +0 -313
  482. oscura/session/session.py +0 -520
  483. oscura/workflow/__init__.py +0 -13
  484. oscura-0.5.1.dist-info/METADATA +0 -583
  485. oscura-0.5.1.dist-info/RECORD +0 -481
  486. /oscura/core/{config.py → config/legacy.py} +0 -0
  487. /oscura/{extensibility → core/extensibility}/__init__.py +0 -0
  488. /oscura/{extensibility → core/extensibility}/registry.py +0 -0
  489. /oscura/{plugins → core/plugins}/isolation.py +0 -0
  490. /oscura/{schemas → core/schemas}/bus_configuration.json +0 -0
  491. /oscura/{builders → utils/builders}/signal_builder.py +0 -0
  492. /oscura/{optimization → utils/optimization}/parallel.py +0 -0
  493. /oscura/{pipeline → utils/pipeline}/__init__.py +0 -0
  494. /oscura/{streaming → utils/streaming}/__init__.py +0 -0
  495. {oscura-0.5.1.dist-info → oscura-0.7.0.dist-info}/WHEEL +0 -0
  496. {oscura-0.5.1.dist-info → oscura-0.7.0.dist-info}/entry_points.txt +0 -0
  497. {oscura-0.5.1.dist-info → oscura-0.7.0.dist-info}/licenses/LICENSE +0 -0
@@ -469,17 +469,9 @@ class FieldInferrer:
469
469
  """
470
470
  size = end - start
471
471
  name = f"field_{index}"
472
+ raw_values = self._extract_field_values(messages, start, end)
472
473
 
473
- # Extract field values
474
- values = []
475
- raw_values = []
476
- for msg in messages:
477
- if len(msg) >= end:
478
- field_bytes = msg[start:end]
479
- raw_values.append(field_bytes)
480
- values.append(field_bytes)
481
-
482
- if not values:
474
+ if not raw_values:
483
475
  return InferredField(
484
476
  name=name,
485
477
  offset=start,
@@ -488,31 +480,60 @@ class FieldInferrer:
488
480
  confidence=0.0,
489
481
  )
490
482
 
491
- # Check if constant
483
+ # Analyze field properties
492
484
  unique_values = set(raw_values)
493
485
  is_constant = len(unique_values) == 1
486
+ is_sequence = self._check_sequence(raw_values, size, is_constant)
487
+ is_checksum = self._check_checksum(messages, start, size)
494
488
 
495
- # Check if sequence
496
- is_sequence = False
497
- if not is_constant and size in [1, 2, 4, 8]:
498
- int_values = [int.from_bytes(v, "big") for v in raw_values]
499
- is_sequence = self._is_sequence(int_values)
489
+ # Infer type and create sample values
490
+ inferred_type, endianness, confidence = self._infer_type(raw_values, size)
491
+ sample_values = self._create_sample_values(raw_values[:5], inferred_type, endianness)
500
492
 
501
- # Check for checksum patterns
502
- is_checksum = False
503
- if start >= min(len(m) for m in messages) - 4:
504
- score = self._check_checksum_correlation(messages, start, size)
505
- is_checksum = score > 0.7
493
+ # Cast to Literal types for type checker
494
+ type_literal = self._cast_type_literal(inferred_type)
495
+ endianness_literal = self._cast_endianness_literal(endianness)
506
496
 
507
- # Infer type
508
- inferred_type, endianness, confidence = self._infer_type(raw_values, size)
497
+ return InferredField(
498
+ name=name,
499
+ offset=start,
500
+ size=size,
501
+ inferred_type=type_literal,
502
+ endianness=endianness_literal,
503
+ is_constant=is_constant,
504
+ is_sequence=is_sequence,
505
+ is_checksum=is_checksum,
506
+ constant_value=raw_values[0] if is_constant else None,
507
+ confidence=confidence,
508
+ sample_values=sample_values,
509
+ )
510
+
511
+ def _extract_field_values(self, messages: Sequence[bytes], start: int, end: int) -> list[bytes]:
512
+ """Extract field values from messages."""
513
+ return [msg[start:end] for msg in messages if len(msg) >= end]
514
+
515
+ def _check_sequence(self, raw_values: list[bytes], size: int, is_constant: bool) -> bool:
516
+ """Check if field values form a sequence."""
517
+ if is_constant or size not in [1, 2, 4, 8]:
518
+ return False
519
+ int_values = [int.from_bytes(v, "big") for v in raw_values]
520
+ return self._is_sequence(int_values)
509
521
 
510
- # Sample values for debugging
522
+ def _check_checksum(self, messages: Sequence[bytes], start: int, size: int) -> bool:
523
+ """Check if field appears to be a checksum."""
524
+ if start < min(len(m) for m in messages) - 4:
525
+ return False
526
+ score = self._check_checksum_correlation(messages, start, size)
527
+ return score > 0.7
528
+
529
+ def _create_sample_values(
530
+ self, raw_values: list[bytes], inferred_type: str, endianness: str
531
+ ) -> list[int | str]:
532
+ """Create sample values for debugging."""
511
533
  sample_values: list[int | str] = []
512
- for v in raw_values[:5]:
513
- if inferred_type.startswith("uint") or inferred_type.startswith("int"):
534
+ for v in raw_values:
535
+ if inferred_type.startswith(("uint", "int")):
514
536
  try:
515
- # Cast endianness to Literal type for type checker
516
537
  byte_order: Literal["big", "little"] = (
517
538
  "big" if endianness == "n/a" else endianness # type: ignore[assignment]
518
539
  )
@@ -526,38 +547,31 @@ class FieldInferrer:
526
547
  sample_values.append(v.hex())
527
548
  else:
528
549
  sample_values.append(v.hex())
550
+ return sample_values
529
551
 
530
- # Cast to Literal types for type checker
531
- inferred_type_literal: Literal[
532
- "uint8",
533
- "uint16",
534
- "uint32",
535
- "uint64",
536
- "int8",
537
- "int16",
538
- "int32",
539
- "int64",
540
- "float32",
541
- "float64",
542
- "bytes",
543
- "string",
544
- "unknown",
545
- ] = inferred_type # type: ignore[assignment]
546
- endianness_literal: Literal["big", "little", "n/a"] = endianness # type: ignore[assignment]
552
+ def _cast_type_literal(
553
+ self, inferred_type: str
554
+ ) -> Literal[
555
+ "uint8",
556
+ "uint16",
557
+ "uint32",
558
+ "uint64",
559
+ "int8",
560
+ "int16",
561
+ "int32",
562
+ "int64",
563
+ "float32",
564
+ "float64",
565
+ "bytes",
566
+ "string",
567
+ "unknown",
568
+ ]:
569
+ """Cast inferred type to Literal for type checker."""
570
+ return inferred_type # type: ignore[return-value]
547
571
 
548
- return InferredField(
549
- name=name,
550
- offset=start,
551
- size=size,
552
- inferred_type=inferred_type_literal,
553
- endianness=endianness_literal,
554
- is_constant=is_constant,
555
- is_sequence=is_sequence,
556
- is_checksum=is_checksum,
557
- constant_value=raw_values[0] if is_constant else None,
558
- confidence=confidence,
559
- sample_values=sample_values,
560
- )
572
+ def _cast_endianness_literal(self, endianness: str) -> Literal["big", "little", "n/a"]:
573
+ """Cast endianness to Literal for type checker."""
574
+ return endianness # type: ignore[return-value]
561
575
 
562
576
  def _infer_type(
563
577
  self,
@@ -576,59 +590,114 @@ class FieldInferrer:
576
590
  if not values:
577
591
  return "unknown", "n/a", 0.0
578
592
 
579
- # Check for string (high printable ratio)
593
+ # Check for string first
594
+ string_result = self._check_string_type(values, size)
595
+ if string_result is not None:
596
+ return string_result
597
+
598
+ # Infer based on field size
599
+ if size == 1:
600
+ return "uint8", "n/a", 0.9
601
+ elif size == 2:
602
+ return self._infer_uint16_type(values)
603
+ elif size == 4:
604
+ return self._infer_4byte_type(values)
605
+ elif size == 8:
606
+ return self._infer_uint64_type(values)
607
+ else:
608
+ return "bytes", "n/a", 0.6
609
+
610
+ def _check_string_type(self, values: list[bytes], size: int) -> tuple[str, str, float] | None:
611
+ """Check if values represent string data.
612
+
613
+ Args:
614
+ values: Field values to check.
615
+ size: Field size.
616
+
617
+ Returns:
618
+ Type tuple if string, None otherwise.
619
+ """
580
620
  printable_ratio = sum(
581
621
  1 for v in values for b in v if 32 <= b <= 126 or b in (9, 10, 13)
582
622
  ) / (len(values) * size)
583
623
 
584
624
  if printable_ratio > 0.8:
585
625
  return "string", "n/a", printable_ratio
626
+ return None
586
627
 
587
- # Check for standard integer sizes
588
- if size == 1:
589
- return "uint8", "n/a", 0.9
628
def _infer_uint16_type(self, values: list[bytes]) -> tuple[str, str, float]:
    """Classify a 2-byte field as ``uint16`` and pick its byte order.

    Args:
        values: Field values.

    Returns:
        ``("uint16", endianness, 0.8)`` where ``endianness`` is whatever
        ``_detect_endianness`` reports for ``values``.
    """
    return "uint16", self._detect_endianness(values), 0.8
600
639
 
601
- return "uint16", endian, 0.8
640
def _infer_4byte_type(self, values: list[bytes]) -> tuple[str, str, float]:
    """Classify a 4-byte field as ``float32`` or ``uint32``.

    Args:
        values: Field values.

    Returns:
        ``("float32", "big", 0.7)`` when ``_is_valid_float32`` accepts the
        values; otherwise ``("uint32", endianness, 0.8)`` with the byte order
        reported by ``_detect_endianness``.
    """
    if not self._is_valid_float32(values):
        # Not float-like: fall back to an unsigned integer interpretation.
        return "uint32", self._detect_endianness(values), 0.8

    # A float match is always reported with byte order "big".
    return "float32", "big", 0.7
622
656
 
623
- elif size == 8:
624
- # Check for float64 or uint64
625
- be_variance = np.var([int.from_bytes(v, "big") for v in values])
626
- le_variance = np.var([int.from_bytes(v, "little") for v in values])
627
- endian = "big" if be_variance < le_variance else "little"
628
- return "uint64", endian, 0.7
657
def _infer_uint64_type(self, values: list[bytes]) -> tuple[str, str, float]:
    """Classify an 8-byte field as ``uint64`` and pick its byte order.

    Args:
        values: Field values.

    Returns:
        ``("uint64", endianness, 0.7)`` where ``endianness`` is whatever
        ``_detect_endianness`` reports for ``values``.
    """
    return "uint64", self._detect_endianness(values), 0.7
668
+
669
+ def _detect_endianness(self, values: list[bytes]) -> str:
670
+ """Detect endianness by comparing variance.
671
+
672
+ Args:
673
+ values: Field values.
674
+
675
+ Returns:
676
+ Endianness string ("big" or "little").
677
+ """
678
+ be_variance = np.var([int.from_bytes(v, "big") for v in values])
679
+ le_variance = np.var([int.from_bytes(v, "little") for v in values])
680
+ return "big" if be_variance < le_variance else "little"
681
+
682
+ def _is_valid_float32(self, values: list[bytes]) -> bool:
683
+ """Check if values are valid float32 numbers.
684
+
685
+ Args:
686
+ values: Field values to check.
687
+
688
+ Returns:
689
+ True if majority are valid floats.
690
+ """
691
+ float_valid = 0
692
+ for v in values:
693
+ try:
694
+ f = struct.unpack(">f", v)[0]
695
+ if not (np.isnan(f) or np.isinf(f)) and -1e10 < f < 1e10:
696
+ float_valid += 1
697
+ except Exception:
698
+ pass
699
+
700
+ return float_valid / len(values) > 0.8
632
701
 
633
702
  def _is_sequence(self, values: list[int]) -> bool:
634
703
  """Check if values form a sequence.
@@ -827,30 +896,82 @@ def diff_payloads(payload_a: bytes, payload_b: bytes) -> PayloadDiff:
827
896
  >>> print(f"Common prefix: {diff.common_prefix_length} bytes")
828
897
  >>> print(f"Different bytes: {len(diff.differences)}")
829
898
  """
830
- # Find common prefix
831
- common_prefix = 0
832
899
  min_len = min(len(payload_a), len(payload_b))
900
+
901
+ common_prefix = _find_common_prefix(payload_a, payload_b, min_len)
902
+ common_suffix = _find_common_suffix(payload_a, payload_b, min_len, common_prefix)
903
+ differences = _find_payload_differences(payload_a, payload_b, min_len)
904
+
905
+ similarity = _calculate_similarity(payload_a, payload_b, min_len, differences)
906
+ edit_distance = _levenshtein_distance(payload_a, payload_b)
907
+
908
+ return PayloadDiff(
909
+ common_prefix_length=common_prefix,
910
+ common_suffix_length=common_suffix,
911
+ differences=differences,
912
+ similarity=similarity,
913
+ edit_distance=edit_distance,
914
+ )
915
+
916
+
917
+ def _find_common_prefix(payload_a: bytes, payload_b: bytes, min_len: int) -> int:
918
+ """Find length of common prefix.
919
+
920
+ Args:
921
+ payload_a: First payload.
922
+ payload_b: Second payload.
923
+ min_len: Minimum payload length.
924
+
925
+ Returns:
926
+ Length of common prefix in bytes.
927
+ """
833
928
  for i in range(min_len):
834
- if payload_a[i] == payload_b[i]:
835
- common_prefix += 1
836
- else:
837
- break
929
+ if payload_a[i] != payload_b[i]:
930
+ return i
931
+ return min_len
838
932
 
839
- # Find common suffix
840
- common_suffix = 0
933
+
934
+ def _find_common_suffix(
935
+ payload_a: bytes, payload_b: bytes, min_len: int, common_prefix: int
936
+ ) -> int:
937
+ """Find length of common suffix.
938
+
939
+ Args:
940
+ payload_a: First payload.
941
+ payload_b: Second payload.
942
+ min_len: Minimum payload length.
943
+ common_prefix: Length of common prefix.
944
+
945
+ Returns:
946
+ Length of common suffix in bytes.
947
+ """
841
948
  for i in range(1, min_len - common_prefix + 1):
842
- if payload_a[-i] == payload_b[-i]:
843
- common_suffix += 1
844
- else:
845
- break
949
+ if payload_a[-i] != payload_b[-i]:
950
+ return i - 1
951
+ return min_len - common_prefix
952
+
953
+
954
+ def _find_payload_differences(
955
+ payload_a: bytes, payload_b: bytes, min_len: int
956
+ ) -> list[tuple[int, int, int]]:
957
+ """Find all byte differences between payloads.
958
+
959
+ Args:
960
+ payload_a: First payload.
961
+ payload_b: Second payload.
962
+ min_len: Minimum payload length.
846
963
 
847
- # Find all differences
964
+ Returns:
965
+ List of (offset, byte_a, byte_b) tuples (-1 for missing bytes).
966
+ """
848
967
  differences = []
968
+
969
+ # Differences in overlapping region
849
970
  for i in range(min_len):
850
971
  if payload_a[i] != payload_b[i]:
851
972
  differences.append((i, payload_a[i], payload_b[i]))
852
973
 
853
- # Add length differences
974
+ # Length differences
854
975
  if len(payload_a) > len(payload_b):
855
976
  for i in range(len(payload_b), len(payload_a)):
856
977
  differences.append((i, payload_a[i], -1))
@@ -858,24 +979,29 @@ def diff_payloads(payload_a: bytes, payload_b: bytes) -> PayloadDiff:
858
979
  for i in range(len(payload_a), len(payload_b)):
859
980
  differences.append((i, -1, payload_b[i]))
860
981
 
861
- # Calculate similarity
982
+ return differences
983
+
984
+
985
+ def _calculate_similarity(
986
+ payload_a: bytes, payload_b: bytes, min_len: int, differences: list[tuple[int, int, int]]
987
+ ) -> float:
988
+ """Calculate payload similarity ratio.
989
+
990
+ Args:
991
+ payload_a: First payload.
992
+ payload_b: Second payload.
993
+ min_len: Minimum payload length.
994
+ differences: List of differences.
995
+
996
+ Returns:
997
+ Similarity ratio (0.0-1.0).
998
+ """
862
999
  max_len = max(len(payload_a), len(payload_b))
863
1000
  if max_len == 0:
864
- similarity = 1.0
865
- else:
866
- matching = min_len - len([d for d in differences if d[0] < min_len])
867
- similarity = matching / max_len
1001
+ return 1.0
868
1002
 
869
- # Calculate edit distance (simplified Levenshtein)
870
- edit_distance = _levenshtein_distance(payload_a, payload_b)
871
-
872
- return PayloadDiff(
873
- common_prefix_length=common_prefix,
874
- common_suffix_length=common_suffix,
875
- differences=differences,
876
- similarity=similarity,
877
- edit_distance=edit_distance,
878
- )
1003
+ matching = min_len - len([d for d in differences if d[0] < min_len])
1004
+ return matching / max_len
879
1005
 
880
1006
 
881
1007
  def find_common_bytes(payloads: Sequence[bytes]) -> bytes:
@@ -1008,7 +1134,7 @@ def compute_similarity(
1008
1134
  def cluster_payloads(
1009
1135
  payloads: Sequence[bytes],
1010
1136
  threshold: float = 0.8,
1011
- algorithm: Literal["greedy", "dbscan"] = "greedy",
1137
+ algorithm: Literal["greedy", "dbscan", "lsh"] = "greedy",
1012
1138
  ) -> list[PayloadCluster]:
1013
1139
  """Cluster similar payloads together.
1014
1140
 
@@ -1017,7 +1143,7 @@ def cluster_payloads(
1017
1143
  Args:
1018
1144
  payloads: List of payloads to cluster.
1019
1145
  threshold: Similarity threshold for clustering.
1020
- algorithm: Clustering algorithm.
1146
+ algorithm: Clustering algorithm (greedy: O(n²), lsh: O(n log n)).
1021
1147
 
1022
1148
  Returns:
1023
1149
  List of PayloadCluster objects.
@@ -1026,11 +1152,19 @@ def cluster_payloads(
1026
1152
  >>> clusters = cluster_payloads(payloads, threshold=0.85)
1027
1153
  >>> for c in clusters:
1028
1154
  ... print(f"Cluster {c.cluster_id}: {c.size} payloads")
1155
+
1156
+ >>> # For large datasets (>1000 payloads), use LSH for 100-1000x speedup
1157
+ >>> clusters = cluster_payloads(payloads, threshold=0.85, algorithm="lsh")
1029
1158
  """
1030
1159
  if not payloads:
1031
1160
  return []
1032
1161
 
1033
- if algorithm == "greedy":
1162
+ if algorithm == "lsh":
1163
+ # Use LSH for O(n log n) performance on large datasets
1164
+ from oscura.utils.performance.lsh_clustering import cluster_payloads_lsh
1165
+
1166
+ return cluster_payloads_lsh(payloads, threshold=threshold)
1167
+ elif algorithm == "greedy":
1034
1168
  return _cluster_greedy_optimized(payloads, threshold)
1035
1169
  # algorithm == "dbscan"
1036
1170
  return _cluster_dbscan(payloads, threshold)
@@ -1103,78 +1237,82 @@ def _levenshtein_distance(a: bytes, b: bytes) -> int:
1103
1237
  return previous_row[-1]
1104
1238
 
1105
1239
 
1106
- def _fast_similarity(payload_a: bytes, payload_b: bytes, threshold: float) -> float | None:
1107
- """Fast similarity check with early termination.
1108
-
1109
- Uses length-based filtering and sampling to quickly reject dissimilar payloads.
1110
- Returns None if payloads are likely similar (needs full check),
1111
- or a similarity value if they can be quickly determined.
1240
+ def _check_length_similarity(len_a: int, len_b: int, threshold: float) -> float | None:
1241
+ """Check if length difference allows similarity threshold.
1112
1242
 
1113
1243
  Args:
1114
- payload_a: First payload.
1115
- payload_b: Second payload.
1116
- threshold: Similarity threshold for clustering.
1244
+ len_a: Length of first payload.
1245
+ len_b: Length of second payload.
1246
+ threshold: Similarity threshold.
1117
1247
 
1118
1248
  Returns:
1119
- Similarity value if quickly determined, None if full check needed.
1249
+ Similarity if can be determined from length, None otherwise.
1120
1250
  """
1121
- len_a = len(payload_a)
1122
- len_b = len(payload_b)
1123
-
1124
1251
  # Empty payloads
1125
1252
  if len_a == 0 and len_b == 0:
1126
1253
  return 1.0
1127
1254
  if len_a == 0 or len_b == 0:
1128
1255
  return 0.0
1129
1256
 
1130
- # Length difference filter: if lengths differ by more than (1-threshold)*max_len,
1131
- # similarity can't exceed threshold
1257
+ # Maximum possible similarity given length difference
1132
1258
  max_len = max(len_a, len_b)
1133
1259
  min_len = min(len_a, len_b)
1134
- _length_diff = max_len - min_len
1135
-
1136
- # Maximum possible similarity given length difference
1137
1260
  max_possible_similarity = min_len / max_len
1261
+
1138
1262
  if max_possible_similarity < threshold:
1139
1263
  return max_possible_similarity
1140
1264
 
1141
- # For same-length payloads, use fast hamming similarity
1142
- if len_a == len_b:
1143
- # Sample comparison for large payloads
1144
- if len_a > 50:
1145
- # Sample first 16, last 16, and some middle bytes
1146
- sample_size = min(48, len_a)
1147
- mismatches = 0
1148
-
1149
- # First 16 bytes
1150
- for i in range(min(16, len_a)):
1151
- if payload_a[i] != payload_b[i]:
1152
- mismatches += 1
1153
-
1154
- # Last 16 bytes
1155
- for i in range(1, min(17, len_a + 1)):
1156
- if payload_a[-i] != payload_b[-i]:
1157
- mismatches += 1
1158
-
1159
- # Middle samples (len_a > 32 always true here since len_a > 50)
1160
- step = (len_a - 32) // 16
1161
- if step > 0:
1162
- for i in range(16, len_a - 16, step):
1163
- if payload_a[i] != payload_b[i]:
1164
- mismatches += 1
1165
-
1166
- # Estimate similarity from sample
1167
- estimated_similarity = 1.0 - (mismatches / sample_size)
1265
+ return None
1168
1266
 
1169
- # If sample shows very low similarity, reject early
1170
- if estimated_similarity < threshold * 0.8:
1171
- return estimated_similarity
1172
1267
 
1173
- # Full hamming comparison for same-length payloads (faster than Levenshtein)
1174
- matches = sum(a == b for a, b in zip(payload_a, payload_b, strict=True))
1175
- return matches / len_a
1268
+ def _sample_hamming_similarity(payload_a: bytes, payload_b: bytes, length: int) -> float:
1269
+ """Compute similarity by sampling first 16, last 16, and middle bytes.
1176
1270
 
1177
- # For different-length payloads, use common prefix/suffix heuristic
1271
+ Args:
1272
+ payload_a: First payload.
1273
+ payload_b: Second payload.
1274
+ length: Length of payloads (must be equal).
1275
+
1276
+ Returns:
1277
+ Estimated similarity based on samples.
1278
+ """
1279
+ sample_size = min(48, length)
1280
+ mismatches = 0
1281
+
1282
+ # First 16 bytes
1283
+ for i in range(min(16, length)):
1284
+ if payload_a[i] != payload_b[i]:
1285
+ mismatches += 1
1286
+
1287
+ # Last 16 bytes
1288
+ for i in range(1, min(17, length + 1)):
1289
+ if payload_a[-i] != payload_b[-i]:
1290
+ mismatches += 1
1291
+
1292
+ # Middle samples (only if length > 32)
1293
+ step = (length - 32) // 16
1294
+ if step > 0:
1295
+ for i in range(16, length - 16, step):
1296
+ if payload_a[i] != payload_b[i]:
1297
+ mismatches += 1
1298
+
1299
+ return 1.0 - (mismatches / sample_size)
1300
+
1301
+
1302
+ def _prefix_suffix_similarity(
1303
+ payload_a: bytes, payload_b: bytes, min_len: int, max_len: int
1304
+ ) -> float:
1305
+ """Estimate similarity from common prefix and suffix.
1306
+
1307
+ Args:
1308
+ payload_a: First payload.
1309
+ payload_b: Second payload.
1310
+ min_len: Minimum length.
1311
+ max_len: Maximum length.
1312
+
1313
+ Returns:
1314
+ Estimated similarity.
1315
+ """
1178
1316
  common_prefix = 0
1179
1317
  for i in range(min_len):
1180
1318
  if payload_a[i] == payload_b[i]:
@@ -1189,9 +1327,51 @@ def _fast_similarity(payload_a: bytes, payload_b: bytes, threshold: float) -> fl
1189
1327
  else:
1190
1328
  break
1191
1329
 
1192
- # Estimate similarity from prefix/suffix
1193
1330
  common_bytes = common_prefix + common_suffix
1194
- estimated_similarity = common_bytes / max_len
1331
+ return common_bytes / max_len
1332
+
1333
+
1334
+ def _fast_similarity(payload_a: bytes, payload_b: bytes, threshold: float) -> float | None:
1335
+ """Fast similarity check with early termination.
1336
+
1337
+ Uses length-based filtering and sampling to quickly reject dissimilar payloads.
1338
+ Returns None if payloads are likely similar (needs full check),
1339
+ or a similarity value if they can be quickly determined.
1340
+
1341
+ Args:
1342
+ payload_a: First payload.
1343
+ payload_b: Second payload.
1344
+ threshold: Similarity threshold for clustering.
1345
+
1346
+ Returns:
1347
+ Similarity value if quickly determined, None if full check needed.
1348
+ """
1349
+ len_a = len(payload_a)
1350
+ len_b = len(payload_b)
1351
+
1352
+ # Check length-based similarity
1353
+ length_result = _check_length_similarity(len_a, len_b, threshold)
1354
+ if length_result is not None:
1355
+ return length_result
1356
+
1357
+ # For same-length payloads, use fast hamming similarity
1358
+ if len_a == len_b:
1359
+ # Sample comparison for large payloads
1360
+ if len_a > 50:
1361
+ estimated_similarity = _sample_hamming_similarity(payload_a, payload_b, len_a)
1362
+
1363
+ # If sample shows very low similarity, reject early
1364
+ if estimated_similarity < threshold * 0.8:
1365
+ return estimated_similarity
1366
+
1367
+ # Full hamming comparison for same-length payloads (faster than Levenshtein)
1368
+ matches = sum(a == b for a, b in zip(payload_a, payload_b, strict=True))
1369
+ return matches / len_a
1370
+
1371
+ # For different-length payloads, use common prefix/suffix heuristic
1372
+ max_len = max(len_a, len_b)
1373
+ min_len = min(len_a, len_b)
1374
+ estimated_similarity = _prefix_suffix_similarity(payload_a, payload_b, min_len, max_len)
1195
1375
 
1196
1376
  # If common bytes suggest low similarity, reject
1197
1377
  if estimated_similarity < threshold * 0.7: