oscura 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (497) hide show
  1. oscura/__init__.py +169 -167
  2. oscura/analyzers/__init__.py +3 -0
  3. oscura/analyzers/classification.py +659 -0
  4. oscura/analyzers/digital/edges.py +325 -65
  5. oscura/analyzers/digital/quality.py +293 -166
  6. oscura/analyzers/digital/timing.py +260 -115
  7. oscura/analyzers/digital/timing_numba.py +334 -0
  8. oscura/analyzers/entropy.py +605 -0
  9. oscura/analyzers/eye/diagram.py +176 -109
  10. oscura/analyzers/eye/metrics.py +5 -5
  11. oscura/analyzers/jitter/__init__.py +6 -4
  12. oscura/analyzers/jitter/ber.py +52 -52
  13. oscura/analyzers/jitter/classification.py +156 -0
  14. oscura/analyzers/jitter/decomposition.py +163 -113
  15. oscura/analyzers/jitter/spectrum.py +80 -64
  16. oscura/analyzers/ml/__init__.py +39 -0
  17. oscura/analyzers/ml/features.py +600 -0
  18. oscura/analyzers/ml/signal_classifier.py +604 -0
  19. oscura/analyzers/packet/daq.py +246 -158
  20. oscura/analyzers/packet/parser.py +12 -1
  21. oscura/analyzers/packet/payload.py +50 -2110
  22. oscura/analyzers/packet/payload_analysis.py +361 -181
  23. oscura/analyzers/packet/payload_patterns.py +133 -70
  24. oscura/analyzers/packet/stream.py +84 -23
  25. oscura/analyzers/patterns/__init__.py +26 -5
  26. oscura/analyzers/patterns/anomaly_detection.py +908 -0
  27. oscura/analyzers/patterns/clustering.py +169 -108
  28. oscura/analyzers/patterns/clustering_optimized.py +227 -0
  29. oscura/analyzers/patterns/discovery.py +1 -1
  30. oscura/analyzers/patterns/matching.py +581 -197
  31. oscura/analyzers/patterns/pattern_mining.py +778 -0
  32. oscura/analyzers/patterns/periodic.py +121 -38
  33. oscura/analyzers/patterns/sequences.py +175 -78
  34. oscura/analyzers/power/conduction.py +1 -1
  35. oscura/analyzers/power/soa.py +6 -6
  36. oscura/analyzers/power/switching.py +250 -110
  37. oscura/analyzers/protocol/__init__.py +17 -1
  38. oscura/analyzers/protocols/base.py +6 -6
  39. oscura/analyzers/protocols/ble/__init__.py +38 -0
  40. oscura/analyzers/protocols/ble/analyzer.py +809 -0
  41. oscura/analyzers/protocols/ble/uuids.py +288 -0
  42. oscura/analyzers/protocols/can.py +257 -127
  43. oscura/analyzers/protocols/can_fd.py +107 -80
  44. oscura/analyzers/protocols/flexray.py +139 -80
  45. oscura/analyzers/protocols/hdlc.py +93 -58
  46. oscura/analyzers/protocols/i2c.py +247 -106
  47. oscura/analyzers/protocols/i2s.py +138 -86
  48. oscura/analyzers/protocols/industrial/__init__.py +40 -0
  49. oscura/analyzers/protocols/industrial/bacnet/__init__.py +33 -0
  50. oscura/analyzers/protocols/industrial/bacnet/analyzer.py +708 -0
  51. oscura/analyzers/protocols/industrial/bacnet/encoding.py +412 -0
  52. oscura/analyzers/protocols/industrial/bacnet/services.py +622 -0
  53. oscura/analyzers/protocols/industrial/ethercat/__init__.py +30 -0
  54. oscura/analyzers/protocols/industrial/ethercat/analyzer.py +474 -0
  55. oscura/analyzers/protocols/industrial/ethercat/mailbox.py +339 -0
  56. oscura/analyzers/protocols/industrial/ethercat/topology.py +166 -0
  57. oscura/analyzers/protocols/industrial/modbus/__init__.py +31 -0
  58. oscura/analyzers/protocols/industrial/modbus/analyzer.py +525 -0
  59. oscura/analyzers/protocols/industrial/modbus/crc.py +79 -0
  60. oscura/analyzers/protocols/industrial/modbus/functions.py +436 -0
  61. oscura/analyzers/protocols/industrial/opcua/__init__.py +21 -0
  62. oscura/analyzers/protocols/industrial/opcua/analyzer.py +552 -0
  63. oscura/analyzers/protocols/industrial/opcua/datatypes.py +446 -0
  64. oscura/analyzers/protocols/industrial/opcua/services.py +264 -0
  65. oscura/analyzers/protocols/industrial/profinet/__init__.py +23 -0
  66. oscura/analyzers/protocols/industrial/profinet/analyzer.py +441 -0
  67. oscura/analyzers/protocols/industrial/profinet/dcp.py +263 -0
  68. oscura/analyzers/protocols/industrial/profinet/ptcp.py +200 -0
  69. oscura/analyzers/protocols/jtag.py +180 -98
  70. oscura/analyzers/protocols/lin.py +219 -114
  71. oscura/analyzers/protocols/manchester.py +4 -4
  72. oscura/analyzers/protocols/onewire.py +253 -149
  73. oscura/analyzers/protocols/parallel_bus/__init__.py +20 -0
  74. oscura/analyzers/protocols/parallel_bus/centronics.py +92 -0
  75. oscura/analyzers/protocols/parallel_bus/gpib.py +137 -0
  76. oscura/analyzers/protocols/spi.py +192 -95
  77. oscura/analyzers/protocols/swd.py +321 -167
  78. oscura/analyzers/protocols/uart.py +267 -125
  79. oscura/analyzers/protocols/usb.py +235 -131
  80. oscura/analyzers/side_channel/power.py +17 -12
  81. oscura/analyzers/signal/__init__.py +15 -0
  82. oscura/analyzers/signal/timing_analysis.py +1086 -0
  83. oscura/analyzers/signal_integrity/__init__.py +4 -1
  84. oscura/analyzers/signal_integrity/sparams.py +2 -19
  85. oscura/analyzers/spectral/chunked.py +129 -60
  86. oscura/analyzers/spectral/chunked_fft.py +300 -94
  87. oscura/analyzers/spectral/chunked_wavelet.py +100 -80
  88. oscura/analyzers/statistical/checksum.py +376 -217
  89. oscura/analyzers/statistical/classification.py +229 -107
  90. oscura/analyzers/statistical/entropy.py +78 -53
  91. oscura/analyzers/statistics/correlation.py +407 -211
  92. oscura/analyzers/statistics/outliers.py +2 -2
  93. oscura/analyzers/statistics/streaming.py +30 -5
  94. oscura/analyzers/validation.py +216 -101
  95. oscura/analyzers/waveform/measurements.py +9 -0
  96. oscura/analyzers/waveform/measurements_with_uncertainty.py +31 -15
  97. oscura/analyzers/waveform/spectral.py +500 -228
  98. oscura/api/__init__.py +31 -5
  99. oscura/api/dsl/__init__.py +582 -0
  100. oscura/{dsl → api/dsl}/commands.py +43 -76
  101. oscura/{dsl → api/dsl}/interpreter.py +26 -51
  102. oscura/{dsl → api/dsl}/parser.py +107 -77
  103. oscura/{dsl → api/dsl}/repl.py +2 -2
  104. oscura/api/dsl.py +1 -1
  105. oscura/{integrations → api/integrations}/__init__.py +1 -1
  106. oscura/{integrations → api/integrations}/llm.py +201 -102
  107. oscura/api/operators.py +3 -3
  108. oscura/api/optimization.py +144 -30
  109. oscura/api/rest_server.py +921 -0
  110. oscura/api/server/__init__.py +17 -0
  111. oscura/api/server/dashboard.py +850 -0
  112. oscura/api/server/static/README.md +34 -0
  113. oscura/api/server/templates/base.html +181 -0
  114. oscura/api/server/templates/export.html +120 -0
  115. oscura/api/server/templates/home.html +284 -0
  116. oscura/api/server/templates/protocols.html +58 -0
  117. oscura/api/server/templates/reports.html +43 -0
  118. oscura/api/server/templates/session_detail.html +89 -0
  119. oscura/api/server/templates/sessions.html +83 -0
  120. oscura/api/server/templates/waveforms.html +73 -0
  121. oscura/automotive/__init__.py +8 -1
  122. oscura/automotive/can/__init__.py +10 -0
  123. oscura/automotive/can/checksum.py +3 -1
  124. oscura/automotive/can/dbc_generator.py +590 -0
  125. oscura/automotive/can/message_wrapper.py +121 -74
  126. oscura/automotive/can/patterns.py +98 -21
  127. oscura/automotive/can/session.py +292 -56
  128. oscura/automotive/can/state_machine.py +6 -3
  129. oscura/automotive/can/stimulus_response.py +97 -75
  130. oscura/automotive/dbc/__init__.py +10 -2
  131. oscura/automotive/dbc/generator.py +84 -56
  132. oscura/automotive/dbc/parser.py +6 -6
  133. oscura/automotive/dtc/data.json +17 -102
  134. oscura/automotive/dtc/database.py +2 -2
  135. oscura/automotive/flexray/__init__.py +31 -0
  136. oscura/automotive/flexray/analyzer.py +504 -0
  137. oscura/automotive/flexray/crc.py +185 -0
  138. oscura/automotive/flexray/fibex.py +449 -0
  139. oscura/automotive/j1939/__init__.py +45 -8
  140. oscura/automotive/j1939/analyzer.py +605 -0
  141. oscura/automotive/j1939/spns.py +326 -0
  142. oscura/automotive/j1939/transport.py +306 -0
  143. oscura/automotive/lin/__init__.py +47 -0
  144. oscura/automotive/lin/analyzer.py +612 -0
  145. oscura/automotive/loaders/blf.py +13 -2
  146. oscura/automotive/loaders/csv_can.py +143 -72
  147. oscura/automotive/loaders/dispatcher.py +50 -2
  148. oscura/automotive/loaders/mdf.py +86 -45
  149. oscura/automotive/loaders/pcap.py +111 -61
  150. oscura/automotive/uds/__init__.py +4 -0
  151. oscura/automotive/uds/analyzer.py +725 -0
  152. oscura/automotive/uds/decoder.py +140 -58
  153. oscura/automotive/uds/models.py +7 -1
  154. oscura/automotive/visualization.py +1 -1
  155. oscura/cli/analyze.py +348 -0
  156. oscura/cli/batch.py +142 -122
  157. oscura/cli/benchmark.py +275 -0
  158. oscura/cli/characterize.py +137 -82
  159. oscura/cli/compare.py +224 -131
  160. oscura/cli/completion.py +250 -0
  161. oscura/cli/config_cmd.py +361 -0
  162. oscura/cli/decode.py +164 -87
  163. oscura/cli/export.py +286 -0
  164. oscura/cli/main.py +115 -31
  165. oscura/{onboarding → cli/onboarding}/__init__.py +3 -3
  166. oscura/{onboarding → cli/onboarding}/help.py +80 -58
  167. oscura/{onboarding → cli/onboarding}/tutorials.py +97 -72
  168. oscura/{onboarding → cli/onboarding}/wizard.py +55 -36
  169. oscura/cli/progress.py +147 -0
  170. oscura/cli/shell.py +157 -135
  171. oscura/cli/validate_cmd.py +204 -0
  172. oscura/cli/visualize.py +158 -0
  173. oscura/convenience.py +125 -79
  174. oscura/core/__init__.py +4 -2
  175. oscura/core/backend_selector.py +3 -3
  176. oscura/core/cache.py +126 -15
  177. oscura/core/cancellation.py +1 -1
  178. oscura/{config → core/config}/__init__.py +20 -11
  179. oscura/{config → core/config}/defaults.py +1 -1
  180. oscura/{config → core/config}/loader.py +7 -5
  181. oscura/{config → core/config}/memory.py +5 -5
  182. oscura/{config → core/config}/migration.py +1 -1
  183. oscura/{config → core/config}/pipeline.py +99 -23
  184. oscura/{config → core/config}/preferences.py +1 -1
  185. oscura/{config → core/config}/protocol.py +3 -3
  186. oscura/{config → core/config}/schema.py +426 -272
  187. oscura/{config → core/config}/settings.py +1 -1
  188. oscura/{config → core/config}/thresholds.py +195 -153
  189. oscura/core/correlation.py +5 -6
  190. oscura/core/cross_domain.py +0 -2
  191. oscura/core/debug.py +9 -5
  192. oscura/{extensibility → core/extensibility}/docs.py +158 -70
  193. oscura/{extensibility → core/extensibility}/extensions.py +160 -76
  194. oscura/{extensibility → core/extensibility}/logging.py +1 -1
  195. oscura/{extensibility → core/extensibility}/measurements.py +1 -1
  196. oscura/{extensibility → core/extensibility}/plugins.py +1 -1
  197. oscura/{extensibility → core/extensibility}/templates.py +73 -3
  198. oscura/{extensibility → core/extensibility}/validation.py +1 -1
  199. oscura/core/gpu_backend.py +11 -7
  200. oscura/core/log_query.py +101 -11
  201. oscura/core/logging.py +126 -54
  202. oscura/core/logging_advanced.py +5 -5
  203. oscura/core/memory_limits.py +108 -70
  204. oscura/core/memory_monitor.py +2 -2
  205. oscura/core/memory_progress.py +7 -7
  206. oscura/core/memory_warnings.py +1 -1
  207. oscura/core/numba_backend.py +13 -13
  208. oscura/{plugins → core/plugins}/__init__.py +9 -9
  209. oscura/{plugins → core/plugins}/base.py +7 -7
  210. oscura/{plugins → core/plugins}/cli.py +3 -3
  211. oscura/{plugins → core/plugins}/discovery.py +186 -106
  212. oscura/{plugins → core/plugins}/lifecycle.py +1 -1
  213. oscura/{plugins → core/plugins}/manager.py +7 -7
  214. oscura/{plugins → core/plugins}/registry.py +3 -3
  215. oscura/{plugins → core/plugins}/versioning.py +1 -1
  216. oscura/core/progress.py +16 -1
  217. oscura/core/provenance.py +8 -2
  218. oscura/{schemas → core/schemas}/__init__.py +2 -2
  219. oscura/{schemas → core/schemas}/device_mapping.json +2 -8
  220. oscura/{schemas → core/schemas}/packet_format.json +4 -24
  221. oscura/{schemas → core/schemas}/protocol_definition.json +2 -12
  222. oscura/core/types.py +4 -0
  223. oscura/core/uncertainty.py +3 -3
  224. oscura/correlation/__init__.py +52 -0
  225. oscura/correlation/multi_protocol.py +811 -0
  226. oscura/discovery/auto_decoder.py +117 -35
  227. oscura/discovery/comparison.py +191 -86
  228. oscura/discovery/quality_validator.py +155 -68
  229. oscura/discovery/signal_detector.py +196 -79
  230. oscura/export/__init__.py +18 -8
  231. oscura/export/kaitai_struct.py +513 -0
  232. oscura/export/scapy_layer.py +801 -0
  233. oscura/export/wireshark/generator.py +1 -1
  234. oscura/export/wireshark/templates/dissector.lua.j2 +2 -2
  235. oscura/export/wireshark_dissector.py +746 -0
  236. oscura/guidance/wizard.py +207 -111
  237. oscura/hardware/__init__.py +19 -0
  238. oscura/{acquisition → hardware/acquisition}/__init__.py +4 -4
  239. oscura/{acquisition → hardware/acquisition}/file.py +2 -2
  240. oscura/{acquisition → hardware/acquisition}/hardware.py +7 -7
  241. oscura/{acquisition → hardware/acquisition}/saleae.py +15 -12
  242. oscura/{acquisition → hardware/acquisition}/socketcan.py +1 -1
  243. oscura/{acquisition → hardware/acquisition}/streaming.py +2 -2
  244. oscura/{acquisition → hardware/acquisition}/synthetic.py +3 -3
  245. oscura/{acquisition → hardware/acquisition}/visa.py +33 -11
  246. oscura/hardware/firmware/__init__.py +29 -0
  247. oscura/hardware/firmware/pattern_recognition.py +874 -0
  248. oscura/hardware/hal_detector.py +736 -0
  249. oscura/hardware/security/__init__.py +37 -0
  250. oscura/hardware/security/side_channel_detector.py +1126 -0
  251. oscura/inference/__init__.py +4 -0
  252. oscura/inference/active_learning/observation_table.py +4 -1
  253. oscura/inference/alignment.py +216 -123
  254. oscura/inference/bayesian.py +113 -33
  255. oscura/inference/crc_reverse.py +101 -55
  256. oscura/inference/logic.py +6 -2
  257. oscura/inference/message_format.py +342 -183
  258. oscura/inference/protocol.py +95 -44
  259. oscura/inference/protocol_dsl.py +180 -82
  260. oscura/inference/signal_intelligence.py +1439 -706
  261. oscura/inference/spectral.py +99 -57
  262. oscura/inference/state_machine.py +810 -158
  263. oscura/inference/stream.py +270 -110
  264. oscura/iot/__init__.py +34 -0
  265. oscura/iot/coap/__init__.py +32 -0
  266. oscura/iot/coap/analyzer.py +668 -0
  267. oscura/iot/coap/options.py +212 -0
  268. oscura/iot/lorawan/__init__.py +21 -0
  269. oscura/iot/lorawan/crypto.py +206 -0
  270. oscura/iot/lorawan/decoder.py +801 -0
  271. oscura/iot/lorawan/mac_commands.py +341 -0
  272. oscura/iot/mqtt/__init__.py +27 -0
  273. oscura/iot/mqtt/analyzer.py +999 -0
  274. oscura/iot/mqtt/properties.py +315 -0
  275. oscura/iot/zigbee/__init__.py +31 -0
  276. oscura/iot/zigbee/analyzer.py +615 -0
  277. oscura/iot/zigbee/security.py +153 -0
  278. oscura/iot/zigbee/zcl.py +349 -0
  279. oscura/jupyter/display.py +125 -45
  280. oscura/{exploratory → jupyter/exploratory}/__init__.py +8 -8
  281. oscura/{exploratory → jupyter/exploratory}/error_recovery.py +298 -141
  282. oscura/jupyter/exploratory/fuzzy.py +746 -0
  283. oscura/{exploratory → jupyter/exploratory}/fuzzy_advanced.py +258 -100
  284. oscura/{exploratory → jupyter/exploratory}/legacy.py +464 -242
  285. oscura/{exploratory → jupyter/exploratory}/parse.py +167 -145
  286. oscura/{exploratory → jupyter/exploratory}/recovery.py +119 -87
  287. oscura/jupyter/exploratory/sync.py +612 -0
  288. oscura/{exploratory → jupyter/exploratory}/unknown.py +299 -176
  289. oscura/jupyter/magic.py +4 -4
  290. oscura/{ui → jupyter/ui}/__init__.py +2 -2
  291. oscura/{ui → jupyter/ui}/formatters.py +3 -3
  292. oscura/{ui → jupyter/ui}/progressive_display.py +153 -82
  293. oscura/loaders/__init__.py +183 -67
  294. oscura/loaders/binary.py +88 -1
  295. oscura/loaders/chipwhisperer.py +153 -137
  296. oscura/loaders/configurable.py +208 -86
  297. oscura/loaders/csv_loader.py +458 -215
  298. oscura/loaders/hdf5_loader.py +278 -119
  299. oscura/loaders/lazy.py +87 -54
  300. oscura/loaders/mmap_loader.py +1 -1
  301. oscura/loaders/numpy_loader.py +253 -116
  302. oscura/loaders/pcap.py +226 -151
  303. oscura/loaders/rigol.py +110 -49
  304. oscura/loaders/sigrok.py +201 -78
  305. oscura/loaders/tdms.py +81 -58
  306. oscura/loaders/tektronix.py +291 -174
  307. oscura/loaders/touchstone.py +182 -87
  308. oscura/loaders/tss.py +456 -0
  309. oscura/loaders/vcd.py +215 -117
  310. oscura/loaders/wav.py +155 -68
  311. oscura/reporting/__init__.py +9 -0
  312. oscura/reporting/analyze.py +352 -146
  313. oscura/reporting/argument_preparer.py +69 -14
  314. oscura/reporting/auto_report.py +97 -61
  315. oscura/reporting/batch.py +131 -58
  316. oscura/reporting/chart_selection.py +57 -45
  317. oscura/reporting/comparison.py +63 -17
  318. oscura/reporting/content/executive.py +76 -24
  319. oscura/reporting/core_formats/multi_format.py +11 -8
  320. oscura/reporting/engine.py +312 -158
  321. oscura/reporting/enhanced_reports.py +949 -0
  322. oscura/reporting/export.py +86 -43
  323. oscura/reporting/formatting/numbers.py +69 -42
  324. oscura/reporting/html.py +139 -58
  325. oscura/reporting/index.py +137 -65
  326. oscura/reporting/output.py +158 -67
  327. oscura/reporting/pdf.py +67 -102
  328. oscura/reporting/plots.py +191 -112
  329. oscura/reporting/sections.py +88 -47
  330. oscura/reporting/standards.py +104 -61
  331. oscura/reporting/summary_generator.py +75 -55
  332. oscura/reporting/tables.py +138 -54
  333. oscura/reporting/templates/enhanced/protocol_re.html +525 -0
  334. oscura/sessions/__init__.py +14 -23
  335. oscura/sessions/base.py +3 -3
  336. oscura/sessions/blackbox.py +106 -10
  337. oscura/sessions/generic.py +2 -2
  338. oscura/sessions/legacy.py +783 -0
  339. oscura/side_channel/__init__.py +63 -0
  340. oscura/side_channel/dpa.py +1025 -0
  341. oscura/utils/__init__.py +15 -1
  342. oscura/utils/bitwise.py +118 -0
  343. oscura/{builders → utils/builders}/__init__.py +1 -1
  344. oscura/{comparison → utils/comparison}/__init__.py +6 -6
  345. oscura/{comparison → utils/comparison}/compare.py +202 -101
  346. oscura/{comparison → utils/comparison}/golden.py +83 -63
  347. oscura/{comparison → utils/comparison}/limits.py +313 -89
  348. oscura/{comparison → utils/comparison}/mask.py +151 -45
  349. oscura/{comparison → utils/comparison}/trace_diff.py +1 -1
  350. oscura/{comparison → utils/comparison}/visualization.py +147 -89
  351. oscura/{component → utils/component}/__init__.py +3 -3
  352. oscura/{component → utils/component}/impedance.py +122 -58
  353. oscura/{component → utils/component}/reactive.py +165 -168
  354. oscura/{component → utils/component}/transmission_line.py +3 -3
  355. oscura/{filtering → utils/filtering}/__init__.py +6 -6
  356. oscura/{filtering → utils/filtering}/base.py +1 -1
  357. oscura/{filtering → utils/filtering}/convenience.py +2 -2
  358. oscura/{filtering → utils/filtering}/design.py +169 -93
  359. oscura/{filtering → utils/filtering}/filters.py +2 -2
  360. oscura/{filtering → utils/filtering}/introspection.py +2 -2
  361. oscura/utils/geometry.py +31 -0
  362. oscura/utils/imports.py +184 -0
  363. oscura/utils/lazy.py +1 -1
  364. oscura/{math → utils/math}/__init__.py +2 -2
  365. oscura/{math → utils/math}/arithmetic.py +114 -48
  366. oscura/{math → utils/math}/interpolation.py +139 -106
  367. oscura/utils/memory.py +129 -66
  368. oscura/utils/memory_advanced.py +92 -9
  369. oscura/utils/memory_extensions.py +10 -8
  370. oscura/{optimization → utils/optimization}/__init__.py +1 -1
  371. oscura/{optimization → utils/optimization}/search.py +2 -2
  372. oscura/utils/performance/__init__.py +58 -0
  373. oscura/utils/performance/caching.py +889 -0
  374. oscura/utils/performance/lsh_clustering.py +333 -0
  375. oscura/utils/performance/memory_optimizer.py +699 -0
  376. oscura/utils/performance/optimizations.py +675 -0
  377. oscura/utils/performance/parallel.py +654 -0
  378. oscura/utils/performance/profiling.py +661 -0
  379. oscura/{pipeline → utils/pipeline}/base.py +1 -1
  380. oscura/{pipeline → utils/pipeline}/composition.py +1 -1
  381. oscura/{pipeline → utils/pipeline}/parallel.py +3 -2
  382. oscura/{pipeline → utils/pipeline}/pipeline.py +1 -1
  383. oscura/{pipeline → utils/pipeline}/reverse_engineering.py +412 -221
  384. oscura/{search → utils/search}/__init__.py +3 -3
  385. oscura/{search → utils/search}/anomaly.py +188 -58
  386. oscura/utils/search/context.py +294 -0
  387. oscura/{search → utils/search}/pattern.py +138 -10
  388. oscura/utils/serial.py +51 -0
  389. oscura/utils/storage/__init__.py +61 -0
  390. oscura/utils/storage/database.py +1166 -0
  391. oscura/{streaming → utils/streaming}/chunked.py +302 -143
  392. oscura/{streaming → utils/streaming}/progressive.py +1 -1
  393. oscura/{streaming → utils/streaming}/realtime.py +3 -2
  394. oscura/{triggering → utils/triggering}/__init__.py +6 -6
  395. oscura/{triggering → utils/triggering}/base.py +6 -6
  396. oscura/{triggering → utils/triggering}/edge.py +2 -2
  397. oscura/{triggering → utils/triggering}/pattern.py +2 -2
  398. oscura/{triggering → utils/triggering}/pulse.py +115 -74
  399. oscura/{triggering → utils/triggering}/window.py +2 -2
  400. oscura/utils/validation.py +32 -0
  401. oscura/validation/__init__.py +121 -0
  402. oscura/{compliance → validation/compliance}/__init__.py +5 -5
  403. oscura/{compliance → validation/compliance}/advanced.py +5 -5
  404. oscura/{compliance → validation/compliance}/masks.py +1 -1
  405. oscura/{compliance → validation/compliance}/reporting.py +127 -53
  406. oscura/{compliance → validation/compliance}/testing.py +114 -52
  407. oscura/validation/compliance_tests.py +915 -0
  408. oscura/validation/fuzzer.py +990 -0
  409. oscura/validation/grammar_tests.py +596 -0
  410. oscura/validation/grammar_validator.py +904 -0
  411. oscura/validation/hil_testing.py +977 -0
  412. oscura/{quality → validation/quality}/__init__.py +4 -4
  413. oscura/{quality → validation/quality}/ensemble.py +251 -171
  414. oscura/{quality → validation/quality}/explainer.py +3 -3
  415. oscura/{quality → validation/quality}/scoring.py +1 -1
  416. oscura/{quality → validation/quality}/warnings.py +4 -4
  417. oscura/validation/regression_suite.py +808 -0
  418. oscura/validation/replay.py +788 -0
  419. oscura/{testing → validation/testing}/__init__.py +2 -2
  420. oscura/{testing → validation/testing}/synthetic.py +5 -5
  421. oscura/visualization/__init__.py +9 -0
  422. oscura/visualization/accessibility.py +1 -1
  423. oscura/visualization/annotations.py +64 -67
  424. oscura/visualization/colors.py +7 -7
  425. oscura/visualization/digital.py +180 -81
  426. oscura/visualization/eye.py +236 -85
  427. oscura/visualization/interactive.py +320 -143
  428. oscura/visualization/jitter.py +587 -247
  429. oscura/visualization/layout.py +169 -134
  430. oscura/visualization/optimization.py +103 -52
  431. oscura/visualization/palettes.py +1 -1
  432. oscura/visualization/power.py +427 -211
  433. oscura/visualization/power_extended.py +626 -297
  434. oscura/visualization/presets.py +2 -0
  435. oscura/visualization/protocols.py +495 -181
  436. oscura/visualization/render.py +79 -63
  437. oscura/visualization/reverse_engineering.py +171 -124
  438. oscura/visualization/signal_integrity.py +460 -279
  439. oscura/visualization/specialized.py +190 -100
  440. oscura/visualization/spectral.py +670 -255
  441. oscura/visualization/thumbnails.py +166 -137
  442. oscura/visualization/waveform.py +150 -63
  443. oscura/workflows/__init__.py +3 -0
  444. oscura/{batch → workflows/batch}/__init__.py +5 -5
  445. oscura/{batch → workflows/batch}/advanced.py +150 -75
  446. oscura/workflows/batch/aggregate.py +531 -0
  447. oscura/workflows/batch/analyze.py +236 -0
  448. oscura/{batch → workflows/batch}/logging.py +2 -2
  449. oscura/{batch → workflows/batch}/metrics.py +1 -1
  450. oscura/workflows/complete_re.py +1144 -0
  451. oscura/workflows/compliance.py +44 -54
  452. oscura/workflows/digital.py +197 -51
  453. oscura/workflows/legacy/__init__.py +12 -0
  454. oscura/{workflow → workflows/legacy}/dag.py +4 -1
  455. oscura/workflows/multi_trace.py +9 -9
  456. oscura/workflows/power.py +42 -62
  457. oscura/workflows/protocol.py +82 -49
  458. oscura/workflows/reverse_engineering.py +351 -150
  459. oscura/workflows/signal_integrity.py +157 -82
  460. oscura-0.7.0.dist-info/METADATA +661 -0
  461. oscura-0.7.0.dist-info/RECORD +591 -0
  462. oscura/batch/aggregate.py +0 -300
  463. oscura/batch/analyze.py +0 -139
  464. oscura/dsl/__init__.py +0 -73
  465. oscura/exceptions.py +0 -59
  466. oscura/exploratory/fuzzy.py +0 -513
  467. oscura/exploratory/sync.py +0 -384
  468. oscura/exporters/__init__.py +0 -94
  469. oscura/exporters/csv.py +0 -303
  470. oscura/exporters/exporters.py +0 -44
  471. oscura/exporters/hdf5.py +0 -217
  472. oscura/exporters/html_export.py +0 -701
  473. oscura/exporters/json_export.py +0 -291
  474. oscura/exporters/markdown_export.py +0 -367
  475. oscura/exporters/matlab_export.py +0 -354
  476. oscura/exporters/npz_export.py +0 -219
  477. oscura/exporters/spice_export.py +0 -210
  478. oscura/search/context.py +0 -149
  479. oscura/session/__init__.py +0 -34
  480. oscura/session/annotations.py +0 -289
  481. oscura/session/history.py +0 -313
  482. oscura/session/session.py +0 -520
  483. oscura/workflow/__init__.py +0 -13
  484. oscura-0.5.1.dist-info/METADATA +0 -583
  485. oscura-0.5.1.dist-info/RECORD +0 -481
  486. /oscura/core/{config.py → config/legacy.py} +0 -0
  487. /oscura/{extensibility → core/extensibility}/__init__.py +0 -0
  488. /oscura/{extensibility → core/extensibility}/registry.py +0 -0
  489. /oscura/{plugins → core/plugins}/isolation.py +0 -0
  490. /oscura/{schemas → core/schemas}/bus_configuration.json +0 -0
  491. /oscura/{builders → utils/builders}/signal_builder.py +0 -0
  492. /oscura/{optimization → utils/optimization}/parallel.py +0 -0
  493. /oscura/{pipeline → utils/pipeline}/__init__.py +0 -0
  494. /oscura/{streaming → utils/streaming}/__init__.py +0 -0
  495. {oscura-0.5.1.dist-info → oscura-0.7.0.dist-info}/WHEEL +0 -0
  496. {oscura-0.5.1.dist-info → oscura-0.7.0.dist-info}/entry_points.txt +0 -0
  497. {oscura-0.5.1.dist-info → oscura-0.7.0.dist-info}/licenses/LICENSE +0 -0
@@ -118,13 +118,66 @@ def classify_data_type(data: DataType) -> ClassificationResult:
118
118
  >>> result.primary_type
119
119
  'text'
120
120
  """
121
+ data = _normalize_data(data)
122
+ stats = _compute_statistics(data)
123
+
124
+ # Classification logic in priority order
125
+ result = (
126
+ _check_padding(stats)
127
+ or _check_binary_signatures(data, stats)
128
+ or _check_compression_signatures(data, stats)
129
+ or _check_text(stats)
130
+ or _check_encrypted(stats)
131
+ or _check_compressed(stats)
132
+ or _default_binary(stats)
133
+ )
134
+
135
+ return result
136
+
137
+
138
+ def _normalize_data(data: DataType) -> bytes:
139
+ """Normalize input data to bytes.
140
+
141
+ Args:
142
+ data: Input data as bytes, bytearray, or numpy array.
143
+
144
+ Returns:
145
+ Normalized bytes object.
146
+
147
+ Raises:
148
+ ValueError: If data is empty.
149
+ """
150
+ data_bytes: bytes
121
151
  if isinstance(data, np.ndarray):
122
- data = data.tobytes() if data.dtype == np.uint8 else bytes(data.flatten())
152
+ data_bytes = data.tobytes() if data.dtype == np.uint8 else bytes(data.flatten())
153
+ else:
154
+ data_bytes = bytes(data)
123
155
 
124
- if not data:
156
+ if not data_bytes:
125
157
  raise ValueError("Cannot classify empty data")
126
158
 
127
- # Calculate statistics
159
+ return data_bytes
160
+
161
+
162
+ @dataclass
163
+ class _Statistics:
164
+ """Internal statistics for classification."""
165
+
166
+ entropy: float
167
+ printable_ratio: float
168
+ null_ratio: float
169
+ byte_variance: float
170
+
171
+
172
+ def _compute_statistics(data: bytes) -> _Statistics:
173
+ """Compute statistics for classification.
174
+
175
+ Args:
176
+ data: Input data as bytes.
177
+
178
+ Returns:
179
+ Statistics object with computed metrics.
180
+ """
128
181
  entropy_val = shannon_entropy(data)
129
182
 
130
183
  # Printable ASCII: 0x20-0x7E plus tab, newline, carriage return
@@ -139,103 +192,118 @@ def classify_data_type(data: DataType) -> ClassificationResult:
139
192
  byte_array = np.frombuffer(data, dtype=np.uint8)
140
193
  byte_variance = float(np.var(byte_array))
141
194
 
142
- details = {}
195
+ return _Statistics(
196
+ entropy=entropy_val,
197
+ printable_ratio=printable_ratio,
198
+ null_ratio=null_ratio,
199
+ byte_variance=byte_variance,
200
+ )
201
+
143
202
 
144
- # Classification logic
145
- # 1. Padding/null regions
146
- if null_ratio > 0.9:
203
+ def _check_padding(stats: _Statistics) -> ClassificationResult | None:
204
+ """Check if data is padding/null region."""
205
+ if stats.null_ratio > 0.9:
147
206
  return ClassificationResult(
148
207
  primary_type="padding",
149
- confidence=min(1.0, null_ratio),
150
- entropy=entropy_val,
151
- printable_ratio=printable_ratio,
152
- null_ratio=null_ratio,
153
- byte_variance=byte_variance,
208
+ confidence=min(1.0, stats.null_ratio),
209
+ entropy=stats.entropy,
210
+ printable_ratio=stats.printable_ratio,
211
+ null_ratio=stats.null_ratio,
212
+ byte_variance=stats.byte_variance,
154
213
  details={"reason": "high_null_ratio"},
155
214
  )
215
+ return None
156
216
 
157
- # 2. Check for executable/binary signatures (BEFORE compression and encrypted)
217
+
218
+ def _check_binary_signatures(data: bytes, stats: _Statistics) -> ClassificationResult | None:
219
+ """Check for executable/binary signatures."""
158
220
  for sig, bin_type in BINARY_SIGNATURES.items():
159
221
  if data[: len(sig)] == sig:
160
- details["binary_type"] = bin_type
161
222
  return ClassificationResult(
162
223
  primary_type="binary",
163
224
  confidence=0.95,
164
- entropy=entropy_val,
165
- printable_ratio=printable_ratio,
166
- null_ratio=null_ratio,
167
- byte_variance=byte_variance,
168
- details=details,
225
+ entropy=stats.entropy,
226
+ printable_ratio=stats.printable_ratio,
227
+ null_ratio=stats.null_ratio,
228
+ byte_variance=stats.byte_variance,
229
+ details={"binary_type": bin_type},
169
230
  )
231
+ return None
232
+
170
233
 
171
- # 3. Check for compression signatures
234
+ def _check_compression_signatures(data: bytes, stats: _Statistics) -> ClassificationResult | None:
235
+ """Check for compression signatures."""
172
236
  for sig, comp_type in COMPRESSION_SIGNATURES.items():
173
237
  if data[: len(sig)] == sig:
174
- details["compression_type"] = comp_type
175
238
  return ClassificationResult(
176
239
  primary_type="compressed",
177
240
  confidence=0.95,
178
- entropy=entropy_val,
179
- printable_ratio=printable_ratio,
180
- null_ratio=null_ratio,
181
- byte_variance=byte_variance,
182
- details=details,
241
+ entropy=stats.entropy,
242
+ printable_ratio=stats.printable_ratio,
243
+ null_ratio=stats.null_ratio,
244
+ byte_variance=stats.byte_variance,
245
+ details={"compression_type": comp_type},
183
246
  )
247
+ return None
184
248
 
185
- # 4. Text data (high printable ratio) - check BEFORE entropy-based classification
186
- if printable_ratio > 0.75 and entropy_val < 6.5:
187
- confidence = min(1.0, printable_ratio * 0.95)
188
- details["reason"] = "high_printable_ratio"
249
+
250
+ def _check_text(stats: _Statistics) -> ClassificationResult | None:
251
+ """Check for text data (high printable ratio)."""
252
+ if stats.printable_ratio > 0.75 and stats.entropy < 6.5:
253
+ confidence = min(1.0, stats.printable_ratio * 0.95)
189
254
  return ClassificationResult(
190
255
  primary_type="text",
191
256
  confidence=confidence,
192
- entropy=entropy_val,
193
- printable_ratio=printable_ratio,
194
- null_ratio=null_ratio,
195
- byte_variance=byte_variance,
196
- details=details,
257
+ entropy=stats.entropy,
258
+ printable_ratio=stats.printable_ratio,
259
+ null_ratio=stats.null_ratio,
260
+ byte_variance=stats.byte_variance,
261
+ details={"reason": "high_printable_ratio"},
197
262
  )
263
+ return None
264
+
198
265
 
199
- # 5. Encrypted/random data (high entropy, no structure)
200
- if entropy_val > 7.5 and byte_variance > 5000:
201
- # High entropy with high variance suggests random/encrypted
202
- confidence = min(1.0, (entropy_val - 7.5) / 0.5 + 0.7)
203
- details["reason"] = "high_entropy_and_variance"
266
+ def _check_encrypted(stats: _Statistics) -> ClassificationResult | None:
267
+ """Check for encrypted/random data (high entropy, no structure)."""
268
+ if stats.entropy > 7.5 and stats.byte_variance > 5000:
269
+ confidence = min(1.0, (stats.entropy - 7.5) / 0.5 + 0.7)
204
270
  return ClassificationResult(
205
271
  primary_type="encrypted",
206
272
  confidence=confidence,
207
- entropy=entropy_val,
208
- printable_ratio=printable_ratio,
209
- null_ratio=null_ratio,
210
- byte_variance=byte_variance,
211
- details=details,
273
+ entropy=stats.entropy,
274
+ printable_ratio=stats.printable_ratio,
275
+ null_ratio=stats.null_ratio,
276
+ byte_variance=stats.byte_variance,
277
+ details={"reason": "high_entropy_and_variance"},
212
278
  )
279
+ return None
280
+
213
281
 
214
- # 6. Compressed data (high entropy, some structure)
215
- if 6.5 <= entropy_val <= 7.5:
216
- confidence = 0.7
217
- details["reason"] = "compression_entropy_range"
282
+ def _check_compressed(stats: _Statistics) -> ClassificationResult | None:
283
+ """Check for compressed data (high entropy, some structure)."""
284
+ if 6.5 <= stats.entropy <= 7.5:
218
285
  return ClassificationResult(
219
286
  primary_type="compressed",
220
- confidence=confidence,
221
- entropy=entropy_val,
222
- printable_ratio=printable_ratio,
223
- null_ratio=null_ratio,
224
- byte_variance=byte_variance,
225
- details=details,
287
+ confidence=0.7,
288
+ entropy=stats.entropy,
289
+ printable_ratio=stats.printable_ratio,
290
+ null_ratio=stats.null_ratio,
291
+ byte_variance=stats.byte_variance,
292
+ details={"reason": "compression_entropy_range"},
226
293
  )
294
+ return None
295
+
227
296
 
228
- # 7. Default to binary/structured
229
- confidence = 0.6
230
- details["reason"] = "default_binary"
297
+ def _default_binary(stats: _Statistics) -> ClassificationResult:
298
+ """Default to binary/structured classification."""
231
299
  return ClassificationResult(
232
300
  primary_type="binary",
233
- confidence=confidence,
234
- entropy=entropy_val,
235
- printable_ratio=printable_ratio,
236
- null_ratio=null_ratio,
237
- byte_variance=byte_variance,
238
- details=details,
301
+ confidence=0.6,
302
+ entropy=stats.entropy,
303
+ printable_ratio=stats.printable_ratio,
304
+ null_ratio=stats.null_ratio,
305
+ byte_variance=stats.byte_variance,
306
+ details={"reason": "default_binary"},
239
307
  )
240
308
 
241
309
 
@@ -262,65 +330,100 @@ def detect_text_regions(
262
330
  >>> len(regions) > 0
263
331
  True
264
332
  """
333
+ # Convert numpy arrays to bytes
334
+ data_bytes: bytes
265
335
  if isinstance(data, np.ndarray):
266
- data = data.tobytes() if data.dtype == np.uint8 else bytes(data.flatten())
336
+ data_bytes = data.tobytes() if data.dtype == np.uint8 else bytes(data.flatten())
337
+ else:
338
+ data_bytes = bytes(data)
267
339
 
268
- regions = []
340
+ regions: list[RegionClassification] = []
269
341
  in_region = False
270
342
  region_start = 0
271
- _printable_in_window = 0
272
343
  window_size = min_length
273
344
 
274
- for i, byte in enumerate(data):
275
- _is_printable = 32 <= byte <= 126 or byte in (9, 10, 13)
276
-
345
+ for i in range(len(data_bytes)):
277
346
  if not in_region:
278
347
  # Look for start of text region
279
- if i >= window_size - 1:
280
- # Check window
281
- window = data[i - window_size + 1 : i + 1]
282
- printable_count = sum(1 for b in window if 32 <= b <= 126 or b in (9, 10, 13))
283
- if printable_count / window_size >= min_printable:
284
- in_region = True
285
- region_start = i - window_size + 1
348
+ start_pos = _check_region_start(data_bytes, i, window_size, min_printable)
349
+ if start_pos is not None:
350
+ in_region = True
351
+ region_start = start_pos
286
352
  else:
287
- # In text region, look for end
288
- # Use a sliding window to detect when printable ratio drops
289
- if i >= region_start + window_size:
290
- window = data[i - window_size + 1 : i + 1]
291
- printable_count = sum(1 for b in window if 32 <= b <= 126 or b in (9, 10, 13))
292
- if printable_count / window_size < min_printable:
293
- # End of region
294
- region_data = data[region_start : i - window_size + 1]
295
- if len(region_data) >= min_length:
296
- classification = classify_data_type(region_data)
297
- regions.append(
298
- RegionClassification(
299
- start=region_start,
300
- end=i - window_size + 1,
301
- length=len(region_data),
302
- classification=classification,
303
- )
304
- )
305
- in_region = False
353
+ # Look for end of text region
354
+ if _check_region_end(data_bytes, i, region_start, window_size, min_printable):
355
+ _append_region(data_bytes, regions, region_start, i - window_size + 1, min_length)
356
+ in_region = False
306
357
 
307
358
  # Handle region extending to end
308
359
  if in_region:
309
- region_data = data[region_start:]
310
- if len(region_data) >= min_length:
311
- classification = classify_data_type(region_data)
312
- regions.append(
313
- RegionClassification(
314
- start=region_start,
315
- end=len(data),
316
- length=len(region_data),
317
- classification=classification,
318
- )
319
- )
360
+ _append_region(data_bytes, regions, region_start, len(data_bytes), min_length)
320
361
 
321
362
  return regions
322
363
 
323
364
 
365
+ def _is_printable_byte(byte: int) -> bool:
366
+ """Check if byte is printable ASCII."""
367
+ return 32 <= byte <= 126 or byte in (9, 10, 13)
368
+
369
+
370
+ def _check_region_start(
371
+ data: bytes,
372
+ i: int,
373
+ window_size: int,
374
+ min_printable: float,
375
+ ) -> int | None:
376
+ """Check if position marks start of text region."""
377
+ if i < window_size - 1:
378
+ return None
379
+
380
+ window = data[i - window_size + 1 : i + 1]
381
+ printable_count = sum(1 for b in window if _is_printable_byte(b))
382
+
383
+ if printable_count / window_size >= min_printable:
384
+ return i - window_size + 1
385
+
386
+ return None
387
+
388
+
389
+ def _check_region_end(
390
+ data: bytes,
391
+ i: int,
392
+ region_start: int,
393
+ window_size: int,
394
+ min_printable: float,
395
+ ) -> bool:
396
+ """Check if position marks end of text region."""
397
+ if i < region_start + window_size:
398
+ return False
399
+
400
+ window = data[i - window_size + 1 : i + 1]
401
+ printable_count = sum(1 for b in window if _is_printable_byte(b))
402
+
403
+ return printable_count / window_size < min_printable
404
+
405
+
406
+ def _append_region(
407
+ data: bytes,
408
+ regions: list[RegionClassification],
409
+ start: int,
410
+ end: int,
411
+ min_length: int,
412
+ ) -> None:
413
+ """Append region to list if it meets minimum length."""
414
+ region_data = data[start:end]
415
+ if len(region_data) >= min_length:
416
+ classification = classify_data_type(region_data)
417
+ regions.append(
418
+ RegionClassification(
419
+ start=start,
420
+ end=end,
421
+ length=len(region_data),
422
+ classification=classification,
423
+ )
424
+ )
425
+
426
+
324
427
  def detect_encrypted_regions(
325
428
  data: DataType, min_length: int = 64, min_entropy: float = 7.5
326
429
  ) -> list[RegionClassification]:
@@ -356,6 +459,13 @@ def detect_encrypted_regions(
356
459
  window_size = min_length
357
460
  step = window_size // 4
358
461
 
462
+ # Validate step to prevent infinite loop
463
+ if step <= 0:
464
+ raise ValueError(
465
+ f"Invalid step size {step} (window_size={window_size}). "
466
+ "window_size must be at least 4 to produce positive step."
467
+ )
468
+
359
469
  i = 0
360
470
  while i < len(data) - window_size:
361
471
  window = data[i : i + window_size]
@@ -410,6 +520,9 @@ def detect_compressed_regions(data: DataType, min_length: int = 64) -> list[Regi
410
520
  Returns:
411
521
  List of detected compressed regions
412
522
 
523
+ Raises:
524
+ ValueError: If detected region exceeds MAX_REGION_SIZE (100MB).
525
+
413
526
  Example:
414
527
  >>> import gzip
415
528
  >>> compressed = gzip.compress(b'Hello World' * 100)
@@ -417,6 +530,8 @@ def detect_compressed_regions(data: DataType, min_length: int = 64) -> list[Regi
417
530
  >>> len(regions) > 0
418
531
  True
419
532
  """
533
+ MAX_REGION_SIZE = 100 * 1024 * 1024 # 100MB limit
534
+
420
535
  if isinstance(data, np.ndarray):
421
536
  data = data.tobytes() if data.dtype == np.uint8 else bytes(data.flatten())
422
537
 
@@ -438,6 +553,13 @@ def detect_compressed_regions(data: DataType, min_length: int = 64) -> list[Regi
438
553
  # Extend based on high entropy
439
554
  window_size = 256
440
555
  while region_end < len(data):
556
+ # Safety check: prevent unbounded region growth
557
+ if region_end - region_start >= MAX_REGION_SIZE:
558
+ raise ValueError(
559
+ f"Compressed region size exceeded {MAX_REGION_SIZE // (1024 * 1024)}MB limit. "
560
+ f"This may indicate malformed data or incorrect compression signature detection."
561
+ )
562
+
441
563
  window = data[region_end : region_end + window_size]
442
564
  if len(window) < window_size:
443
565
  break
@@ -396,25 +396,34 @@ def _detect_transitions_boundary_scan(
396
396
  Returns:
397
397
  List of detected transitions
398
398
  """
399
- data_len = len(data)
399
+ region_size = _determine_boundary_scan_region_size(len(data), window)
400
+ if region_size < 4:
401
+ return []
400
402
 
401
- # Region size for comparison - use window or adaptive size
402
- region_size = min(window, data_len // 3)
403
- if region_size < 8:
404
- region_size = max(8, data_len // 4)
403
+ scan_start, scan_end = _compute_boundary_scan_range(len(data), region_size)
404
+ if scan_start >= scan_end:
405
+ return []
405
406
 
406
- if region_size < 4:
407
+ best_transition = _find_best_boundary_transition(
408
+ data, region_size, scan_start, scan_end, threshold, min_gap
409
+ )
410
+
411
+ if best_transition is None:
407
412
  return []
408
413
 
409
- transitions = []
410
- last_offset = -min_gap - 1
414
+ return _accumulate_all_boundary_transitions(data, best_transition, window, threshold, min_gap)
415
+
416
+
417
+ def _determine_boundary_scan_region_size(data_len: int, window: int) -> int:
418
+ """Determine region size for boundary scanning."""
419
+ region_size = min(window, data_len // 3)
420
+ if region_size < 8:
421
+ region_size = max(8, data_len // 4)
422
+ return region_size
411
423
 
412
- # Track best transition found
413
- best_transition = None
414
- best_delta = 0.0
415
424
 
416
- # Scan potential boundary points
417
- # We need at least region_size bytes on each side
425
+ def _compute_boundary_scan_range(data_len: int, region_size: int) -> tuple[int, int]:
426
+ """Compute scan range for boundary detection."""
418
427
  scan_start = region_size
419
428
  scan_end = data_len - region_size
420
429
 
@@ -424,16 +433,25 @@ def _detect_transitions_boundary_scan(
424
433
  scan_start = region_size
425
434
  scan_end = data_len - region_size
426
435
 
427
- if scan_start >= scan_end:
428
- return []
436
+ return scan_start, scan_end
429
437
 
430
- # Use a step size to avoid scanning every byte
438
+
439
+ def _find_best_boundary_transition(
440
+ data: bytes,
441
+ region_size: int,
442
+ scan_start: int,
443
+ scan_end: int,
444
+ threshold: float,
445
+ min_gap: int,
446
+ ) -> EntropyTransition | None:
447
+ """Find the strongest boundary transition in scan range."""
448
+ best_transition = None
449
+ best_delta = 0.0
450
+ last_offset = -min_gap - 1
431
451
  scan_step = max(1, region_size // 4)
432
452
 
433
453
  for offset in range(scan_start, scan_end + 1, scan_step):
434
- # Compute entropy of region BEFORE this point
435
454
  region_before = data[offset - region_size : offset]
436
- # Compute entropy of region AFTER this point
437
455
  region_after = data[offset : offset + region_size]
438
456
 
439
457
  if len(region_before) < 4 or len(region_after) < 4:
@@ -448,43 +466,50 @@ def _detect_transitions_boundary_scan(
448
466
  delta = entropy_after - entropy_before
449
467
 
450
468
  # Track the strongest transition that exceeds threshold
451
- if abs(delta) >= threshold:
452
- # Check min_gap constraint
453
- if offset - last_offset >= min_gap:
454
- if abs(delta) > abs(best_delta):
455
- best_delta = delta
456
- best_transition = EntropyTransition(
457
- offset=offset,
458
- entropy_before=entropy_before,
459
- entropy_after=entropy_after,
460
- delta=delta,
461
- transition_type="low_to_high" if delta > 0 else "high_to_low",
462
- )
469
+ if abs(delta) >= threshold and offset - last_offset >= min_gap:
470
+ if abs(delta) > abs(best_delta):
471
+ best_delta = delta
472
+ best_transition = EntropyTransition(
473
+ offset=offset,
474
+ entropy_before=entropy_before,
475
+ entropy_after=entropy_after,
476
+ delta=delta,
477
+ transition_type="low_to_high" if delta > 0 else "high_to_low",
478
+ )
479
+
480
+ return best_transition
481
+
482
+
483
+ def _accumulate_all_boundary_transitions(
484
+ data: bytes,
485
+ best_transition: EntropyTransition,
486
+ window: int,
487
+ threshold: float,
488
+ min_gap: int,
489
+ ) -> list[EntropyTransition]:
490
+ """Accumulate all boundary transitions including recursive finds."""
491
+ transitions = [best_transition]
492
+ last_offset = best_transition.offset
493
+
494
+ # Continue scanning for more transitions after this one
495
+ remaining_transitions = _detect_transitions_boundary_scan(
496
+ data[best_transition.offset :],
497
+ window,
498
+ threshold,
499
+ min_gap,
500
+ )
463
501
 
464
- if best_transition is not None:
465
- transitions.append(best_transition)
466
- last_offset = best_transition.offset
467
-
468
- # Continue scanning for more transitions after this one
469
- # (for data with multiple transitions)
470
- remaining_transitions = _detect_transitions_boundary_scan(
471
- data[best_transition.offset :],
472
- window,
473
- threshold,
474
- min_gap,
502
+ for t in remaining_transitions:
503
+ adjusted_t = EntropyTransition(
504
+ offset=t.offset + best_transition.offset,
505
+ entropy_before=t.entropy_before,
506
+ entropy_after=t.entropy_after,
507
+ delta=t.delta,
508
+ transition_type=t.transition_type,
475
509
  )
476
- for t in remaining_transitions:
477
- # Adjust offset to be relative to original data
478
- adjusted_t = EntropyTransition(
479
- offset=t.offset + best_transition.offset,
480
- entropy_before=t.entropy_before,
481
- entropy_after=t.entropy_after,
482
- delta=t.delta,
483
- transition_type=t.transition_type,
484
- )
485
- if adjusted_t.offset - last_offset >= min_gap:
486
- transitions.append(adjusted_t)
487
- last_offset = adjusted_t.offset
510
+ if adjusted_t.offset - last_offset >= min_gap:
511
+ transitions.append(adjusted_t)
512
+ last_offset = adjusted_t.offset
488
513
 
489
514
  return transitions
490
515