oscura 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (465) hide show
  1. oscura/__init__.py +813 -8
  2. oscura/__main__.py +392 -0
  3. oscura/analyzers/__init__.py +37 -0
  4. oscura/analyzers/digital/__init__.py +177 -0
  5. oscura/analyzers/digital/bus.py +691 -0
  6. oscura/analyzers/digital/clock.py +805 -0
  7. oscura/analyzers/digital/correlation.py +720 -0
  8. oscura/analyzers/digital/edges.py +632 -0
  9. oscura/analyzers/digital/extraction.py +413 -0
  10. oscura/analyzers/digital/quality.py +878 -0
  11. oscura/analyzers/digital/signal_quality.py +877 -0
  12. oscura/analyzers/digital/thresholds.py +708 -0
  13. oscura/analyzers/digital/timing.py +1104 -0
  14. oscura/analyzers/eye/__init__.py +46 -0
  15. oscura/analyzers/eye/diagram.py +434 -0
  16. oscura/analyzers/eye/metrics.py +555 -0
  17. oscura/analyzers/jitter/__init__.py +83 -0
  18. oscura/analyzers/jitter/ber.py +333 -0
  19. oscura/analyzers/jitter/decomposition.py +759 -0
  20. oscura/analyzers/jitter/measurements.py +413 -0
  21. oscura/analyzers/jitter/spectrum.py +220 -0
  22. oscura/analyzers/measurements.py +40 -0
  23. oscura/analyzers/packet/__init__.py +171 -0
  24. oscura/analyzers/packet/daq.py +1077 -0
  25. oscura/analyzers/packet/metrics.py +437 -0
  26. oscura/analyzers/packet/parser.py +327 -0
  27. oscura/analyzers/packet/payload.py +2156 -0
  28. oscura/analyzers/packet/payload_analysis.py +1312 -0
  29. oscura/analyzers/packet/payload_extraction.py +236 -0
  30. oscura/analyzers/packet/payload_patterns.py +670 -0
  31. oscura/analyzers/packet/stream.py +359 -0
  32. oscura/analyzers/patterns/__init__.py +266 -0
  33. oscura/analyzers/patterns/clustering.py +1036 -0
  34. oscura/analyzers/patterns/discovery.py +539 -0
  35. oscura/analyzers/patterns/learning.py +797 -0
  36. oscura/analyzers/patterns/matching.py +1091 -0
  37. oscura/analyzers/patterns/periodic.py +650 -0
  38. oscura/analyzers/patterns/sequences.py +767 -0
  39. oscura/analyzers/power/__init__.py +116 -0
  40. oscura/analyzers/power/ac_power.py +391 -0
  41. oscura/analyzers/power/basic.py +383 -0
  42. oscura/analyzers/power/conduction.py +314 -0
  43. oscura/analyzers/power/efficiency.py +297 -0
  44. oscura/analyzers/power/ripple.py +356 -0
  45. oscura/analyzers/power/soa.py +372 -0
  46. oscura/analyzers/power/switching.py +479 -0
  47. oscura/analyzers/protocol/__init__.py +150 -0
  48. oscura/analyzers/protocols/__init__.py +150 -0
  49. oscura/analyzers/protocols/base.py +500 -0
  50. oscura/analyzers/protocols/can.py +620 -0
  51. oscura/analyzers/protocols/can_fd.py +448 -0
  52. oscura/analyzers/protocols/flexray.py +405 -0
  53. oscura/analyzers/protocols/hdlc.py +399 -0
  54. oscura/analyzers/protocols/i2c.py +368 -0
  55. oscura/analyzers/protocols/i2s.py +296 -0
  56. oscura/analyzers/protocols/jtag.py +393 -0
  57. oscura/analyzers/protocols/lin.py +445 -0
  58. oscura/analyzers/protocols/manchester.py +333 -0
  59. oscura/analyzers/protocols/onewire.py +501 -0
  60. oscura/analyzers/protocols/spi.py +334 -0
  61. oscura/analyzers/protocols/swd.py +325 -0
  62. oscura/analyzers/protocols/uart.py +393 -0
  63. oscura/analyzers/protocols/usb.py +495 -0
  64. oscura/analyzers/signal_integrity/__init__.py +63 -0
  65. oscura/analyzers/signal_integrity/embedding.py +294 -0
  66. oscura/analyzers/signal_integrity/equalization.py +370 -0
  67. oscura/analyzers/signal_integrity/sparams.py +484 -0
  68. oscura/analyzers/spectral/__init__.py +53 -0
  69. oscura/analyzers/spectral/chunked.py +273 -0
  70. oscura/analyzers/spectral/chunked_fft.py +571 -0
  71. oscura/analyzers/spectral/chunked_wavelet.py +391 -0
  72. oscura/analyzers/spectral/fft.py +92 -0
  73. oscura/analyzers/statistical/__init__.py +250 -0
  74. oscura/analyzers/statistical/checksum.py +923 -0
  75. oscura/analyzers/statistical/chunked_corr.py +228 -0
  76. oscura/analyzers/statistical/classification.py +778 -0
  77. oscura/analyzers/statistical/entropy.py +1113 -0
  78. oscura/analyzers/statistical/ngrams.py +614 -0
  79. oscura/analyzers/statistics/__init__.py +119 -0
  80. oscura/analyzers/statistics/advanced.py +885 -0
  81. oscura/analyzers/statistics/basic.py +263 -0
  82. oscura/analyzers/statistics/correlation.py +630 -0
  83. oscura/analyzers/statistics/distribution.py +298 -0
  84. oscura/analyzers/statistics/outliers.py +463 -0
  85. oscura/analyzers/statistics/streaming.py +93 -0
  86. oscura/analyzers/statistics/trend.py +520 -0
  87. oscura/analyzers/validation.py +598 -0
  88. oscura/analyzers/waveform/__init__.py +36 -0
  89. oscura/analyzers/waveform/measurements.py +943 -0
  90. oscura/analyzers/waveform/measurements_with_uncertainty.py +371 -0
  91. oscura/analyzers/waveform/spectral.py +1689 -0
  92. oscura/analyzers/waveform/wavelets.py +298 -0
  93. oscura/api/__init__.py +62 -0
  94. oscura/api/dsl.py +538 -0
  95. oscura/api/fluent.py +571 -0
  96. oscura/api/operators.py +498 -0
  97. oscura/api/optimization.py +392 -0
  98. oscura/api/profiling.py +396 -0
  99. oscura/automotive/__init__.py +73 -0
  100. oscura/automotive/can/__init__.py +52 -0
  101. oscura/automotive/can/analysis.py +356 -0
  102. oscura/automotive/can/checksum.py +250 -0
  103. oscura/automotive/can/correlation.py +212 -0
  104. oscura/automotive/can/discovery.py +355 -0
  105. oscura/automotive/can/message_wrapper.py +375 -0
  106. oscura/automotive/can/models.py +385 -0
  107. oscura/automotive/can/patterns.py +381 -0
  108. oscura/automotive/can/session.py +452 -0
  109. oscura/automotive/can/state_machine.py +300 -0
  110. oscura/automotive/can/stimulus_response.py +461 -0
  111. oscura/automotive/dbc/__init__.py +15 -0
  112. oscura/automotive/dbc/generator.py +156 -0
  113. oscura/automotive/dbc/parser.py +146 -0
  114. oscura/automotive/dtc/__init__.py +30 -0
  115. oscura/automotive/dtc/database.py +3036 -0
  116. oscura/automotive/j1939/__init__.py +14 -0
  117. oscura/automotive/j1939/decoder.py +745 -0
  118. oscura/automotive/loaders/__init__.py +35 -0
  119. oscura/automotive/loaders/asc.py +98 -0
  120. oscura/automotive/loaders/blf.py +77 -0
  121. oscura/automotive/loaders/csv_can.py +136 -0
  122. oscura/automotive/loaders/dispatcher.py +136 -0
  123. oscura/automotive/loaders/mdf.py +331 -0
  124. oscura/automotive/loaders/pcap.py +132 -0
  125. oscura/automotive/obd/__init__.py +14 -0
  126. oscura/automotive/obd/decoder.py +707 -0
  127. oscura/automotive/uds/__init__.py +48 -0
  128. oscura/automotive/uds/decoder.py +265 -0
  129. oscura/automotive/uds/models.py +64 -0
  130. oscura/automotive/visualization.py +369 -0
  131. oscura/batch/__init__.py +55 -0
  132. oscura/batch/advanced.py +627 -0
  133. oscura/batch/aggregate.py +300 -0
  134. oscura/batch/analyze.py +139 -0
  135. oscura/batch/logging.py +487 -0
  136. oscura/batch/metrics.py +556 -0
  137. oscura/builders/__init__.py +41 -0
  138. oscura/builders/signal_builder.py +1131 -0
  139. oscura/cli/__init__.py +14 -0
  140. oscura/cli/batch.py +339 -0
  141. oscura/cli/characterize.py +273 -0
  142. oscura/cli/compare.py +775 -0
  143. oscura/cli/decode.py +551 -0
  144. oscura/cli/main.py +247 -0
  145. oscura/cli/shell.py +350 -0
  146. oscura/comparison/__init__.py +66 -0
  147. oscura/comparison/compare.py +397 -0
  148. oscura/comparison/golden.py +487 -0
  149. oscura/comparison/limits.py +391 -0
  150. oscura/comparison/mask.py +434 -0
  151. oscura/comparison/trace_diff.py +30 -0
  152. oscura/comparison/visualization.py +481 -0
  153. oscura/compliance/__init__.py +70 -0
  154. oscura/compliance/advanced.py +756 -0
  155. oscura/compliance/masks.py +363 -0
  156. oscura/compliance/reporting.py +483 -0
  157. oscura/compliance/testing.py +298 -0
  158. oscura/component/__init__.py +38 -0
  159. oscura/component/impedance.py +365 -0
  160. oscura/component/reactive.py +598 -0
  161. oscura/component/transmission_line.py +312 -0
  162. oscura/config/__init__.py +191 -0
  163. oscura/config/defaults.py +254 -0
  164. oscura/config/loader.py +348 -0
  165. oscura/config/memory.py +271 -0
  166. oscura/config/migration.py +458 -0
  167. oscura/config/pipeline.py +1077 -0
  168. oscura/config/preferences.py +530 -0
  169. oscura/config/protocol.py +875 -0
  170. oscura/config/schema.py +713 -0
  171. oscura/config/settings.py +420 -0
  172. oscura/config/thresholds.py +599 -0
  173. oscura/convenience.py +457 -0
  174. oscura/core/__init__.py +299 -0
  175. oscura/core/audit.py +457 -0
  176. oscura/core/backend_selector.py +405 -0
  177. oscura/core/cache.py +590 -0
  178. oscura/core/cancellation.py +439 -0
  179. oscura/core/confidence.py +225 -0
  180. oscura/core/config.py +506 -0
  181. oscura/core/correlation.py +216 -0
  182. oscura/core/cross_domain.py +422 -0
  183. oscura/core/debug.py +301 -0
  184. oscura/core/edge_cases.py +541 -0
  185. oscura/core/exceptions.py +535 -0
  186. oscura/core/gpu_backend.py +523 -0
  187. oscura/core/lazy.py +832 -0
  188. oscura/core/log_query.py +540 -0
  189. oscura/core/logging.py +931 -0
  190. oscura/core/logging_advanced.py +952 -0
  191. oscura/core/memoize.py +171 -0
  192. oscura/core/memory_check.py +274 -0
  193. oscura/core/memory_guard.py +290 -0
  194. oscura/core/memory_limits.py +336 -0
  195. oscura/core/memory_monitor.py +453 -0
  196. oscura/core/memory_progress.py +465 -0
  197. oscura/core/memory_warnings.py +315 -0
  198. oscura/core/numba_backend.py +362 -0
  199. oscura/core/performance.py +352 -0
  200. oscura/core/progress.py +524 -0
  201. oscura/core/provenance.py +358 -0
  202. oscura/core/results.py +331 -0
  203. oscura/core/types.py +504 -0
  204. oscura/core/uncertainty.py +383 -0
  205. oscura/discovery/__init__.py +52 -0
  206. oscura/discovery/anomaly_detector.py +672 -0
  207. oscura/discovery/auto_decoder.py +415 -0
  208. oscura/discovery/comparison.py +497 -0
  209. oscura/discovery/quality_validator.py +528 -0
  210. oscura/discovery/signal_detector.py +769 -0
  211. oscura/dsl/__init__.py +73 -0
  212. oscura/dsl/commands.py +246 -0
  213. oscura/dsl/interpreter.py +455 -0
  214. oscura/dsl/parser.py +689 -0
  215. oscura/dsl/repl.py +172 -0
  216. oscura/exceptions.py +59 -0
  217. oscura/exploratory/__init__.py +111 -0
  218. oscura/exploratory/error_recovery.py +642 -0
  219. oscura/exploratory/fuzzy.py +513 -0
  220. oscura/exploratory/fuzzy_advanced.py +786 -0
  221. oscura/exploratory/legacy.py +831 -0
  222. oscura/exploratory/parse.py +358 -0
  223. oscura/exploratory/recovery.py +275 -0
  224. oscura/exploratory/sync.py +382 -0
  225. oscura/exploratory/unknown.py +707 -0
  226. oscura/export/__init__.py +25 -0
  227. oscura/export/wireshark/README.md +265 -0
  228. oscura/export/wireshark/__init__.py +47 -0
  229. oscura/export/wireshark/generator.py +312 -0
  230. oscura/export/wireshark/lua_builder.py +159 -0
  231. oscura/export/wireshark/templates/dissector.lua.j2 +92 -0
  232. oscura/export/wireshark/type_mapping.py +165 -0
  233. oscura/export/wireshark/validator.py +105 -0
  234. oscura/exporters/__init__.py +94 -0
  235. oscura/exporters/csv.py +303 -0
  236. oscura/exporters/exporters.py +44 -0
  237. oscura/exporters/hdf5.py +219 -0
  238. oscura/exporters/html_export.py +701 -0
  239. oscura/exporters/json_export.py +291 -0
  240. oscura/exporters/markdown_export.py +367 -0
  241. oscura/exporters/matlab_export.py +354 -0
  242. oscura/exporters/npz_export.py +219 -0
  243. oscura/exporters/spice_export.py +210 -0
  244. oscura/extensibility/__init__.py +131 -0
  245. oscura/extensibility/docs.py +752 -0
  246. oscura/extensibility/extensions.py +1125 -0
  247. oscura/extensibility/logging.py +259 -0
  248. oscura/extensibility/measurements.py +485 -0
  249. oscura/extensibility/plugins.py +414 -0
  250. oscura/extensibility/registry.py +346 -0
  251. oscura/extensibility/templates.py +913 -0
  252. oscura/extensibility/validation.py +651 -0
  253. oscura/filtering/__init__.py +89 -0
  254. oscura/filtering/base.py +563 -0
  255. oscura/filtering/convenience.py +564 -0
  256. oscura/filtering/design.py +725 -0
  257. oscura/filtering/filters.py +32 -0
  258. oscura/filtering/introspection.py +605 -0
  259. oscura/guidance/__init__.py +24 -0
  260. oscura/guidance/recommender.py +429 -0
  261. oscura/guidance/wizard.py +518 -0
  262. oscura/inference/__init__.py +251 -0
  263. oscura/inference/active_learning/README.md +153 -0
  264. oscura/inference/active_learning/__init__.py +38 -0
  265. oscura/inference/active_learning/lstar.py +257 -0
  266. oscura/inference/active_learning/observation_table.py +230 -0
  267. oscura/inference/active_learning/oracle.py +78 -0
  268. oscura/inference/active_learning/teachers/__init__.py +15 -0
  269. oscura/inference/active_learning/teachers/simulator.py +192 -0
  270. oscura/inference/adaptive_tuning.py +453 -0
  271. oscura/inference/alignment.py +653 -0
  272. oscura/inference/bayesian.py +943 -0
  273. oscura/inference/binary.py +1016 -0
  274. oscura/inference/crc_reverse.py +711 -0
  275. oscura/inference/logic.py +288 -0
  276. oscura/inference/message_format.py +1305 -0
  277. oscura/inference/protocol.py +417 -0
  278. oscura/inference/protocol_dsl.py +1084 -0
  279. oscura/inference/protocol_library.py +1230 -0
  280. oscura/inference/sequences.py +809 -0
  281. oscura/inference/signal_intelligence.py +1509 -0
  282. oscura/inference/spectral.py +215 -0
  283. oscura/inference/state_machine.py +634 -0
  284. oscura/inference/stream.py +918 -0
  285. oscura/integrations/__init__.py +59 -0
  286. oscura/integrations/llm.py +1827 -0
  287. oscura/jupyter/__init__.py +32 -0
  288. oscura/jupyter/display.py +268 -0
  289. oscura/jupyter/magic.py +334 -0
  290. oscura/loaders/__init__.py +526 -0
  291. oscura/loaders/binary.py +69 -0
  292. oscura/loaders/configurable.py +1255 -0
  293. oscura/loaders/csv.py +26 -0
  294. oscura/loaders/csv_loader.py +473 -0
  295. oscura/loaders/hdf5.py +9 -0
  296. oscura/loaders/hdf5_loader.py +510 -0
  297. oscura/loaders/lazy.py +370 -0
  298. oscura/loaders/mmap_loader.py +583 -0
  299. oscura/loaders/numpy_loader.py +436 -0
  300. oscura/loaders/pcap.py +432 -0
  301. oscura/loaders/preprocessing.py +368 -0
  302. oscura/loaders/rigol.py +287 -0
  303. oscura/loaders/sigrok.py +321 -0
  304. oscura/loaders/tdms.py +367 -0
  305. oscura/loaders/tektronix.py +711 -0
  306. oscura/loaders/validation.py +584 -0
  307. oscura/loaders/vcd.py +464 -0
  308. oscura/loaders/wav.py +233 -0
  309. oscura/math/__init__.py +45 -0
  310. oscura/math/arithmetic.py +824 -0
  311. oscura/math/interpolation.py +413 -0
  312. oscura/onboarding/__init__.py +39 -0
  313. oscura/onboarding/help.py +498 -0
  314. oscura/onboarding/tutorials.py +405 -0
  315. oscura/onboarding/wizard.py +466 -0
  316. oscura/optimization/__init__.py +19 -0
  317. oscura/optimization/parallel.py +440 -0
  318. oscura/optimization/search.py +532 -0
  319. oscura/pipeline/__init__.py +43 -0
  320. oscura/pipeline/base.py +338 -0
  321. oscura/pipeline/composition.py +242 -0
  322. oscura/pipeline/parallel.py +448 -0
  323. oscura/pipeline/pipeline.py +375 -0
  324. oscura/pipeline/reverse_engineering.py +1119 -0
  325. oscura/plugins/__init__.py +122 -0
  326. oscura/plugins/base.py +272 -0
  327. oscura/plugins/cli.py +497 -0
  328. oscura/plugins/discovery.py +411 -0
  329. oscura/plugins/isolation.py +418 -0
  330. oscura/plugins/lifecycle.py +959 -0
  331. oscura/plugins/manager.py +493 -0
  332. oscura/plugins/registry.py +421 -0
  333. oscura/plugins/versioning.py +372 -0
  334. oscura/py.typed +0 -0
  335. oscura/quality/__init__.py +65 -0
  336. oscura/quality/ensemble.py +740 -0
  337. oscura/quality/explainer.py +338 -0
  338. oscura/quality/scoring.py +616 -0
  339. oscura/quality/warnings.py +456 -0
  340. oscura/reporting/__init__.py +248 -0
  341. oscura/reporting/advanced.py +1234 -0
  342. oscura/reporting/analyze.py +448 -0
  343. oscura/reporting/argument_preparer.py +596 -0
  344. oscura/reporting/auto_report.py +507 -0
  345. oscura/reporting/batch.py +615 -0
  346. oscura/reporting/chart_selection.py +223 -0
  347. oscura/reporting/comparison.py +330 -0
  348. oscura/reporting/config.py +615 -0
  349. oscura/reporting/content/__init__.py +39 -0
  350. oscura/reporting/content/executive.py +127 -0
  351. oscura/reporting/content/filtering.py +191 -0
  352. oscura/reporting/content/minimal.py +257 -0
  353. oscura/reporting/content/verbosity.py +162 -0
  354. oscura/reporting/core.py +508 -0
  355. oscura/reporting/core_formats/__init__.py +17 -0
  356. oscura/reporting/core_formats/multi_format.py +210 -0
  357. oscura/reporting/engine.py +836 -0
  358. oscura/reporting/export.py +366 -0
  359. oscura/reporting/formatting/__init__.py +129 -0
  360. oscura/reporting/formatting/emphasis.py +81 -0
  361. oscura/reporting/formatting/numbers.py +403 -0
  362. oscura/reporting/formatting/standards.py +55 -0
  363. oscura/reporting/formatting.py +466 -0
  364. oscura/reporting/html.py +578 -0
  365. oscura/reporting/index.py +590 -0
  366. oscura/reporting/multichannel.py +296 -0
  367. oscura/reporting/output.py +379 -0
  368. oscura/reporting/pdf.py +373 -0
  369. oscura/reporting/plots.py +731 -0
  370. oscura/reporting/pptx_export.py +360 -0
  371. oscura/reporting/renderers/__init__.py +11 -0
  372. oscura/reporting/renderers/pdf.py +94 -0
  373. oscura/reporting/sections.py +471 -0
  374. oscura/reporting/standards.py +680 -0
  375. oscura/reporting/summary_generator.py +368 -0
  376. oscura/reporting/tables.py +397 -0
  377. oscura/reporting/template_system.py +724 -0
  378. oscura/reporting/templates/__init__.py +15 -0
  379. oscura/reporting/templates/definition.py +205 -0
  380. oscura/reporting/templates/index.html +649 -0
  381. oscura/reporting/templates/index.md +173 -0
  382. oscura/schemas/__init__.py +158 -0
  383. oscura/schemas/bus_configuration.json +322 -0
  384. oscura/schemas/device_mapping.json +182 -0
  385. oscura/schemas/packet_format.json +418 -0
  386. oscura/schemas/protocol_definition.json +363 -0
  387. oscura/search/__init__.py +16 -0
  388. oscura/search/anomaly.py +292 -0
  389. oscura/search/context.py +149 -0
  390. oscura/search/pattern.py +160 -0
  391. oscura/session/__init__.py +34 -0
  392. oscura/session/annotations.py +289 -0
  393. oscura/session/history.py +313 -0
  394. oscura/session/session.py +445 -0
  395. oscura/streaming/__init__.py +43 -0
  396. oscura/streaming/chunked.py +611 -0
  397. oscura/streaming/progressive.py +393 -0
  398. oscura/streaming/realtime.py +622 -0
  399. oscura/testing/__init__.py +54 -0
  400. oscura/testing/synthetic.py +808 -0
  401. oscura/triggering/__init__.py +68 -0
  402. oscura/triggering/base.py +229 -0
  403. oscura/triggering/edge.py +353 -0
  404. oscura/triggering/pattern.py +344 -0
  405. oscura/triggering/pulse.py +581 -0
  406. oscura/triggering/window.py +453 -0
  407. oscura/ui/__init__.py +48 -0
  408. oscura/ui/formatters.py +526 -0
  409. oscura/ui/progressive_display.py +340 -0
  410. oscura/utils/__init__.py +99 -0
  411. oscura/utils/autodetect.py +338 -0
  412. oscura/utils/buffer.py +389 -0
  413. oscura/utils/lazy.py +407 -0
  414. oscura/utils/lazy_imports.py +147 -0
  415. oscura/utils/memory.py +836 -0
  416. oscura/utils/memory_advanced.py +1326 -0
  417. oscura/utils/memory_extensions.py +465 -0
  418. oscura/utils/progressive.py +352 -0
  419. oscura/utils/windowing.py +362 -0
  420. oscura/visualization/__init__.py +321 -0
  421. oscura/visualization/accessibility.py +526 -0
  422. oscura/visualization/annotations.py +374 -0
  423. oscura/visualization/axis_scaling.py +305 -0
  424. oscura/visualization/colors.py +453 -0
  425. oscura/visualization/digital.py +337 -0
  426. oscura/visualization/eye.py +420 -0
  427. oscura/visualization/histogram.py +281 -0
  428. oscura/visualization/interactive.py +858 -0
  429. oscura/visualization/jitter.py +702 -0
  430. oscura/visualization/keyboard.py +394 -0
  431. oscura/visualization/layout.py +365 -0
  432. oscura/visualization/optimization.py +1028 -0
  433. oscura/visualization/palettes.py +446 -0
  434. oscura/visualization/plot.py +92 -0
  435. oscura/visualization/power.py +290 -0
  436. oscura/visualization/power_extended.py +626 -0
  437. oscura/visualization/presets.py +467 -0
  438. oscura/visualization/protocols.py +932 -0
  439. oscura/visualization/render.py +207 -0
  440. oscura/visualization/rendering.py +444 -0
  441. oscura/visualization/reverse_engineering.py +791 -0
  442. oscura/visualization/signal_integrity.py +808 -0
  443. oscura/visualization/specialized.py +553 -0
  444. oscura/visualization/spectral.py +811 -0
  445. oscura/visualization/styles.py +381 -0
  446. oscura/visualization/thumbnails.py +311 -0
  447. oscura/visualization/time_axis.py +351 -0
  448. oscura/visualization/waveform.py +367 -0
  449. oscura/workflow/__init__.py +13 -0
  450. oscura/workflow/dag.py +377 -0
  451. oscura/workflows/__init__.py +58 -0
  452. oscura/workflows/compliance.py +280 -0
  453. oscura/workflows/digital.py +272 -0
  454. oscura/workflows/multi_trace.py +502 -0
  455. oscura/workflows/power.py +178 -0
  456. oscura/workflows/protocol.py +492 -0
  457. oscura/workflows/reverse_engineering.py +639 -0
  458. oscura/workflows/signal_integrity.py +227 -0
  459. oscura-0.1.0.dist-info/METADATA +300 -0
  460. oscura-0.1.0.dist-info/RECORD +463 -0
  461. oscura-0.1.0.dist-info/entry_points.txt +2 -0
  462. {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/licenses/LICENSE +1 -1
  463. oscura-0.0.1.dist-info/METADATA +0 -63
  464. oscura-0.0.1.dist-info/RECORD +0 -5
  465. {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1119 @@
1
+ """Reverse Engineering Pipeline for integrated protocol analysis.
2
+
3
+ - RE-INT-001: RE Pipeline Integration
4
+
5
+ This module provides an integrated pipeline for complete reverse engineering
6
+ workflows from raw packet capture to decoded messages with automatic tool
7
+ selection and chaining.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import logging
14
+ import os
15
+ import time
16
+ from collections.abc import Callable, Sequence
17
+ from dataclasses import dataclass, field
18
+ from datetime import datetime
19
+ from pathlib import Path
20
+ from typing import Any, ClassVar, Literal
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
@dataclass
class FlowInfo:
    """Information about a single network flow (5-tuple plus counters).

    Attributes:
        flow_id: Unique flow identifier.
        src_ip: Source IP address.
        dst_ip: Destination IP address.
        src_port: Source transport port.
        dst_port: Destination transport port.
        protocol: Transport protocol name.
        packet_count: Number of packets observed in the flow.
        byte_count: Total bytes observed in the flow.
        start_time: Flow start timestamp (presumably epoch seconds —
            confirm against the producing loader).
        end_time: Flow end timestamp (same unit as start_time).
    """

    flow_id: str
    src_ip: str
    dst_ip: str
    src_port: int
    dst_port: int
    protocol: str
    packet_count: int
    byte_count: int
    start_time: float
    end_time: float
52
+
53
+
54
@dataclass
class MessageTypeInfo:
    """Information about one detected message type.

    Attributes:
        type_id: Unique type identifier.
        name: Type name (auto-generated or taken from inference).
        sample_count: Number of messages assigned to this type.
        avg_length: Average message length in bytes.
        field_count: Number of detected fields.
        signature: Representative byte signature for the type.
        cluster_id: ID of the cluster this type was derived from.
    """

    type_id: str
    name: str
    sample_count: int
    avg_length: float
    field_count: int
    signature: bytes
    cluster_id: int
75
+
76
+
77
@dataclass
class ProtocolCandidate:
    """A candidate protocol identification with supporting evidence.

    Attributes:
        name: Protocol name.
        confidence: Detection confidence in [0, 1].
        matched_patterns: Patterns that matched during detection.
        port_hint: Whether the port number suggested this protocol.
        header_match: Whether the header matched a known signature.
    """

    name: str
    confidence: float
    matched_patterns: list[str] = field(default_factory=list)
    port_hint: bool = False
    header_match: bool = False
94
+
95
+
96
@dataclass
class REAnalysisResult:
    """Complete reverse engineering analysis result.

    Implements RE-INT-001: Analysis result structure.

    Attributes:
        flow_count: Number of flows analyzed.
        message_count: Total messages extracted.
        message_types: Detected message types.
        protocol_candidates: Candidate protocol identifications.
        field_schemas: Inferred field schemas keyed per message type.
        state_machine: Inferred state machine, or None if unavailable.
        statistics: Analysis statistics (per-stage timings, counters).
        warnings: Warnings accumulated during analysis.
        duration_seconds: Wall-clock analysis duration.
        timestamp: ISO-8601 analysis timestamp (timezone-naive local
            time — produced via datetime.now()).
    """

    flow_count: int
    message_count: int
    message_types: list[MessageTypeInfo]
    protocol_candidates: list[ProtocolCandidate]
    field_schemas: dict[str, Any]
    state_machine: Any | None
    statistics: dict[str, Any]
    warnings: list[str]
    duration_seconds: float
    timestamp: str
125
+
126
+
127
@dataclass
class StageResult:
    """Result from a single pipeline stage execution.

    Attributes:
        stage_name: Name of the stage.
        success: Whether the stage completed without raising.
        duration: Stage duration in seconds.
        output: Stage output data (merged into the pipeline context).
        error: Error message if the stage failed, else None.
    """

    stage_name: str
    success: bool
    duration: float
    output: Any
    error: str | None = None
144
+
145
+
146
class REPipeline:
    """Integrated reverse engineering pipeline.

    Implements RE-INT-001: RE Pipeline Integration.

    Chains the RE tools into a coherent pipeline with automatic tool
    selection based on data characteristics. Stages run in order, each
    reading from and merging its output into a shared context dict.

    Example:
        >>> pipeline = REPipeline()
        >>> results = pipeline.analyze(packet_data)
        >>> print(f"Detected {len(results.message_types)} message types")
        >>> pipeline.generate_report(results, "report.html")
    """

    # Default pipeline stages, executed in this order whenever the
    # caller does not pass an explicit stage list to __init__.
    DEFAULT_STAGES: ClassVar[list[str]] = [
        "flow_extraction",
        "payload_analysis",
        "pattern_discovery",
        "field_inference",
        "protocol_detection",
        "state_machine",
    ]
170
+
171
+ def __init__(
172
+ self,
173
+ stages: list[str] | None = None,
174
+ config: dict[str, Any] | None = None,
175
+ ) -> None:
176
+ """Initialize RE pipeline.
177
+
178
+ Args:
179
+ stages: List of stage names to execute.
180
+ config: Configuration options.
181
+ """
182
+ self.stages = stages or self.DEFAULT_STAGES
183
+ self.config = config or {}
184
+
185
+ # Default configuration
186
+ self.config.setdefault("min_samples", 10)
187
+ self.config.setdefault("entropy_threshold", 6.0)
188
+ self.config.setdefault("cluster_threshold", 0.8)
189
+ self.config.setdefault("state_machine_algorithm", "rpni")
190
+ self.config.setdefault("max_message_types", 50)
191
+
192
+ # Stage handlers
193
+ self._stage_handlers: dict[str, Callable[[dict[str, Any]], dict[str, Any]]] = {
194
+ "flow_extraction": self._stage_flow_extraction,
195
+ "payload_analysis": self._stage_payload_analysis,
196
+ "pattern_discovery": self._stage_pattern_discovery,
197
+ "field_inference": self._stage_field_inference,
198
+ "protocol_detection": self._stage_protocol_detection,
199
+ "state_machine": self._stage_state_machine,
200
+ }
201
+
202
+ # Progress callback
203
+ self._progress_callback: Callable[[str, float], None] | None = None
204
+
205
+ # Checkpoint support
206
+ self._checkpoint_path: str | None = None
207
+ self._checkpoint_data: dict[str, Any] = {}
208
+
209
+ def analyze(
210
+ self,
211
+ data: bytes | Sequence[dict[str, Any]] | Sequence[bytes],
212
+ checkpoint: str | None = None,
213
+ progress_callback: Callable[[str, float], None] | None = None,
214
+ ) -> REAnalysisResult:
215
+ """Run full reverse engineering analysis.
216
+
217
+ Implements RE-INT-001: Complete analysis workflow.
218
+
219
+ Args:
220
+ data: Raw binary data, packet list, or PCAP path.
221
+ checkpoint: Path for checkpointing progress.
222
+ progress_callback: Callback for progress reporting.
223
+
224
+ Returns:
225
+ REAnalysisResult with complete analysis.
226
+
227
+ Example:
228
+ >>> results = pipeline.analyze(packets)
229
+ >>> for msg_type in results.message_types:
230
+ ... print(f"{msg_type.name}: {msg_type.sample_count} samples")
231
+ """
232
+ start_time = time.time()
233
+ self._progress_callback = progress_callback
234
+ self._checkpoint_path = checkpoint
235
+ self._checkpoint_data = {}
236
+
237
+ # Load checkpoint if available
238
+ if checkpoint and os.path.exists(checkpoint):
239
+ self._load_checkpoint(checkpoint)
240
+
241
+ # Initialize context
242
+ context: dict[str, Any] = {
243
+ "raw_data": data,
244
+ "flows": [],
245
+ "payloads": [],
246
+ "messages": [],
247
+ "patterns": [],
248
+ "clusters": [],
249
+ "schemas": {},
250
+ "protocol_candidates": [],
251
+ "state_machine": None,
252
+ "warnings": [],
253
+ "statistics": {},
254
+ }
255
+
256
+ # Execute stages
257
+ stage_results = []
258
+ total_stages = len(self.stages)
259
+
260
+ for i, stage_name in enumerate(self.stages):
261
+ if stage_name in self._checkpoint_data:
262
+ # Skip completed stages
263
+ context.update(self._checkpoint_data[stage_name])
264
+ continue
265
+
266
+ self._report_progress(stage_name, (i / total_stages) * 100)
267
+
268
+ handler = self._stage_handlers.get(stage_name)
269
+ if handler:
270
+ try:
271
+ stage_start = time.time()
272
+ output = handler(context)
273
+ stage_duration = time.time() - stage_start
274
+
275
+ stage_results.append(
276
+ StageResult(
277
+ stage_name=stage_name,
278
+ success=True,
279
+ duration=stage_duration,
280
+ output=output,
281
+ )
282
+ )
283
+
284
+ # Update context with stage output
285
+ if output:
286
+ context.update(output)
287
+
288
+ # Checkpoint after each stage
289
+ if checkpoint:
290
+ self._save_checkpoint(checkpoint, stage_name, context)
291
+
292
+ except Exception as e:
293
+ stage_results.append(
294
+ StageResult(
295
+ stage_name=stage_name,
296
+ success=False,
297
+ duration=0,
298
+ output=None,
299
+ error=str(e),
300
+ )
301
+ )
302
+ warnings_list: list[str] = context.get("warnings", [])
303
+ warnings_list.append(f"Stage {stage_name} failed: {e}")
304
+ context["warnings"] = warnings_list
305
+
306
+ self._report_progress("complete", 100)
307
+
308
+ # Build result
309
+ duration = time.time() - start_time
310
+
311
+ flows_list: list[Any] = context.get("flows", [])
312
+ messages_list: list[Any] = context.get("messages", [])
313
+ protocol_candidates_list: list[ProtocolCandidate] = context.get("protocol_candidates", [])
314
+ schemas_dict: dict[str, Any] = context.get("schemas", {})
315
+ warnings_list_result: list[str] = context.get("warnings", [])
316
+
317
+ return REAnalysisResult(
318
+ flow_count=len(flows_list),
319
+ message_count=len(messages_list),
320
+ message_types=self._build_message_types(context),
321
+ protocol_candidates=protocol_candidates_list,
322
+ field_schemas=schemas_dict,
323
+ state_machine=context.get("state_machine"),
324
+ statistics=self._build_statistics(context, stage_results),
325
+ warnings=warnings_list_result,
326
+ duration_seconds=duration,
327
+ timestamp=datetime.now().isoformat(),
328
+ )
329
+
330
+ def analyze_pcap(
331
+ self,
332
+ path: str | Path,
333
+ checkpoint: str | None = None,
334
+ ) -> REAnalysisResult:
335
+ """Analyze packets from a PCAP file.
336
+
337
+ Implements RE-INT-001: PCAP file analysis.
338
+
339
+ Args:
340
+ path: Path to PCAP file.
341
+ checkpoint: Optional checkpoint path.
342
+
343
+ Returns:
344
+ REAnalysisResult with analysis results.
345
+
346
+ Raises:
347
+ FileNotFoundError: If PCAP file not found.
348
+ """
349
+ # Load PCAP (simplified - would use scapy or pyshark in real impl)
350
+ path = Path(path)
351
+ if not path.exists():
352
+ raise FileNotFoundError(f"PCAP file not found: {path}")
353
+
354
+ with open(path, "rb") as f:
355
+ data = f.read()
356
+
357
+ return self.analyze(data, checkpoint=checkpoint)
358
+
359
+ def register_stage_handler(
360
+ self,
361
+ stage_name: str,
362
+ handler: Callable[[dict[str, Any]], dict[str, Any]],
363
+ ) -> None:
364
+ """Register a custom handler for a pipeline stage.
365
+
366
+ This allows replacing or adding custom stage handlers for testing
367
+ or extending pipeline functionality.
368
+
369
+ Args:
370
+ stage_name: Name of the stage to register handler for.
371
+ handler: Callable that takes context dict and returns output dict.
372
+
373
+ Example:
374
+ >>> def custom_handler(context):
375
+ ... return {"custom_data": "processed"}
376
+ >>> pipeline = REPipeline()
377
+ >>> pipeline.register_stage_handler("custom_stage", custom_handler)
378
+ """
379
+ self._stage_handlers[stage_name] = handler
380
+
381
+ def generate_report(
382
+ self,
383
+ results: REAnalysisResult,
384
+ output_path: str | Path,
385
+ format: Literal["html", "json", "markdown"] = "html",
386
+ ) -> None:
387
+ """Generate analysis report.
388
+
389
+ Implements RE-INT-001: Report generation.
390
+
391
+ Args:
392
+ results: Analysis results.
393
+ output_path: Output file path.
394
+ format: Report format.
395
+
396
+ Example:
397
+ >>> pipeline.generate_report(results, "report.html")
398
+ """
399
+ output_path = Path(output_path)
400
+
401
+ if format == "json":
402
+ self._generate_json_report(results, output_path)
403
+ elif format == "markdown":
404
+ self._generate_markdown_report(results, output_path)
405
+ else:
406
+ self._generate_html_report(results, output_path)
407
+
408
+ # =========================================================================
409
+ # Pipeline Stages
410
+ # =========================================================================
411
+
412
    def _stage_flow_extraction(self, context: dict[str, Any]) -> dict[str, Any]:
        """Extract network flows from raw data.

        Handles three input shapes for ``context["raw_data"]``: a single
        ``bytes`` blob, a sequence of packet-metadata dicts, or a sequence
        of raw byte payloads (dicts and raw bytes may be mixed).

        Args:
            context: Pipeline context; reads "raw_data" and records summary
                counts under ``context["statistics"]["flow_extraction"]``.

        Returns:
            Dict with "flows" (FlowInfo records) and "payloads"
            (one bytes entry per input packet/blob).
        """
        data = context["raw_data"]
        flows = []
        payloads = []

        if isinstance(data, bytes):
            # Raw binary - treat as single payload in one synthetic flow
            # with unknown endpoints.
            payloads.append(data)
            flows.append(
                FlowInfo(
                    flow_id="flow_0",
                    src_ip="unknown",
                    dst_ip="unknown",
                    src_port=0,
                    dst_port=0,
                    protocol="unknown",
                    packet_count=1,
                    byte_count=len(data),
                    start_time=0,
                    end_time=0,
                )
            )

        elif isinstance(data, list | tuple):
            # List of packets: dict-style packets are grouped into flows by
            # 5-tuple; bare byte packets are pooled into one default flow.
            flow_map: dict[str, dict[str, Any]] = {}
            raw_bytes_payloads: list[bytes] = []

            for _i, pkt in enumerate(data):
                if isinstance(pkt, dict):
                    # Packet with metadata: payload may live under "data" or
                    # "payload" and may be a list/tuple of ints; anything
                    # else degrades to an empty payload.
                    payload_raw = pkt.get("data", pkt.get("payload", b""))
                    if isinstance(payload_raw, list | tuple):
                        payload = bytes(payload_raw)
                    else:
                        payload = payload_raw if isinstance(payload_raw, bytes) else b""

                    # Create flow key from the (src, dst, protocol) 5-tuple.
                    src_ip = pkt.get("src_ip", "0.0.0.0")
                    dst_ip = pkt.get("dst_ip", "0.0.0.0")
                    src_port = pkt.get("src_port", 0)
                    dst_port = pkt.get("dst_port", 0)
                    protocol = pkt.get("protocol", "unknown")

                    flow_key = f"{src_ip}:{src_port}-{dst_ip}:{dst_port}-{protocol}"

                    if flow_key not in flow_map:
                        flow_map[flow_key] = {
                            "src_ip": src_ip,
                            "dst_ip": dst_ip,
                            "src_port": src_port,
                            "dst_port": dst_port,
                            "protocol": protocol,
                            "packets": [],
                            "payloads": [],
                            "timestamps": [],
                        }

                    flow_map[flow_key]["packets"].append(pkt)
                    flow_map[flow_key]["payloads"].append(payload)
                    if "timestamp" in pkt:
                        flow_map[flow_key]["timestamps"].append(pkt["timestamp"])

                    payloads.append(payload)

                else:
                    # Raw bytes - collect for default flow
                    raw_payload = bytes(pkt) if not isinstance(pkt, bytes) else pkt
                    payloads.append(raw_payload)
                    raw_bytes_payloads.append(raw_payload)

            # Build flow objects from flow_map (flow_key doubles as flow_id).
            for flow_id, flow_data in flow_map.items():
                timestamps = flow_data.get("timestamps", [0])
                flows.append(
                    FlowInfo(
                        flow_id=flow_id,
                        src_ip=flow_data["src_ip"],
                        dst_ip=flow_data["dst_ip"],
                        src_port=flow_data["src_port"],
                        dst_port=flow_data["dst_port"],
                        protocol=flow_data["protocol"],
                        packet_count=len(flow_data["packets"]),
                        byte_count=sum(len(p) for p in flow_data["payloads"]),
                        start_time=min(timestamps) if timestamps else 0,
                        end_time=max(timestamps) if timestamps else 0,
                    )
                )

            # Create a default flow for raw bytes if we have any
            # This ensures flow_count >= 1 when analyzing raw byte sequences
            # (only when no metadata-derived flow exists at all).
            if raw_bytes_payloads and not flows:
                flows.append(
                    FlowInfo(
                        flow_id="flow_default",
                        src_ip="unknown",
                        dst_ip="unknown",
                        src_port=0,
                        dst_port=0,
                        protocol="unknown",
                        packet_count=len(raw_bytes_payloads),
                        byte_count=sum(len(p) for p in raw_bytes_payloads),
                        start_time=0,
                        end_time=0,
                    )
                )

        # Initialize statistics dict if it doesn't exist
        if "statistics" not in context:
            context["statistics"] = {}

        context["statistics"]["flow_extraction"] = {
            "flow_count": len(flows),
            "payload_count": len(payloads),
            "total_bytes": sum(len(p) for p in payloads),
        }

        return {"flows": flows, "payloads": payloads}
538
+
539
+ def _stage_payload_analysis(self, context: dict[str, Any]) -> dict[str, Any]:
540
+ """Analyze payloads for structure.
541
+
542
+ Args:
543
+ context: Pipeline context.
544
+
545
+ Returns:
546
+ Updated context with payload analysis.
547
+ """
548
+ payloads = context.get("payloads", [])
549
+
550
+ # Filter non-empty payloads
551
+ valid_payloads = [p for p in payloads if p and len(p) > 0]
552
+
553
+ # Basic statistics
554
+ if valid_payloads:
555
+ lengths = [len(p) for p in valid_payloads]
556
+ avg_len = sum(lengths) / len(lengths)
557
+ min_len = min(lengths)
558
+ max_len = max(lengths)
559
+ else:
560
+ avg_len = min_len = max_len = 0
561
+
562
+ # Detect delimiter patterns
563
+ delimiter_info = None
564
+ if valid_payloads:
565
+ try:
566
+ from oscura.analyzers.packet.payload import detect_delimiter
567
+
568
+ concat = b"".join(valid_payloads)
569
+ delimiter_result = detect_delimiter(concat)
570
+ if delimiter_result.confidence > 0.5:
571
+ delimiter_info = {
572
+ "delimiter": delimiter_result.delimiter.hex(),
573
+ "confidence": delimiter_result.confidence,
574
+ }
575
+ except Exception as e:
576
+ logger.debug("Delimiter detection failed (non-critical): %s", e)
577
+
578
+ # Initialize statistics dict if it doesn't exist
579
+ if "statistics" not in context:
580
+ context["statistics"] = {}
581
+
582
+ context["statistics"]["payload_analysis"] = {
583
+ "payload_count": len(valid_payloads),
584
+ "avg_length": avg_len,
585
+ "min_length": min_len,
586
+ "max_length": max_len,
587
+ "delimiter": delimiter_info,
588
+ }
589
+
590
+ return {"messages": valid_payloads}
591
+
592
+ def _stage_pattern_discovery(self, context: dict[str, Any]) -> dict[str, Any]:
593
+ """Discover patterns in messages.
594
+
595
+ Args:
596
+ context: Pipeline context.
597
+
598
+ Returns:
599
+ Updated context with patterns.
600
+ """
601
+ messages = context.get("messages", [])
602
+ patterns = []
603
+ clusters = []
604
+
605
+ if len(messages) >= 2:
606
+ try:
607
+ from oscura.analyzers.packet.payload import cluster_payloads
608
+
609
+ # Cluster similar messages
610
+ threshold = self.config.get("cluster_threshold", 0.8)
611
+ clusters = cluster_payloads(messages, threshold=threshold)
612
+
613
+ # Extract common patterns from clusters
614
+ for cluster in clusters:
615
+ if len(cluster.payloads) >= 2:
616
+ # Find common prefix
617
+ common_prefix = cluster.payloads[0]
618
+ for payload in cluster.payloads[1:]:
619
+ new_prefix = bytearray()
620
+ for i in range(min(len(common_prefix), len(payload))):
621
+ if common_prefix[i] == payload[i]:
622
+ new_prefix.append(common_prefix[i])
623
+ else:
624
+ break
625
+ common_prefix = bytes(new_prefix)
626
+
627
+ if len(common_prefix) >= 2:
628
+ patterns.append(
629
+ {
630
+ "pattern": common_prefix,
631
+ "cluster_id": cluster.cluster_id,
632
+ "frequency": len(cluster.payloads),
633
+ }
634
+ )
635
+
636
+ except Exception as e:
637
+ context["warnings"].append(f"Pattern discovery failed: {e}")
638
+
639
+ # Initialize statistics dict if it doesn't exist
640
+ if "statistics" not in context:
641
+ context["statistics"] = {}
642
+
643
+ context["statistics"]["pattern_discovery"] = {
644
+ "cluster_count": len(clusters),
645
+ "pattern_count": len(patterns),
646
+ }
647
+
648
+ return {"patterns": patterns, "clusters": clusters}
649
+
650
+ def _stage_field_inference(self, context: dict[str, Any]) -> dict[str, Any]:
651
+ """Infer field structure in messages.
652
+
653
+ Args:
654
+ context: Pipeline context.
655
+
656
+ Returns:
657
+ Updated context with field schemas.
658
+ """
659
+ clusters = context.get("clusters", [])
660
+ schemas = {}
661
+
662
+ for cluster in clusters:
663
+ if not hasattr(cluster, "payloads") or len(cluster.payloads) < 5:
664
+ continue
665
+
666
+ try:
667
+ from oscura.analyzers.packet.payload import FieldInferrer
668
+
669
+ inferrer = FieldInferrer(min_samples=self.config.get("min_samples", 10))
670
+ schema = inferrer.infer_fields(cluster.payloads)
671
+
672
+ if schema.fields:
673
+ cluster_id = getattr(cluster, "cluster_id", 0)
674
+ schemas[f"type_{cluster_id}"] = {
675
+ "field_count": len(schema.fields),
676
+ "message_length": schema.message_length,
677
+ "fixed_length": schema.fixed_length,
678
+ "confidence": schema.confidence,
679
+ "fields": [
680
+ {
681
+ "name": f.name,
682
+ "offset": f.offset,
683
+ "size": f.size,
684
+ "type": f.inferred_type,
685
+ "is_constant": f.is_constant,
686
+ "is_sequence": f.is_sequence,
687
+ }
688
+ for f in schema.fields
689
+ ],
690
+ }
691
+
692
+ except Exception as e:
693
+ context["warnings"].append(f"Field inference failed for cluster: {e}")
694
+
695
+ # Initialize statistics dict if it doesn't exist
696
+ if "statistics" not in context:
697
+ context["statistics"] = {}
698
+
699
+ context["statistics"]["field_inference"] = {
700
+ "schema_count": len(schemas),
701
+ }
702
+
703
+ return {"schemas": schemas}
704
+
705
    def _stage_protocol_detection(self, context: dict[str, Any]) -> dict[str, Any]:
        """Detect protocol candidates.

        Combines three weak, best-effort signals: well-known ports,
        magic-byte signatures on the first message, and header matches
        against the bundled protocol library. Candidates are deduplicated
        by name, keeping the highest-confidence entry.

        Args:
            context: Pipeline context; reads "messages" and "flows".

        Returns:
            Dict with "protocol_candidates": list of ProtocolCandidate.
        """
        messages = context.get("messages", [])
        flows = context.get("flows", [])
        candidates = []

        # Check well-known port mappings
        port_protocols = {
            53: "dns",
            80: "http",
            443: "https",
            502: "modbus_tcp",
            1883: "mqtt",
            5683: "coap",
            47808: "bacnet",
        }

        for flow in flows:
            # Prefer the destination port; fall back to the source port
            # (dst_port == 0 is falsy, e.g. for synthetic flows).
            port = flow.dst_port or flow.src_port
            if port in port_protocols:
                candidates.append(
                    ProtocolCandidate(
                        name=port_protocols[port],
                        confidence=0.6,
                        port_hint=True,
                    )
                )

        # Check magic byte signatures (only the first message is sampled).
        if messages:
            try:
                from oscura.inference.binary import MagicByteDetector

                detector = MagicByteDetector()
                sample = messages[0] if messages else b""

                if len(sample) >= 2:
                    result = detector.detect(sample)
                    if result and result.known_format:
                        candidates.append(
                            ProtocolCandidate(
                                name=result.known_format,
                                confidence=result.confidence,
                                header_match=True,
                            )
                        )

            except Exception as e:
                logger.debug("Magic byte detection failed (non-critical): %s", e)

        # Check protocol library
        try:
            from oscura.inference.protocol_library import get_library

            library = get_library()

            for protocol in library.list_protocols():
                if protocol.definition:
                    # Check if first bytes match protocol header
                    # (only the first 10 messages are inspected).
                    for msg in messages[:10]:
                        if len(msg) >= 4:
                            # Simple header matching
                            first_field = (
                                protocol.definition.fields[0]
                                if protocol.definition.fields
                                else None
                            )
                            if first_field and hasattr(first_field, "value"):
                                # Has expected value
                                # NOTE(review): this only checks that the first
                                # field declares a value, not that the message
                                # bytes actually match it - hence the low 0.4
                                # confidence. Confirm against intended design.
                                candidates.append(
                                    ProtocolCandidate(
                                        name=protocol.name,
                                        confidence=0.4,
                                        matched_patterns=["header_value"],
                                    )
                                )
                                break

        except Exception as e:
            logger.debug("Protocol library matching failed (non-critical): %s", e)

        # Deduplicate by name, keeping highest confidence
        unique_candidates: dict[str, ProtocolCandidate] = {}
        for c in candidates:
            if (
                c.name not in unique_candidates
                or c.confidence > unique_candidates[c.name].confidence
            ):
                unique_candidates[c.name] = c

        return {"protocol_candidates": list(unique_candidates.values())}
803
+
804
    def _stage_state_machine(self, context: dict[str, Any]) -> dict[str, Any]:
        """Infer protocol state machine.

        Maps each message index to its cluster id, builds one observation
        sequence of ``type_<id>`` symbols in message order, and feeds it to
        the RPNI inferrer. Requires at least two clusters and three
        observations; otherwise "state_machine" is None.

        Args:
            context: Pipeline context; reads "clusters" and "messages",
                may append to "warnings".

        Returns:
            Dict with "state_machine": a summary dict (state/transition
            counts plus the automaton object) or None.
        """
        clusters = context.get("clusters", [])

        if len(clusters) < 2:
            return {"state_machine": None}

        try:
            # Build sequences from cluster transitions
            messages = context.get("messages", [])
            message_to_cluster = {}

            for cluster in clusters:
                # NOTE(review): assumes each cluster exposes "indices"
                # (message positions); clusters without it contribute nothing.
                for idx in getattr(cluster, "indices", []):
                    message_to_cluster[idx] = getattr(cluster, "cluster_id", 0)

            # Build observation sequence (messages with no cluster are skipped)
            sequence = [
                f"type_{message_to_cluster.get(i, 0)}"
                for i in range(len(messages))
                if i in message_to_cluster
            ]

            if len(sequence) >= 3:
                from oscura.inference.state_machine import StateMachineInferrer

                inferrer = StateMachineInferrer()
                automaton = inferrer.infer_rpni([sequence])

                # Guard against the inferrer returning None.
                return {
                    "state_machine": {
                        "states": len(automaton.states) if automaton is not None else 0,
                        "transitions": len(automaton.transitions) if automaton is not None else 0,
                        "automaton": automaton,
                    }
                }

        except Exception as e:
            context["warnings"].append(f"State machine inference failed: {e}")

        return {"state_machine": None}
852
+
853
+ # =========================================================================
854
+ # Helper Methods
855
+ # =========================================================================
856
+
857
+ def _report_progress(self, stage: str, percent: float) -> None:
858
+ """Report progress to callback."""
859
+ if self._progress_callback:
860
+ self._progress_callback(stage, percent)
861
+
862
+ def _load_checkpoint(self, path: str) -> None:
863
+ """Load checkpoint data."""
864
+ try:
865
+ with open(path) as f:
866
+ self._checkpoint_data = json.load(f)
867
+ except Exception:
868
+ self._checkpoint_data = {}
869
+
870
+ def _save_checkpoint(self, path: str, stage: str, context: dict[str, Any]) -> None:
871
+ """Save checkpoint data."""
872
+ try:
873
+ # Extract serializable parts of context
874
+ checkpoint = {
875
+ stage: {
876
+ "flow_count": len(context.get("flows", [])),
877
+ "message_count": len(context.get("messages", [])),
878
+ "cluster_count": len(context.get("clusters", [])),
879
+ }
880
+ }
881
+
882
+ if os.path.exists(path):
883
+ with open(path) as f:
884
+ existing = json.load(f)
885
+ checkpoint.update(existing)
886
+
887
+ with open(path, "w") as f:
888
+ json.dump(checkpoint, f, indent=2)
889
+
890
+ except Exception as e:
891
+ logger.debug("Checkpoint file save failed (non-critical): %s", e)
892
+
893
+ def _build_message_types(self, context: dict[str, Any]) -> list[MessageTypeInfo]:
894
+ """Build message type information from context."""
895
+ clusters = context.get("clusters", [])
896
+ message_types = []
897
+
898
+ for cluster in clusters:
899
+ payloads = getattr(cluster, "payloads", [])
900
+ if not payloads:
901
+ continue
902
+
903
+ cluster_id = getattr(cluster, "cluster_id", 0)
904
+ avg_len = sum(len(p) for p in payloads) / len(payloads) if payloads else 0
905
+
906
+ # Get schema if available
907
+ schema = context.get("schemas", {}).get(f"type_{cluster_id}", {})
908
+ field_count = schema.get("field_count", 0)
909
+
910
+ # Use representative as signature
911
+ signature = payloads[0][:16] if payloads else b""
912
+
913
+ message_types.append(
914
+ MessageTypeInfo(
915
+ type_id=f"type_{cluster_id}",
916
+ name=f"Message Type {cluster_id}",
917
+ sample_count=len(payloads),
918
+ avg_length=avg_len,
919
+ field_count=field_count,
920
+ signature=signature,
921
+ cluster_id=cluster_id,
922
+ )
923
+ )
924
+
925
+ return message_types
926
+
927
+ def _build_statistics(
928
+ self, context: dict[str, Any], stage_results: list[StageResult]
929
+ ) -> dict[str, Any]:
930
+ """Build analysis statistics."""
931
+ stats: dict[str, Any] = context.get("statistics", {})
932
+
933
+ # Add stage timing
934
+ stats["stage_timing"] = {r.stage_name: r.duration for r in stage_results}
935
+
936
+ # Add success info
937
+ stats["stages_completed"] = sum(1 for r in stage_results if r.success)
938
+ stats["stages_failed"] = sum(1 for r in stage_results if not r.success)
939
+
940
+ return stats
941
+
942
+ def _generate_json_report(self, results: REAnalysisResult, path: Path) -> None:
943
+ """Generate JSON report."""
944
+ report = {
945
+ "flow_count": results.flow_count,
946
+ "message_count": results.message_count,
947
+ "message_types": [
948
+ {
949
+ "type_id": mt.type_id,
950
+ "name": mt.name,
951
+ "sample_count": mt.sample_count,
952
+ "avg_length": mt.avg_length,
953
+ "field_count": mt.field_count,
954
+ "signature": mt.signature.hex(),
955
+ }
956
+ for mt in results.message_types
957
+ ],
958
+ "protocol_candidates": [
959
+ {
960
+ "name": pc.name,
961
+ "confidence": pc.confidence,
962
+ "port_hint": pc.port_hint,
963
+ "header_match": pc.header_match,
964
+ }
965
+ for pc in results.protocol_candidates
966
+ ],
967
+ "field_schemas": results.field_schemas,
968
+ "statistics": results.statistics,
969
+ "warnings": results.warnings,
970
+ "duration_seconds": results.duration_seconds,
971
+ "timestamp": results.timestamp,
972
+ }
973
+
974
+ with open(path, "w") as f:
975
+ json.dump(report, f, indent=2)
976
+
977
+ def _generate_markdown_report(self, results: REAnalysisResult, path: Path) -> None:
978
+ """Generate Markdown report."""
979
+ lines = [
980
+ "# Reverse Engineering Analysis Report",
981
+ "",
982
+ f"**Generated:** {results.timestamp}",
983
+ f"**Duration:** {results.duration_seconds:.2f} seconds",
984
+ "",
985
+ "## Summary",
986
+ "",
987
+ f"- Flows analyzed: {results.flow_count}",
988
+ f"- Messages extracted: {results.message_count}",
989
+ f"- Message types detected: {len(results.message_types)}",
990
+ f"- Protocol candidates: {len(results.protocol_candidates)}",
991
+ "",
992
+ "## Message Types",
993
+ "",
994
+ ]
995
+
996
+ for mt in results.message_types:
997
+ lines.extend(
998
+ [
999
+ f"### {mt.name}",
1000
+ f"- Samples: {mt.sample_count}",
1001
+ f"- Average length: {mt.avg_length:.1f} bytes",
1002
+ f"- Fields detected: {mt.field_count}",
1003
+ f"- Signature: `{mt.signature.hex()}`",
1004
+ "",
1005
+ ]
1006
+ )
1007
+
1008
+ if results.protocol_candidates:
1009
+ lines.extend(
1010
+ [
1011
+ "## Protocol Candidates",
1012
+ "",
1013
+ ]
1014
+ )
1015
+ for pc in results.protocol_candidates:
1016
+ lines.append(f"- **{pc.name}** (confidence: {pc.confidence:.2%})")
1017
+ lines.append("")
1018
+
1019
+ if results.warnings:
1020
+ lines.extend(
1021
+ [
1022
+ "## Warnings",
1023
+ "",
1024
+ ]
1025
+ )
1026
+ for warning in results.warnings:
1027
+ lines.append(f"- {warning}")
1028
+
1029
+ with open(path, "w") as f:
1030
+ f.write("\n".join(lines))
1031
+
1032
+ def _generate_html_report(self, results: REAnalysisResult, path: Path) -> None:
1033
+ """Generate HTML report."""
1034
+ html = f"""<!DOCTYPE html>
1035
+ <html>
1036
+ <head>
1037
+ <title>RE Analysis Report</title>
1038
+ <style>
1039
+ body {{ font-family: Arial, sans-serif; margin: 40px; }}
1040
+ h1 {{ color: #333; }}
1041
+ h2 {{ color: #666; border-bottom: 1px solid #ddd; }}
1042
+ .summary {{ background: #f5f5f5; padding: 20px; border-radius: 5px; }}
1043
+ .type-card {{ border: 1px solid #ddd; padding: 15px; margin: 10px 0; border-radius: 5px; }}
1044
+ .signature {{ font-family: monospace; background: #eee; padding: 5px; }}
1045
+ .warning {{ color: #856404; background: #fff3cd; padding: 10px; border-radius: 5px; }}
1046
+ </style>
1047
+ </head>
1048
+ <body>
1049
+ <h1>Reverse Engineering Analysis Report</h1>
1050
+
1051
+ <div class="summary">
1052
+ <p><strong>Generated:</strong> {results.timestamp}</p>
1053
+ <p><strong>Duration:</strong> {results.duration_seconds:.2f} seconds</p>
1054
+ <p><strong>Flows:</strong> {results.flow_count}</p>
1055
+ <p><strong>Messages:</strong> {results.message_count}</p>
1056
+ <p><strong>Types:</strong> {len(results.message_types)}</p>
1057
+ </div>
1058
+
1059
+ <h2>Message Types</h2>
1060
+ """
1061
+ for mt in results.message_types:
1062
+ html += f"""
1063
+ <div class="type-card">
1064
+ <h3>{mt.name}</h3>
1065
+ <p><strong>Samples:</strong> {mt.sample_count}</p>
1066
+ <p><strong>Avg Length:</strong> {mt.avg_length:.1f} bytes</p>
1067
+ <p><strong>Fields:</strong> {mt.field_count}</p>
1068
+ <p><strong>Signature:</strong> <span class="signature">{mt.signature.hex()}</span></p>
1069
+ </div>
1070
+ """
1071
+ if results.protocol_candidates:
1072
+ html += "<h2>Protocol Candidates</h2><ul>"
1073
+ for pc in results.protocol_candidates:
1074
+ html += f"<li><strong>{pc.name}</strong> ({pc.confidence:.0%})</li>"
1075
+ html += "</ul>"
1076
+
1077
+ if results.warnings:
1078
+ html += "<h2>Warnings</h2>"
1079
+ for warning in results.warnings:
1080
+ html += f'<div class="warning">{warning}</div>'
1081
+
1082
+ html += """
1083
+ </body>
1084
+ </html>
1085
+ """
1086
+ with open(path, "w") as f:
1087
+ f.write(html)
1088
+
1089
+
1090
def analyze(
    data: bytes | Sequence[dict[str, Any]] | Sequence[bytes],
    stages: list[str] | None = None,
    config: dict[str, Any] | None = None,
) -> REAnalysisResult:
    """Convenience wrapper: build a pipeline and analyze *data* in one call.

    Implements RE-INT-001: Quick analysis function.

    Args:
        data: Raw bytes, packet-metadata dicts, or byte payloads to analyze.
        stages: Optional subset of pipeline stages to run.
        config: Optional configuration overrides for the pipeline.

    Returns:
        REAnalysisResult with the full analysis.
    """
    return REPipeline(stages=stages, config=config).analyze(data)
1109
+
1110
+
1111
# Public API of this module (kept in ASCII sort order).
__all__ = [
    "FlowInfo",
    "MessageTypeInfo",
    "ProtocolCandidate",
    "REAnalysisResult",
    "REPipeline",
    "StageResult",
    "analyze",
]