oscura 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oscura/__init__.py +169 -167
- oscura/analyzers/__init__.py +3 -0
- oscura/analyzers/classification.py +659 -0
- oscura/analyzers/digital/edges.py +325 -65
- oscura/analyzers/digital/quality.py +293 -166
- oscura/analyzers/digital/timing.py +260 -115
- oscura/analyzers/digital/timing_numba.py +334 -0
- oscura/analyzers/entropy.py +605 -0
- oscura/analyzers/eye/diagram.py +176 -109
- oscura/analyzers/eye/metrics.py +5 -5
- oscura/analyzers/jitter/__init__.py +6 -4
- oscura/analyzers/jitter/ber.py +52 -52
- oscura/analyzers/jitter/classification.py +156 -0
- oscura/analyzers/jitter/decomposition.py +163 -113
- oscura/analyzers/jitter/spectrum.py +80 -64
- oscura/analyzers/ml/__init__.py +39 -0
- oscura/analyzers/ml/features.py +600 -0
- oscura/analyzers/ml/signal_classifier.py +604 -0
- oscura/analyzers/packet/daq.py +246 -158
- oscura/analyzers/packet/parser.py +12 -1
- oscura/analyzers/packet/payload.py +50 -2110
- oscura/analyzers/packet/payload_analysis.py +361 -181
- oscura/analyzers/packet/payload_patterns.py +133 -70
- oscura/analyzers/packet/stream.py +84 -23
- oscura/analyzers/patterns/__init__.py +26 -5
- oscura/analyzers/patterns/anomaly_detection.py +908 -0
- oscura/analyzers/patterns/clustering.py +169 -108
- oscura/analyzers/patterns/clustering_optimized.py +227 -0
- oscura/analyzers/patterns/discovery.py +1 -1
- oscura/analyzers/patterns/matching.py +581 -197
- oscura/analyzers/patterns/pattern_mining.py +778 -0
- oscura/analyzers/patterns/periodic.py +121 -38
- oscura/analyzers/patterns/sequences.py +175 -78
- oscura/analyzers/power/conduction.py +1 -1
- oscura/analyzers/power/soa.py +6 -6
- oscura/analyzers/power/switching.py +250 -110
- oscura/analyzers/protocol/__init__.py +17 -1
- oscura/analyzers/protocols/base.py +6 -6
- oscura/analyzers/protocols/ble/__init__.py +38 -0
- oscura/analyzers/protocols/ble/analyzer.py +809 -0
- oscura/analyzers/protocols/ble/uuids.py +288 -0
- oscura/analyzers/protocols/can.py +257 -127
- oscura/analyzers/protocols/can_fd.py +107 -80
- oscura/analyzers/protocols/flexray.py +139 -80
- oscura/analyzers/protocols/hdlc.py +93 -58
- oscura/analyzers/protocols/i2c.py +247 -106
- oscura/analyzers/protocols/i2s.py +138 -86
- oscura/analyzers/protocols/industrial/__init__.py +40 -0
- oscura/analyzers/protocols/industrial/bacnet/__init__.py +33 -0
- oscura/analyzers/protocols/industrial/bacnet/analyzer.py +708 -0
- oscura/analyzers/protocols/industrial/bacnet/encoding.py +412 -0
- oscura/analyzers/protocols/industrial/bacnet/services.py +622 -0
- oscura/analyzers/protocols/industrial/ethercat/__init__.py +30 -0
- oscura/analyzers/protocols/industrial/ethercat/analyzer.py +474 -0
- oscura/analyzers/protocols/industrial/ethercat/mailbox.py +339 -0
- oscura/analyzers/protocols/industrial/ethercat/topology.py +166 -0
- oscura/analyzers/protocols/industrial/modbus/__init__.py +31 -0
- oscura/analyzers/protocols/industrial/modbus/analyzer.py +525 -0
- oscura/analyzers/protocols/industrial/modbus/crc.py +79 -0
- oscura/analyzers/protocols/industrial/modbus/functions.py +436 -0
- oscura/analyzers/protocols/industrial/opcua/__init__.py +21 -0
- oscura/analyzers/protocols/industrial/opcua/analyzer.py +552 -0
- oscura/analyzers/protocols/industrial/opcua/datatypes.py +446 -0
- oscura/analyzers/protocols/industrial/opcua/services.py +264 -0
- oscura/analyzers/protocols/industrial/profinet/__init__.py +23 -0
- oscura/analyzers/protocols/industrial/profinet/analyzer.py +441 -0
- oscura/analyzers/protocols/industrial/profinet/dcp.py +263 -0
- oscura/analyzers/protocols/industrial/profinet/ptcp.py +200 -0
- oscura/analyzers/protocols/jtag.py +180 -98
- oscura/analyzers/protocols/lin.py +219 -114
- oscura/analyzers/protocols/manchester.py +4 -4
- oscura/analyzers/protocols/onewire.py +253 -149
- oscura/analyzers/protocols/parallel_bus/__init__.py +20 -0
- oscura/analyzers/protocols/parallel_bus/centronics.py +92 -0
- oscura/analyzers/protocols/parallel_bus/gpib.py +137 -0
- oscura/analyzers/protocols/spi.py +192 -95
- oscura/analyzers/protocols/swd.py +321 -167
- oscura/analyzers/protocols/uart.py +267 -125
- oscura/analyzers/protocols/usb.py +235 -131
- oscura/analyzers/side_channel/power.py +17 -12
- oscura/analyzers/signal/__init__.py +15 -0
- oscura/analyzers/signal/timing_analysis.py +1086 -0
- oscura/analyzers/signal_integrity/__init__.py +4 -1
- oscura/analyzers/signal_integrity/sparams.py +2 -19
- oscura/analyzers/spectral/chunked.py +129 -60
- oscura/analyzers/spectral/chunked_fft.py +300 -94
- oscura/analyzers/spectral/chunked_wavelet.py +100 -80
- oscura/analyzers/statistical/checksum.py +376 -217
- oscura/analyzers/statistical/classification.py +229 -107
- oscura/analyzers/statistical/entropy.py +78 -53
- oscura/analyzers/statistics/correlation.py +407 -211
- oscura/analyzers/statistics/outliers.py +2 -2
- oscura/analyzers/statistics/streaming.py +30 -5
- oscura/analyzers/validation.py +216 -101
- oscura/analyzers/waveform/measurements.py +9 -0
- oscura/analyzers/waveform/measurements_with_uncertainty.py +31 -15
- oscura/analyzers/waveform/spectral.py +500 -228
- oscura/api/__init__.py +31 -5
- oscura/api/dsl/__init__.py +582 -0
- oscura/{dsl → api/dsl}/commands.py +43 -76
- oscura/{dsl → api/dsl}/interpreter.py +26 -51
- oscura/{dsl → api/dsl}/parser.py +107 -77
- oscura/{dsl → api/dsl}/repl.py +2 -2
- oscura/api/dsl.py +1 -1
- oscura/{integrations → api/integrations}/__init__.py +1 -1
- oscura/{integrations → api/integrations}/llm.py +201 -102
- oscura/api/operators.py +3 -3
- oscura/api/optimization.py +144 -30
- oscura/api/rest_server.py +921 -0
- oscura/api/server/__init__.py +17 -0
- oscura/api/server/dashboard.py +850 -0
- oscura/api/server/static/README.md +34 -0
- oscura/api/server/templates/base.html +181 -0
- oscura/api/server/templates/export.html +120 -0
- oscura/api/server/templates/home.html +284 -0
- oscura/api/server/templates/protocols.html +58 -0
- oscura/api/server/templates/reports.html +43 -0
- oscura/api/server/templates/session_detail.html +89 -0
- oscura/api/server/templates/sessions.html +83 -0
- oscura/api/server/templates/waveforms.html +73 -0
- oscura/automotive/__init__.py +8 -1
- oscura/automotive/can/__init__.py +10 -0
- oscura/automotive/can/checksum.py +3 -1
- oscura/automotive/can/dbc_generator.py +590 -0
- oscura/automotive/can/message_wrapper.py +121 -74
- oscura/automotive/can/patterns.py +98 -21
- oscura/automotive/can/session.py +292 -56
- oscura/automotive/can/state_machine.py +6 -3
- oscura/automotive/can/stimulus_response.py +97 -75
- oscura/automotive/dbc/__init__.py +10 -2
- oscura/automotive/dbc/generator.py +84 -56
- oscura/automotive/dbc/parser.py +6 -6
- oscura/automotive/dtc/data.json +17 -102
- oscura/automotive/dtc/database.py +2 -2
- oscura/automotive/flexray/__init__.py +31 -0
- oscura/automotive/flexray/analyzer.py +504 -0
- oscura/automotive/flexray/crc.py +185 -0
- oscura/automotive/flexray/fibex.py +449 -0
- oscura/automotive/j1939/__init__.py +45 -8
- oscura/automotive/j1939/analyzer.py +605 -0
- oscura/automotive/j1939/spns.py +326 -0
- oscura/automotive/j1939/transport.py +306 -0
- oscura/automotive/lin/__init__.py +47 -0
- oscura/automotive/lin/analyzer.py +612 -0
- oscura/automotive/loaders/blf.py +13 -2
- oscura/automotive/loaders/csv_can.py +143 -72
- oscura/automotive/loaders/dispatcher.py +50 -2
- oscura/automotive/loaders/mdf.py +86 -45
- oscura/automotive/loaders/pcap.py +111 -61
- oscura/automotive/uds/__init__.py +4 -0
- oscura/automotive/uds/analyzer.py +725 -0
- oscura/automotive/uds/decoder.py +140 -58
- oscura/automotive/uds/models.py +7 -1
- oscura/automotive/visualization.py +1 -1
- oscura/cli/analyze.py +348 -0
- oscura/cli/batch.py +142 -122
- oscura/cli/benchmark.py +275 -0
- oscura/cli/characterize.py +137 -82
- oscura/cli/compare.py +224 -131
- oscura/cli/completion.py +250 -0
- oscura/cli/config_cmd.py +361 -0
- oscura/cli/decode.py +164 -87
- oscura/cli/export.py +286 -0
- oscura/cli/main.py +115 -31
- oscura/{onboarding → cli/onboarding}/__init__.py +3 -3
- oscura/{onboarding → cli/onboarding}/help.py +80 -58
- oscura/{onboarding → cli/onboarding}/tutorials.py +97 -72
- oscura/{onboarding → cli/onboarding}/wizard.py +55 -36
- oscura/cli/progress.py +147 -0
- oscura/cli/shell.py +157 -135
- oscura/cli/validate_cmd.py +204 -0
- oscura/cli/visualize.py +158 -0
- oscura/convenience.py +125 -79
- oscura/core/__init__.py +4 -2
- oscura/core/backend_selector.py +3 -3
- oscura/core/cache.py +126 -15
- oscura/core/cancellation.py +1 -1
- oscura/{config → core/config}/__init__.py +20 -11
- oscura/{config → core/config}/defaults.py +1 -1
- oscura/{config → core/config}/loader.py +7 -5
- oscura/{config → core/config}/memory.py +5 -5
- oscura/{config → core/config}/migration.py +1 -1
- oscura/{config → core/config}/pipeline.py +99 -23
- oscura/{config → core/config}/preferences.py +1 -1
- oscura/{config → core/config}/protocol.py +3 -3
- oscura/{config → core/config}/schema.py +426 -272
- oscura/{config → core/config}/settings.py +1 -1
- oscura/{config → core/config}/thresholds.py +195 -153
- oscura/core/correlation.py +5 -6
- oscura/core/cross_domain.py +0 -2
- oscura/core/debug.py +9 -5
- oscura/{extensibility → core/extensibility}/docs.py +158 -70
- oscura/{extensibility → core/extensibility}/extensions.py +160 -76
- oscura/{extensibility → core/extensibility}/logging.py +1 -1
- oscura/{extensibility → core/extensibility}/measurements.py +1 -1
- oscura/{extensibility → core/extensibility}/plugins.py +1 -1
- oscura/{extensibility → core/extensibility}/templates.py +73 -3
- oscura/{extensibility → core/extensibility}/validation.py +1 -1
- oscura/core/gpu_backend.py +11 -7
- oscura/core/log_query.py +101 -11
- oscura/core/logging.py +126 -54
- oscura/core/logging_advanced.py +5 -5
- oscura/core/memory_limits.py +108 -70
- oscura/core/memory_monitor.py +2 -2
- oscura/core/memory_progress.py +7 -7
- oscura/core/memory_warnings.py +1 -1
- oscura/core/numba_backend.py +13 -13
- oscura/{plugins → core/plugins}/__init__.py +9 -9
- oscura/{plugins → core/plugins}/base.py +7 -7
- oscura/{plugins → core/plugins}/cli.py +3 -3
- oscura/{plugins → core/plugins}/discovery.py +186 -106
- oscura/{plugins → core/plugins}/lifecycle.py +1 -1
- oscura/{plugins → core/plugins}/manager.py +7 -7
- oscura/{plugins → core/plugins}/registry.py +3 -3
- oscura/{plugins → core/plugins}/versioning.py +1 -1
- oscura/core/progress.py +16 -1
- oscura/core/provenance.py +8 -2
- oscura/{schemas → core/schemas}/__init__.py +2 -2
- oscura/{schemas → core/schemas}/device_mapping.json +2 -8
- oscura/{schemas → core/schemas}/packet_format.json +4 -24
- oscura/{schemas → core/schemas}/protocol_definition.json +2 -12
- oscura/core/types.py +4 -0
- oscura/core/uncertainty.py +3 -3
- oscura/correlation/__init__.py +52 -0
- oscura/correlation/multi_protocol.py +811 -0
- oscura/discovery/auto_decoder.py +117 -35
- oscura/discovery/comparison.py +191 -86
- oscura/discovery/quality_validator.py +155 -68
- oscura/discovery/signal_detector.py +196 -79
- oscura/export/__init__.py +18 -8
- oscura/export/kaitai_struct.py +513 -0
- oscura/export/scapy_layer.py +801 -0
- oscura/export/wireshark/generator.py +1 -1
- oscura/export/wireshark/templates/dissector.lua.j2 +2 -2
- oscura/export/wireshark_dissector.py +746 -0
- oscura/guidance/wizard.py +207 -111
- oscura/hardware/__init__.py +19 -0
- oscura/{acquisition → hardware/acquisition}/__init__.py +4 -4
- oscura/{acquisition → hardware/acquisition}/file.py +2 -2
- oscura/{acquisition → hardware/acquisition}/hardware.py +7 -7
- oscura/{acquisition → hardware/acquisition}/saleae.py +15 -12
- oscura/{acquisition → hardware/acquisition}/socketcan.py +1 -1
- oscura/{acquisition → hardware/acquisition}/streaming.py +2 -2
- oscura/{acquisition → hardware/acquisition}/synthetic.py +3 -3
- oscura/{acquisition → hardware/acquisition}/visa.py +33 -11
- oscura/hardware/firmware/__init__.py +29 -0
- oscura/hardware/firmware/pattern_recognition.py +874 -0
- oscura/hardware/hal_detector.py +736 -0
- oscura/hardware/security/__init__.py +37 -0
- oscura/hardware/security/side_channel_detector.py +1126 -0
- oscura/inference/__init__.py +4 -0
- oscura/inference/active_learning/observation_table.py +4 -1
- oscura/inference/alignment.py +216 -123
- oscura/inference/bayesian.py +113 -33
- oscura/inference/crc_reverse.py +101 -55
- oscura/inference/logic.py +6 -2
- oscura/inference/message_format.py +342 -183
- oscura/inference/protocol.py +95 -44
- oscura/inference/protocol_dsl.py +180 -82
- oscura/inference/signal_intelligence.py +1439 -706
- oscura/inference/spectral.py +99 -57
- oscura/inference/state_machine.py +810 -158
- oscura/inference/stream.py +270 -110
- oscura/iot/__init__.py +34 -0
- oscura/iot/coap/__init__.py +32 -0
- oscura/iot/coap/analyzer.py +668 -0
- oscura/iot/coap/options.py +212 -0
- oscura/iot/lorawan/__init__.py +21 -0
- oscura/iot/lorawan/crypto.py +206 -0
- oscura/iot/lorawan/decoder.py +801 -0
- oscura/iot/lorawan/mac_commands.py +341 -0
- oscura/iot/mqtt/__init__.py +27 -0
- oscura/iot/mqtt/analyzer.py +999 -0
- oscura/iot/mqtt/properties.py +315 -0
- oscura/iot/zigbee/__init__.py +31 -0
- oscura/iot/zigbee/analyzer.py +615 -0
- oscura/iot/zigbee/security.py +153 -0
- oscura/iot/zigbee/zcl.py +349 -0
- oscura/jupyter/display.py +125 -45
- oscura/{exploratory → jupyter/exploratory}/__init__.py +8 -8
- oscura/{exploratory → jupyter/exploratory}/error_recovery.py +298 -141
- oscura/jupyter/exploratory/fuzzy.py +746 -0
- oscura/{exploratory → jupyter/exploratory}/fuzzy_advanced.py +258 -100
- oscura/{exploratory → jupyter/exploratory}/legacy.py +464 -242
- oscura/{exploratory → jupyter/exploratory}/parse.py +167 -145
- oscura/{exploratory → jupyter/exploratory}/recovery.py +119 -87
- oscura/jupyter/exploratory/sync.py +612 -0
- oscura/{exploratory → jupyter/exploratory}/unknown.py +299 -176
- oscura/jupyter/magic.py +4 -4
- oscura/{ui → jupyter/ui}/__init__.py +2 -2
- oscura/{ui → jupyter/ui}/formatters.py +3 -3
- oscura/{ui → jupyter/ui}/progressive_display.py +153 -82
- oscura/loaders/__init__.py +183 -67
- oscura/loaders/binary.py +88 -1
- oscura/loaders/chipwhisperer.py +153 -137
- oscura/loaders/configurable.py +208 -86
- oscura/loaders/csv_loader.py +458 -215
- oscura/loaders/hdf5_loader.py +278 -119
- oscura/loaders/lazy.py +87 -54
- oscura/loaders/mmap_loader.py +1 -1
- oscura/loaders/numpy_loader.py +253 -116
- oscura/loaders/pcap.py +226 -151
- oscura/loaders/rigol.py +110 -49
- oscura/loaders/sigrok.py +201 -78
- oscura/loaders/tdms.py +81 -58
- oscura/loaders/tektronix.py +291 -174
- oscura/loaders/touchstone.py +182 -87
- oscura/loaders/tss.py +456 -0
- oscura/loaders/vcd.py +215 -117
- oscura/loaders/wav.py +155 -68
- oscura/reporting/__init__.py +9 -0
- oscura/reporting/analyze.py +352 -146
- oscura/reporting/argument_preparer.py +69 -14
- oscura/reporting/auto_report.py +97 -61
- oscura/reporting/batch.py +131 -58
- oscura/reporting/chart_selection.py +57 -45
- oscura/reporting/comparison.py +63 -17
- oscura/reporting/content/executive.py +76 -24
- oscura/reporting/core_formats/multi_format.py +11 -8
- oscura/reporting/engine.py +312 -158
- oscura/reporting/enhanced_reports.py +949 -0
- oscura/reporting/export.py +86 -43
- oscura/reporting/formatting/numbers.py +69 -42
- oscura/reporting/html.py +139 -58
- oscura/reporting/index.py +137 -65
- oscura/reporting/output.py +158 -67
- oscura/reporting/pdf.py +67 -102
- oscura/reporting/plots.py +191 -112
- oscura/reporting/sections.py +88 -47
- oscura/reporting/standards.py +104 -61
- oscura/reporting/summary_generator.py +75 -55
- oscura/reporting/tables.py +138 -54
- oscura/reporting/templates/enhanced/protocol_re.html +525 -0
- oscura/sessions/__init__.py +14 -23
- oscura/sessions/base.py +3 -3
- oscura/sessions/blackbox.py +106 -10
- oscura/sessions/generic.py +2 -2
- oscura/sessions/legacy.py +783 -0
- oscura/side_channel/__init__.py +63 -0
- oscura/side_channel/dpa.py +1025 -0
- oscura/utils/__init__.py +15 -1
- oscura/utils/bitwise.py +118 -0
- oscura/{builders → utils/builders}/__init__.py +1 -1
- oscura/{comparison → utils/comparison}/__init__.py +6 -6
- oscura/{comparison → utils/comparison}/compare.py +202 -101
- oscura/{comparison → utils/comparison}/golden.py +83 -63
- oscura/{comparison → utils/comparison}/limits.py +313 -89
- oscura/{comparison → utils/comparison}/mask.py +151 -45
- oscura/{comparison → utils/comparison}/trace_diff.py +1 -1
- oscura/{comparison → utils/comparison}/visualization.py +147 -89
- oscura/{component → utils/component}/__init__.py +3 -3
- oscura/{component → utils/component}/impedance.py +122 -58
- oscura/{component → utils/component}/reactive.py +165 -168
- oscura/{component → utils/component}/transmission_line.py +3 -3
- oscura/{filtering → utils/filtering}/__init__.py +6 -6
- oscura/{filtering → utils/filtering}/base.py +1 -1
- oscura/{filtering → utils/filtering}/convenience.py +2 -2
- oscura/{filtering → utils/filtering}/design.py +169 -93
- oscura/{filtering → utils/filtering}/filters.py +2 -2
- oscura/{filtering → utils/filtering}/introspection.py +2 -2
- oscura/utils/geometry.py +31 -0
- oscura/utils/imports.py +184 -0
- oscura/utils/lazy.py +1 -1
- oscura/{math → utils/math}/__init__.py +2 -2
- oscura/{math → utils/math}/arithmetic.py +114 -48
- oscura/{math → utils/math}/interpolation.py +139 -106
- oscura/utils/memory.py +129 -66
- oscura/utils/memory_advanced.py +92 -9
- oscura/utils/memory_extensions.py +10 -8
- oscura/{optimization → utils/optimization}/__init__.py +1 -1
- oscura/{optimization → utils/optimization}/search.py +2 -2
- oscura/utils/performance/__init__.py +58 -0
- oscura/utils/performance/caching.py +889 -0
- oscura/utils/performance/lsh_clustering.py +333 -0
- oscura/utils/performance/memory_optimizer.py +699 -0
- oscura/utils/performance/optimizations.py +675 -0
- oscura/utils/performance/parallel.py +654 -0
- oscura/utils/performance/profiling.py +661 -0
- oscura/{pipeline → utils/pipeline}/base.py +1 -1
- oscura/{pipeline → utils/pipeline}/composition.py +1 -1
- oscura/{pipeline → utils/pipeline}/parallel.py +3 -2
- oscura/{pipeline → utils/pipeline}/pipeline.py +1 -1
- oscura/{pipeline → utils/pipeline}/reverse_engineering.py +412 -221
- oscura/{search → utils/search}/__init__.py +3 -3
- oscura/{search → utils/search}/anomaly.py +188 -58
- oscura/utils/search/context.py +294 -0
- oscura/{search → utils/search}/pattern.py +138 -10
- oscura/utils/serial.py +51 -0
- oscura/utils/storage/__init__.py +61 -0
- oscura/utils/storage/database.py +1166 -0
- oscura/{streaming → utils/streaming}/chunked.py +302 -143
- oscura/{streaming → utils/streaming}/progressive.py +1 -1
- oscura/{streaming → utils/streaming}/realtime.py +3 -2
- oscura/{triggering → utils/triggering}/__init__.py +6 -6
- oscura/{triggering → utils/triggering}/base.py +6 -6
- oscura/{triggering → utils/triggering}/edge.py +2 -2
- oscura/{triggering → utils/triggering}/pattern.py +2 -2
- oscura/{triggering → utils/triggering}/pulse.py +115 -74
- oscura/{triggering → utils/triggering}/window.py +2 -2
- oscura/utils/validation.py +32 -0
- oscura/validation/__init__.py +121 -0
- oscura/{compliance → validation/compliance}/__init__.py +5 -5
- oscura/{compliance → validation/compliance}/advanced.py +5 -5
- oscura/{compliance → validation/compliance}/masks.py +1 -1
- oscura/{compliance → validation/compliance}/reporting.py +127 -53
- oscura/{compliance → validation/compliance}/testing.py +114 -52
- oscura/validation/compliance_tests.py +915 -0
- oscura/validation/fuzzer.py +990 -0
- oscura/validation/grammar_tests.py +596 -0
- oscura/validation/grammar_validator.py +904 -0
- oscura/validation/hil_testing.py +977 -0
- oscura/{quality → validation/quality}/__init__.py +4 -4
- oscura/{quality → validation/quality}/ensemble.py +251 -171
- oscura/{quality → validation/quality}/explainer.py +3 -3
- oscura/{quality → validation/quality}/scoring.py +1 -1
- oscura/{quality → validation/quality}/warnings.py +4 -4
- oscura/validation/regression_suite.py +808 -0
- oscura/validation/replay.py +788 -0
- oscura/{testing → validation/testing}/__init__.py +2 -2
- oscura/{testing → validation/testing}/synthetic.py +5 -5
- oscura/visualization/__init__.py +9 -0
- oscura/visualization/accessibility.py +1 -1
- oscura/visualization/annotations.py +64 -67
- oscura/visualization/colors.py +7 -7
- oscura/visualization/digital.py +180 -81
- oscura/visualization/eye.py +236 -85
- oscura/visualization/interactive.py +320 -143
- oscura/visualization/jitter.py +587 -247
- oscura/visualization/layout.py +169 -134
- oscura/visualization/optimization.py +103 -52
- oscura/visualization/palettes.py +1 -1
- oscura/visualization/power.py +427 -211
- oscura/visualization/power_extended.py +626 -297
- oscura/visualization/presets.py +2 -0
- oscura/visualization/protocols.py +495 -181
- oscura/visualization/render.py +79 -63
- oscura/visualization/reverse_engineering.py +171 -124
- oscura/visualization/signal_integrity.py +460 -279
- oscura/visualization/specialized.py +190 -100
- oscura/visualization/spectral.py +670 -255
- oscura/visualization/thumbnails.py +166 -137
- oscura/visualization/waveform.py +150 -63
- oscura/workflows/__init__.py +3 -0
- oscura/{batch → workflows/batch}/__init__.py +5 -5
- oscura/{batch → workflows/batch}/advanced.py +150 -75
- oscura/workflows/batch/aggregate.py +531 -0
- oscura/workflows/batch/analyze.py +236 -0
- oscura/{batch → workflows/batch}/logging.py +2 -2
- oscura/{batch → workflows/batch}/metrics.py +1 -1
- oscura/workflows/complete_re.py +1144 -0
- oscura/workflows/compliance.py +44 -54
- oscura/workflows/digital.py +197 -51
- oscura/workflows/legacy/__init__.py +12 -0
- oscura/{workflow → workflows/legacy}/dag.py +4 -1
- oscura/workflows/multi_trace.py +9 -9
- oscura/workflows/power.py +42 -62
- oscura/workflows/protocol.py +82 -49
- oscura/workflows/reverse_engineering.py +351 -150
- oscura/workflows/signal_integrity.py +157 -82
- oscura-0.7.0.dist-info/METADATA +661 -0
- oscura-0.7.0.dist-info/RECORD +591 -0
- oscura/batch/aggregate.py +0 -300
- oscura/batch/analyze.py +0 -139
- oscura/dsl/__init__.py +0 -73
- oscura/exceptions.py +0 -59
- oscura/exploratory/fuzzy.py +0 -513
- oscura/exploratory/sync.py +0 -384
- oscura/exporters/__init__.py +0 -94
- oscura/exporters/csv.py +0 -303
- oscura/exporters/exporters.py +0 -44
- oscura/exporters/hdf5.py +0 -217
- oscura/exporters/html_export.py +0 -701
- oscura/exporters/json_export.py +0 -291
- oscura/exporters/markdown_export.py +0 -367
- oscura/exporters/matlab_export.py +0 -354
- oscura/exporters/npz_export.py +0 -219
- oscura/exporters/spice_export.py +0 -210
- oscura/search/context.py +0 -149
- oscura/session/__init__.py +0 -34
- oscura/session/annotations.py +0 -289
- oscura/session/history.py +0 -313
- oscura/session/session.py +0 -520
- oscura/workflow/__init__.py +0 -13
- oscura-0.5.1.dist-info/METADATA +0 -583
- oscura-0.5.1.dist-info/RECORD +0 -481
- /oscura/core/{config.py → config/legacy.py} +0 -0
- /oscura/{extensibility → core/extensibility}/__init__.py +0 -0
- /oscura/{extensibility → core/extensibility}/registry.py +0 -0
- /oscura/{plugins → core/plugins}/isolation.py +0 -0
- /oscura/{schemas → core/schemas}/bus_configuration.json +0 -0
- /oscura/{builders → utils/builders}/signal_builder.py +0 -0
- /oscura/{optimization → utils/optimization}/parallel.py +0 -0
- /oscura/{pipeline → utils/pipeline}/__init__.py +0 -0
- /oscura/{streaming → utils/streaming}/__init__.py +0 -0
- {oscura-0.5.1.dist-info → oscura-0.7.0.dist-info}/WHEEL +0 -0
- {oscura-0.5.1.dist-info → oscura-0.7.0.dist-info}/entry_points.txt +0 -0
- {oscura-0.5.1.dist-info → oscura-0.7.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -469,17 +469,9 @@ class FieldInferrer:
|
|
|
469
469
|
"""
|
|
470
470
|
size = end - start
|
|
471
471
|
name = f"field_{index}"
|
|
472
|
+
raw_values = self._extract_field_values(messages, start, end)
|
|
472
473
|
|
|
473
|
-
|
|
474
|
-
values = []
|
|
475
|
-
raw_values = []
|
|
476
|
-
for msg in messages:
|
|
477
|
-
if len(msg) >= end:
|
|
478
|
-
field_bytes = msg[start:end]
|
|
479
|
-
raw_values.append(field_bytes)
|
|
480
|
-
values.append(field_bytes)
|
|
481
|
-
|
|
482
|
-
if not values:
|
|
474
|
+
if not raw_values:
|
|
483
475
|
return InferredField(
|
|
484
476
|
name=name,
|
|
485
477
|
offset=start,
|
|
@@ -488,31 +480,60 @@ class FieldInferrer:
|
|
|
488
480
|
confidence=0.0,
|
|
489
481
|
)
|
|
490
482
|
|
|
491
|
-
#
|
|
483
|
+
# Analyze field properties
|
|
492
484
|
unique_values = set(raw_values)
|
|
493
485
|
is_constant = len(unique_values) == 1
|
|
486
|
+
is_sequence = self._check_sequence(raw_values, size, is_constant)
|
|
487
|
+
is_checksum = self._check_checksum(messages, start, size)
|
|
494
488
|
|
|
495
|
-
#
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
int_values = [int.from_bytes(v, "big") for v in raw_values]
|
|
499
|
-
is_sequence = self._is_sequence(int_values)
|
|
489
|
+
# Infer type and create sample values
|
|
490
|
+
inferred_type, endianness, confidence = self._infer_type(raw_values, size)
|
|
491
|
+
sample_values = self._create_sample_values(raw_values[:5], inferred_type, endianness)
|
|
500
492
|
|
|
501
|
-
#
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
score = self._check_checksum_correlation(messages, start, size)
|
|
505
|
-
is_checksum = score > 0.7
|
|
493
|
+
# Cast to Literal types for type checker
|
|
494
|
+
type_literal = self._cast_type_literal(inferred_type)
|
|
495
|
+
endianness_literal = self._cast_endianness_literal(endianness)
|
|
506
496
|
|
|
507
|
-
|
|
508
|
-
|
|
497
|
+
return InferredField(
|
|
498
|
+
name=name,
|
|
499
|
+
offset=start,
|
|
500
|
+
size=size,
|
|
501
|
+
inferred_type=type_literal,
|
|
502
|
+
endianness=endianness_literal,
|
|
503
|
+
is_constant=is_constant,
|
|
504
|
+
is_sequence=is_sequence,
|
|
505
|
+
is_checksum=is_checksum,
|
|
506
|
+
constant_value=raw_values[0] if is_constant else None,
|
|
507
|
+
confidence=confidence,
|
|
508
|
+
sample_values=sample_values,
|
|
509
|
+
)
|
|
510
|
+
|
|
511
|
+
def _extract_field_values(self, messages: Sequence[bytes], start: int, end: int) -> list[bytes]:
|
|
512
|
+
"""Extract field values from messages."""
|
|
513
|
+
return [msg[start:end] for msg in messages if len(msg) >= end]
|
|
514
|
+
|
|
515
|
+
def _check_sequence(self, raw_values: list[bytes], size: int, is_constant: bool) -> bool:
|
|
516
|
+
"""Check if field values form a sequence."""
|
|
517
|
+
if is_constant or size not in [1, 2, 4, 8]:
|
|
518
|
+
return False
|
|
519
|
+
int_values = [int.from_bytes(v, "big") for v in raw_values]
|
|
520
|
+
return self._is_sequence(int_values)
|
|
509
521
|
|
|
510
|
-
|
|
522
|
+
def _check_checksum(self, messages: Sequence[bytes], start: int, size: int) -> bool:
|
|
523
|
+
"""Check if field appears to be a checksum."""
|
|
524
|
+
if start < min(len(m) for m in messages) - 4:
|
|
525
|
+
return False
|
|
526
|
+
score = self._check_checksum_correlation(messages, start, size)
|
|
527
|
+
return score > 0.7
|
|
528
|
+
|
|
529
|
+
def _create_sample_values(
|
|
530
|
+
self, raw_values: list[bytes], inferred_type: str, endianness: str
|
|
531
|
+
) -> list[int | str]:
|
|
532
|
+
"""Create sample values for debugging."""
|
|
511
533
|
sample_values: list[int | str] = []
|
|
512
|
-
for v in raw_values
|
|
513
|
-
if inferred_type.startswith("uint"
|
|
534
|
+
for v in raw_values:
|
|
535
|
+
if inferred_type.startswith(("uint", "int")):
|
|
514
536
|
try:
|
|
515
|
-
# Cast endianness to Literal type for type checker
|
|
516
537
|
byte_order: Literal["big", "little"] = (
|
|
517
538
|
"big" if endianness == "n/a" else endianness # type: ignore[assignment]
|
|
518
539
|
)
|
|
@@ -526,38 +547,31 @@ class FieldInferrer:
|
|
|
526
547
|
sample_values.append(v.hex())
|
|
527
548
|
else:
|
|
528
549
|
sample_values.append(v.hex())
|
|
550
|
+
return sample_values
|
|
529
551
|
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
552
|
+
def _cast_type_literal(
|
|
553
|
+
self, inferred_type: str
|
|
554
|
+
) -> Literal[
|
|
555
|
+
"uint8",
|
|
556
|
+
"uint16",
|
|
557
|
+
"uint32",
|
|
558
|
+
"uint64",
|
|
559
|
+
"int8",
|
|
560
|
+
"int16",
|
|
561
|
+
"int32",
|
|
562
|
+
"int64",
|
|
563
|
+
"float32",
|
|
564
|
+
"float64",
|
|
565
|
+
"bytes",
|
|
566
|
+
"string",
|
|
567
|
+
"unknown",
|
|
568
|
+
]:
|
|
569
|
+
"""Cast inferred type to Literal for type checker."""
|
|
570
|
+
return inferred_type # type: ignore[return-value]
|
|
547
571
|
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
size=size,
|
|
552
|
-
inferred_type=inferred_type_literal,
|
|
553
|
-
endianness=endianness_literal,
|
|
554
|
-
is_constant=is_constant,
|
|
555
|
-
is_sequence=is_sequence,
|
|
556
|
-
is_checksum=is_checksum,
|
|
557
|
-
constant_value=raw_values[0] if is_constant else None,
|
|
558
|
-
confidence=confidence,
|
|
559
|
-
sample_values=sample_values,
|
|
560
|
-
)
|
|
572
|
+
def _cast_endianness_literal(self, endianness: str) -> Literal["big", "little", "n/a"]:
|
|
573
|
+
"""Cast endianness to Literal for type checker."""
|
|
574
|
+
return endianness # type: ignore[return-value]
|
|
561
575
|
|
|
562
576
|
def _infer_type(
|
|
563
577
|
self,
|
|
@@ -576,59 +590,114 @@ class FieldInferrer:
|
|
|
576
590
|
if not values:
|
|
577
591
|
return "unknown", "n/a", 0.0
|
|
578
592
|
|
|
579
|
-
# Check for string
|
|
593
|
+
# Check for string first
|
|
594
|
+
string_result = self._check_string_type(values, size)
|
|
595
|
+
if string_result is not None:
|
|
596
|
+
return string_result
|
|
597
|
+
|
|
598
|
+
# Infer based on field size
|
|
599
|
+
if size == 1:
|
|
600
|
+
return "uint8", "n/a", 0.9
|
|
601
|
+
elif size == 2:
|
|
602
|
+
return self._infer_uint16_type(values)
|
|
603
|
+
elif size == 4:
|
|
604
|
+
return self._infer_4byte_type(values)
|
|
605
|
+
elif size == 8:
|
|
606
|
+
return self._infer_uint64_type(values)
|
|
607
|
+
else:
|
|
608
|
+
return "bytes", "n/a", 0.6
|
|
609
|
+
|
|
610
|
+
def _check_string_type(self, values: list[bytes], size: int) -> tuple[str, str, float] | None:
|
|
611
|
+
"""Check if values represent string data.
|
|
612
|
+
|
|
613
|
+
Args:
|
|
614
|
+
values: Field values to check.
|
|
615
|
+
size: Field size.
|
|
616
|
+
|
|
617
|
+
Returns:
|
|
618
|
+
Type tuple if string, None otherwise.
|
|
619
|
+
"""
|
|
580
620
|
printable_ratio = sum(
|
|
581
621
|
1 for v in values for b in v if 32 <= b <= 126 or b in (9, 10, 13)
|
|
582
622
|
) / (len(values) * size)
|
|
583
623
|
|
|
584
624
|
if printable_ratio > 0.8:
|
|
585
625
|
return "string", "n/a", printable_ratio
|
|
626
|
+
return None
|
|
586
627
|
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
return "uint8", "n/a", 0.9
|
|
628
|
+
def _infer_uint16_type(self, values: list[bytes]) -> tuple[str, str, float]:
|
|
629
|
+
"""Infer uint16 type and detect endianness.
|
|
590
630
|
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
be_variance = np.var([int.from_bytes(v, "big") for v in values])
|
|
594
|
-
le_variance = np.var([int.from_bytes(v, "little") for v in values])
|
|
631
|
+
Args:
|
|
632
|
+
values: Field values.
|
|
595
633
|
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
634
|
+
Returns:
|
|
635
|
+
Type tuple with endianness.
|
|
636
|
+
"""
|
|
637
|
+
endian = self._detect_endianness(values)
|
|
638
|
+
return "uint16", endian, 0.8
|
|
600
639
|
|
|
601
|
-
|
|
640
|
+
def _infer_4byte_type(self, values: list[bytes]) -> tuple[str, str, float]:
    """Classify a 4-byte field as float32 or uint32.

    Args:
        values: Field values.

    Returns:
        ("float32", "big", 0.7) when the float check passes, otherwise
        ("uint32", detected endianness, 0.8).
    """
    looks_like_float = self._is_valid_float32(values)
    if not looks_like_float:
        # Not float-like: treat as an unsigned integer and detect byte order.
        byte_order = self._detect_endianness(values)
        return "uint32", byte_order, 0.8

    return "float32", "big", 0.7
|
|
622
656
|
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
be_variance = np.var([int.from_bytes(v, "big") for v in values])
|
|
626
|
-
le_variance = np.var([int.from_bytes(v, "little") for v in values])
|
|
627
|
-
endian = "big" if be_variance < le_variance else "little"
|
|
628
|
-
return "uint64", endian, 0.7
|
|
657
|
+
def _infer_uint64_type(self, values: list[bytes]) -> tuple[str, str, float]:
    """Classify an 8-byte field as uint64 and report its byte order.

    Args:
        values: Field values.

    Returns:
        Tuple of ("uint64", detected endianness, 0.7).
    """
    # Byte order comes from the shared endianness heuristic.
    byte_order = self._detect_endianness(values)
    inferred = ("uint64", byte_order, 0.7)
    return inferred
|
|
668
|
+
|
|
669
|
+
def _detect_endianness(self, values: list[bytes]) -> str:
|
|
670
|
+
"""Detect endianness by comparing variance.
|
|
671
|
+
|
|
672
|
+
Args:
|
|
673
|
+
values: Field values.
|
|
674
|
+
|
|
675
|
+
Returns:
|
|
676
|
+
Endianness string ("big" or "little").
|
|
677
|
+
"""
|
|
678
|
+
be_variance = np.var([int.from_bytes(v, "big") for v in values])
|
|
679
|
+
le_variance = np.var([int.from_bytes(v, "little") for v in values])
|
|
680
|
+
return "big" if be_variance < le_variance else "little"
|
|
681
|
+
|
|
682
|
+
def _is_valid_float32(self, values: list[bytes]) -> bool:
|
|
683
|
+
"""Check if values are valid float32 numbers.
|
|
684
|
+
|
|
685
|
+
Args:
|
|
686
|
+
values: Field values to check.
|
|
687
|
+
|
|
688
|
+
Returns:
|
|
689
|
+
True if majority are valid floats.
|
|
690
|
+
"""
|
|
691
|
+
float_valid = 0
|
|
692
|
+
for v in values:
|
|
693
|
+
try:
|
|
694
|
+
f = struct.unpack(">f", v)[0]
|
|
695
|
+
if not (np.isnan(f) or np.isinf(f)) and -1e10 < f < 1e10:
|
|
696
|
+
float_valid += 1
|
|
697
|
+
except Exception:
|
|
698
|
+
pass
|
|
699
|
+
|
|
700
|
+
return float_valid / len(values) > 0.8
|
|
632
701
|
|
|
633
702
|
def _is_sequence(self, values: list[int]) -> bool:
|
|
634
703
|
"""Check if values form a sequence.
|
|
@@ -827,30 +896,82 @@ def diff_payloads(payload_a: bytes, payload_b: bytes) -> PayloadDiff:
|
|
|
827
896
|
>>> print(f"Common prefix: {diff.common_prefix_length} bytes")
|
|
828
897
|
>>> print(f"Different bytes: {len(diff.differences)}")
|
|
829
898
|
"""
|
|
830
|
-
# Find common prefix
|
|
831
|
-
common_prefix = 0
|
|
832
899
|
min_len = min(len(payload_a), len(payload_b))
|
|
900
|
+
|
|
901
|
+
common_prefix = _find_common_prefix(payload_a, payload_b, min_len)
|
|
902
|
+
common_suffix = _find_common_suffix(payload_a, payload_b, min_len, common_prefix)
|
|
903
|
+
differences = _find_payload_differences(payload_a, payload_b, min_len)
|
|
904
|
+
|
|
905
|
+
similarity = _calculate_similarity(payload_a, payload_b, min_len, differences)
|
|
906
|
+
edit_distance = _levenshtein_distance(payload_a, payload_b)
|
|
907
|
+
|
|
908
|
+
return PayloadDiff(
|
|
909
|
+
common_prefix_length=common_prefix,
|
|
910
|
+
common_suffix_length=common_suffix,
|
|
911
|
+
differences=differences,
|
|
912
|
+
similarity=similarity,
|
|
913
|
+
edit_distance=edit_distance,
|
|
914
|
+
)
|
|
915
|
+
|
|
916
|
+
|
|
917
|
+
def _find_common_prefix(payload_a: bytes, payload_b: bytes, min_len: int) -> int:
|
|
918
|
+
"""Find length of common prefix.
|
|
919
|
+
|
|
920
|
+
Args:
|
|
921
|
+
payload_a: First payload.
|
|
922
|
+
payload_b: Second payload.
|
|
923
|
+
min_len: Minimum payload length.
|
|
924
|
+
|
|
925
|
+
Returns:
|
|
926
|
+
Length of common prefix in bytes.
|
|
927
|
+
"""
|
|
833
928
|
for i in range(min_len):
|
|
834
|
-
if payload_a[i]
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
break
|
|
929
|
+
if payload_a[i] != payload_b[i]:
|
|
930
|
+
return i
|
|
931
|
+
return min_len
|
|
838
932
|
|
|
839
|
-
|
|
840
|
-
|
|
933
|
+
|
|
934
|
+
def _find_common_suffix(
|
|
935
|
+
payload_a: bytes, payload_b: bytes, min_len: int, common_prefix: int
|
|
936
|
+
) -> int:
|
|
937
|
+
"""Find length of common suffix.
|
|
938
|
+
|
|
939
|
+
Args:
|
|
940
|
+
payload_a: First payload.
|
|
941
|
+
payload_b: Second payload.
|
|
942
|
+
min_len: Minimum payload length.
|
|
943
|
+
common_prefix: Length of common prefix.
|
|
944
|
+
|
|
945
|
+
Returns:
|
|
946
|
+
Length of common suffix in bytes.
|
|
947
|
+
"""
|
|
841
948
|
for i in range(1, min_len - common_prefix + 1):
|
|
842
|
-
if payload_a[-i]
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
949
|
+
if payload_a[-i] != payload_b[-i]:
|
|
950
|
+
return i - 1
|
|
951
|
+
return min_len - common_prefix
|
|
952
|
+
|
|
953
|
+
|
|
954
|
+
def _find_payload_differences(
|
|
955
|
+
payload_a: bytes, payload_b: bytes, min_len: int
|
|
956
|
+
) -> list[tuple[int, int, int]]:
|
|
957
|
+
"""Find all byte differences between payloads.
|
|
958
|
+
|
|
959
|
+
Args:
|
|
960
|
+
payload_a: First payload.
|
|
961
|
+
payload_b: Second payload.
|
|
962
|
+
min_len: Minimum payload length.
|
|
846
963
|
|
|
847
|
-
|
|
964
|
+
Returns:
|
|
965
|
+
List of (offset, byte_a, byte_b) tuples (-1 for missing bytes).
|
|
966
|
+
"""
|
|
848
967
|
differences = []
|
|
968
|
+
|
|
969
|
+
# Differences in overlapping region
|
|
849
970
|
for i in range(min_len):
|
|
850
971
|
if payload_a[i] != payload_b[i]:
|
|
851
972
|
differences.append((i, payload_a[i], payload_b[i]))
|
|
852
973
|
|
|
853
|
-
#
|
|
974
|
+
# Length differences
|
|
854
975
|
if len(payload_a) > len(payload_b):
|
|
855
976
|
for i in range(len(payload_b), len(payload_a)):
|
|
856
977
|
differences.append((i, payload_a[i], -1))
|
|
@@ -858,24 +979,29 @@ def diff_payloads(payload_a: bytes, payload_b: bytes) -> PayloadDiff:
|
|
|
858
979
|
for i in range(len(payload_a), len(payload_b)):
|
|
859
980
|
differences.append((i, -1, payload_b[i]))
|
|
860
981
|
|
|
861
|
-
|
|
982
|
+
return differences
|
|
983
|
+
|
|
984
|
+
|
|
985
|
+
def _calculate_similarity(
|
|
986
|
+
payload_a: bytes, payload_b: bytes, min_len: int, differences: list[tuple[int, int, int]]
|
|
987
|
+
) -> float:
|
|
988
|
+
"""Calculate payload similarity ratio.
|
|
989
|
+
|
|
990
|
+
Args:
|
|
991
|
+
payload_a: First payload.
|
|
992
|
+
payload_b: Second payload.
|
|
993
|
+
min_len: Minimum payload length.
|
|
994
|
+
differences: List of differences.
|
|
995
|
+
|
|
996
|
+
Returns:
|
|
997
|
+
Similarity ratio (0.0-1.0).
|
|
998
|
+
"""
|
|
862
999
|
max_len = max(len(payload_a), len(payload_b))
|
|
863
1000
|
if max_len == 0:
|
|
864
|
-
|
|
865
|
-
else:
|
|
866
|
-
matching = min_len - len([d for d in differences if d[0] < min_len])
|
|
867
|
-
similarity = matching / max_len
|
|
1001
|
+
return 1.0
|
|
868
1002
|
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
return PayloadDiff(
|
|
873
|
-
common_prefix_length=common_prefix,
|
|
874
|
-
common_suffix_length=common_suffix,
|
|
875
|
-
differences=differences,
|
|
876
|
-
similarity=similarity,
|
|
877
|
-
edit_distance=edit_distance,
|
|
878
|
-
)
|
|
1003
|
+
matching = min_len - len([d for d in differences if d[0] < min_len])
|
|
1004
|
+
return matching / max_len
|
|
879
1005
|
|
|
880
1006
|
|
|
881
1007
|
def find_common_bytes(payloads: Sequence[bytes]) -> bytes:
|
|
@@ -1008,7 +1134,7 @@ def compute_similarity(
|
|
|
1008
1134
|
def cluster_payloads(
|
|
1009
1135
|
payloads: Sequence[bytes],
|
|
1010
1136
|
threshold: float = 0.8,
|
|
1011
|
-
algorithm: Literal["greedy", "dbscan"] = "greedy",
|
|
1137
|
+
algorithm: Literal["greedy", "dbscan", "lsh"] = "greedy",
|
|
1012
1138
|
) -> list[PayloadCluster]:
|
|
1013
1139
|
"""Cluster similar payloads together.
|
|
1014
1140
|
|
|
@@ -1017,7 +1143,7 @@ def cluster_payloads(
|
|
|
1017
1143
|
Args:
|
|
1018
1144
|
payloads: List of payloads to cluster.
|
|
1019
1145
|
threshold: Similarity threshold for clustering.
|
|
1020
|
-
algorithm: Clustering algorithm.
|
|
1146
|
+
algorithm: Clustering algorithm (greedy: O(n²), lsh: O(n log n)).
|
|
1021
1147
|
|
|
1022
1148
|
Returns:
|
|
1023
1149
|
List of PayloadCluster objects.
|
|
@@ -1026,11 +1152,19 @@ def cluster_payloads(
|
|
|
1026
1152
|
>>> clusters = cluster_payloads(payloads, threshold=0.85)
|
|
1027
1153
|
>>> for c in clusters:
|
|
1028
1154
|
... print(f"Cluster {c.cluster_id}: {c.size} payloads")
|
|
1155
|
+
|
|
1156
|
+
>>> # For large datasets (>1000 payloads), use LSH for 100-1000x speedup
|
|
1157
|
+
>>> clusters = cluster_payloads(payloads, threshold=0.85, algorithm="lsh")
|
|
1029
1158
|
"""
|
|
1030
1159
|
if not payloads:
|
|
1031
1160
|
return []
|
|
1032
1161
|
|
|
1033
|
-
if algorithm == "
|
|
1162
|
+
if algorithm == "lsh":
|
|
1163
|
+
# Use LSH for O(n log n) performance on large datasets
|
|
1164
|
+
from oscura.utils.performance.lsh_clustering import cluster_payloads_lsh
|
|
1165
|
+
|
|
1166
|
+
return cluster_payloads_lsh(payloads, threshold=threshold)
|
|
1167
|
+
elif algorithm == "greedy":
|
|
1034
1168
|
return _cluster_greedy_optimized(payloads, threshold)
|
|
1035
1169
|
# algorithm == "dbscan"
|
|
1036
1170
|
return _cluster_dbscan(payloads, threshold)
|
|
@@ -1103,78 +1237,82 @@ def _levenshtein_distance(a: bytes, b: bytes) -> int:
|
|
|
1103
1237
|
return previous_row[-1]
|
|
1104
1238
|
|
|
1105
1239
|
|
|
1106
|
-
def
|
|
1107
|
-
"""
|
|
1108
|
-
|
|
1109
|
-
Uses length-based filtering and sampling to quickly reject dissimilar payloads.
|
|
1110
|
-
Returns None if payloads are likely similar (needs full check),
|
|
1111
|
-
or a similarity value if they can be quickly determined.
|
|
1240
|
+
def _check_length_similarity(len_a: int, len_b: int, threshold: float) -> float | None:
|
|
1241
|
+
"""Check if length difference allows similarity threshold.
|
|
1112
1242
|
|
|
1113
1243
|
Args:
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
threshold: Similarity threshold
|
|
1244
|
+
len_a: Length of first payload.
|
|
1245
|
+
len_b: Length of second payload.
|
|
1246
|
+
threshold: Similarity threshold.
|
|
1117
1247
|
|
|
1118
1248
|
Returns:
|
|
1119
|
-
Similarity
|
|
1249
|
+
Similarity if can be determined from length, None otherwise.
|
|
1120
1250
|
"""
|
|
1121
|
-
len_a = len(payload_a)
|
|
1122
|
-
len_b = len(payload_b)
|
|
1123
|
-
|
|
1124
1251
|
# Empty payloads
|
|
1125
1252
|
if len_a == 0 and len_b == 0:
|
|
1126
1253
|
return 1.0
|
|
1127
1254
|
if len_a == 0 or len_b == 0:
|
|
1128
1255
|
return 0.0
|
|
1129
1256
|
|
|
1130
|
-
#
|
|
1131
|
-
# similarity can't exceed threshold
|
|
1257
|
+
# Maximum possible similarity given length difference
|
|
1132
1258
|
max_len = max(len_a, len_b)
|
|
1133
1259
|
min_len = min(len_a, len_b)
|
|
1134
|
-
_length_diff = max_len - min_len
|
|
1135
|
-
|
|
1136
|
-
# Maximum possible similarity given length difference
|
|
1137
1260
|
max_possible_similarity = min_len / max_len
|
|
1261
|
+
|
|
1138
1262
|
if max_possible_similarity < threshold:
|
|
1139
1263
|
return max_possible_similarity
|
|
1140
1264
|
|
|
1141
|
-
|
|
1142
|
-
if len_a == len_b:
|
|
1143
|
-
# Sample comparison for large payloads
|
|
1144
|
-
if len_a > 50:
|
|
1145
|
-
# Sample first 16, last 16, and some middle bytes
|
|
1146
|
-
sample_size = min(48, len_a)
|
|
1147
|
-
mismatches = 0
|
|
1148
|
-
|
|
1149
|
-
# First 16 bytes
|
|
1150
|
-
for i in range(min(16, len_a)):
|
|
1151
|
-
if payload_a[i] != payload_b[i]:
|
|
1152
|
-
mismatches += 1
|
|
1153
|
-
|
|
1154
|
-
# Last 16 bytes
|
|
1155
|
-
for i in range(1, min(17, len_a + 1)):
|
|
1156
|
-
if payload_a[-i] != payload_b[-i]:
|
|
1157
|
-
mismatches += 1
|
|
1158
|
-
|
|
1159
|
-
# Middle samples (len_a > 32 always true here since len_a > 50)
|
|
1160
|
-
step = (len_a - 32) // 16
|
|
1161
|
-
if step > 0:
|
|
1162
|
-
for i in range(16, len_a - 16, step):
|
|
1163
|
-
if payload_a[i] != payload_b[i]:
|
|
1164
|
-
mismatches += 1
|
|
1165
|
-
|
|
1166
|
-
# Estimate similarity from sample
|
|
1167
|
-
estimated_similarity = 1.0 - (mismatches / sample_size)
|
|
1265
|
+
return None
|
|
1168
1266
|
|
|
1169
|
-
# If sample shows very low similarity, reject early
|
|
1170
|
-
if estimated_similarity < threshold * 0.8:
|
|
1171
|
-
return estimated_similarity
|
|
1172
1267
|
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
return matches / len_a
|
|
1268
|
+
def _sample_hamming_similarity(payload_a: bytes, payload_b: bytes, length: int) -> float:
|
|
1269
|
+
"""Compute similarity by sampling first 16, last 16, and middle bytes.
|
|
1176
1270
|
|
|
1177
|
-
|
|
1271
|
+
Args:
|
|
1272
|
+
payload_a: First payload.
|
|
1273
|
+
payload_b: Second payload.
|
|
1274
|
+
length: Length of payloads (must be equal).
|
|
1275
|
+
|
|
1276
|
+
Returns:
|
|
1277
|
+
Estimated similarity based on samples.
|
|
1278
|
+
"""
|
|
1279
|
+
sample_size = min(48, length)
|
|
1280
|
+
mismatches = 0
|
|
1281
|
+
|
|
1282
|
+
# First 16 bytes
|
|
1283
|
+
for i in range(min(16, length)):
|
|
1284
|
+
if payload_a[i] != payload_b[i]:
|
|
1285
|
+
mismatches += 1
|
|
1286
|
+
|
|
1287
|
+
# Last 16 bytes
|
|
1288
|
+
for i in range(1, min(17, length + 1)):
|
|
1289
|
+
if payload_a[-i] != payload_b[-i]:
|
|
1290
|
+
mismatches += 1
|
|
1291
|
+
|
|
1292
|
+
# Middle samples (only if length > 32)
|
|
1293
|
+
step = (length - 32) // 16
|
|
1294
|
+
if step > 0:
|
|
1295
|
+
for i in range(16, length - 16, step):
|
|
1296
|
+
if payload_a[i] != payload_b[i]:
|
|
1297
|
+
mismatches += 1
|
|
1298
|
+
|
|
1299
|
+
return 1.0 - (mismatches / sample_size)
|
|
1300
|
+
|
|
1301
|
+
|
|
1302
|
+
def _prefix_suffix_similarity(
|
|
1303
|
+
payload_a: bytes, payload_b: bytes, min_len: int, max_len: int
|
|
1304
|
+
) -> float:
|
|
1305
|
+
"""Estimate similarity from common prefix and suffix.
|
|
1306
|
+
|
|
1307
|
+
Args:
|
|
1308
|
+
payload_a: First payload.
|
|
1309
|
+
payload_b: Second payload.
|
|
1310
|
+
min_len: Minimum length.
|
|
1311
|
+
max_len: Maximum length.
|
|
1312
|
+
|
|
1313
|
+
Returns:
|
|
1314
|
+
Estimated similarity.
|
|
1315
|
+
"""
|
|
1178
1316
|
common_prefix = 0
|
|
1179
1317
|
for i in range(min_len):
|
|
1180
1318
|
if payload_a[i] == payload_b[i]:
|
|
@@ -1189,9 +1327,51 @@ def _fast_similarity(payload_a: bytes, payload_b: bytes, threshold: float) -> fl
|
|
|
1189
1327
|
else:
|
|
1190
1328
|
break
|
|
1191
1329
|
|
|
1192
|
-
# Estimate similarity from prefix/suffix
|
|
1193
1330
|
common_bytes = common_prefix + common_suffix
|
|
1194
|
-
|
|
1331
|
+
return common_bytes / max_len
|
|
1332
|
+
|
|
1333
|
+
|
|
1334
|
+
def _fast_similarity(payload_a: bytes, payload_b: bytes, threshold: float) -> float | None:
|
|
1335
|
+
"""Fast similarity check with early termination.
|
|
1336
|
+
|
|
1337
|
+
Uses length-based filtering and sampling to quickly reject dissimilar payloads.
|
|
1338
|
+
Returns None if payloads are likely similar (needs full check),
|
|
1339
|
+
or a similarity value if they can be quickly determined.
|
|
1340
|
+
|
|
1341
|
+
Args:
|
|
1342
|
+
payload_a: First payload.
|
|
1343
|
+
payload_b: Second payload.
|
|
1344
|
+
threshold: Similarity threshold for clustering.
|
|
1345
|
+
|
|
1346
|
+
Returns:
|
|
1347
|
+
Similarity value if quickly determined, None if full check needed.
|
|
1348
|
+
"""
|
|
1349
|
+
len_a = len(payload_a)
|
|
1350
|
+
len_b = len(payload_b)
|
|
1351
|
+
|
|
1352
|
+
# Check length-based similarity
|
|
1353
|
+
length_result = _check_length_similarity(len_a, len_b, threshold)
|
|
1354
|
+
if length_result is not None:
|
|
1355
|
+
return length_result
|
|
1356
|
+
|
|
1357
|
+
# For same-length payloads, use fast hamming similarity
|
|
1358
|
+
if len_a == len_b:
|
|
1359
|
+
# Sample comparison for large payloads
|
|
1360
|
+
if len_a > 50:
|
|
1361
|
+
estimated_similarity = _sample_hamming_similarity(payload_a, payload_b, len_a)
|
|
1362
|
+
|
|
1363
|
+
# If sample shows very low similarity, reject early
|
|
1364
|
+
if estimated_similarity < threshold * 0.8:
|
|
1365
|
+
return estimated_similarity
|
|
1366
|
+
|
|
1367
|
+
# Full hamming comparison for same-length payloads (faster than Levenshtein)
|
|
1368
|
+
matches = sum(a == b for a, b in zip(payload_a, payload_b, strict=True))
|
|
1369
|
+
return matches / len_a
|
|
1370
|
+
|
|
1371
|
+
# For different-length payloads, use common prefix/suffix heuristic
|
|
1372
|
+
max_len = max(len_a, len_b)
|
|
1373
|
+
min_len = min(len_a, len_b)
|
|
1374
|
+
estimated_similarity = _prefix_suffix_similarity(payload_a, payload_b, min_len, max_len)
|
|
1195
1375
|
|
|
1196
1376
|
# If common bytes suggest low similarity, reject
|
|
1197
1377
|
if estimated_similarity < threshold * 0.7:
|