oscura 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oscura/__init__.py +813 -8
- oscura/__main__.py +392 -0
- oscura/analyzers/__init__.py +37 -0
- oscura/analyzers/digital/__init__.py +177 -0
- oscura/analyzers/digital/bus.py +691 -0
- oscura/analyzers/digital/clock.py +805 -0
- oscura/analyzers/digital/correlation.py +720 -0
- oscura/analyzers/digital/edges.py +632 -0
- oscura/analyzers/digital/extraction.py +413 -0
- oscura/analyzers/digital/quality.py +878 -0
- oscura/analyzers/digital/signal_quality.py +877 -0
- oscura/analyzers/digital/thresholds.py +708 -0
- oscura/analyzers/digital/timing.py +1104 -0
- oscura/analyzers/eye/__init__.py +46 -0
- oscura/analyzers/eye/diagram.py +434 -0
- oscura/analyzers/eye/metrics.py +555 -0
- oscura/analyzers/jitter/__init__.py +83 -0
- oscura/analyzers/jitter/ber.py +333 -0
- oscura/analyzers/jitter/decomposition.py +759 -0
- oscura/analyzers/jitter/measurements.py +413 -0
- oscura/analyzers/jitter/spectrum.py +220 -0
- oscura/analyzers/measurements.py +40 -0
- oscura/analyzers/packet/__init__.py +171 -0
- oscura/analyzers/packet/daq.py +1077 -0
- oscura/analyzers/packet/metrics.py +437 -0
- oscura/analyzers/packet/parser.py +327 -0
- oscura/analyzers/packet/payload.py +2156 -0
- oscura/analyzers/packet/payload_analysis.py +1312 -0
- oscura/analyzers/packet/payload_extraction.py +236 -0
- oscura/analyzers/packet/payload_patterns.py +670 -0
- oscura/analyzers/packet/stream.py +359 -0
- oscura/analyzers/patterns/__init__.py +266 -0
- oscura/analyzers/patterns/clustering.py +1036 -0
- oscura/analyzers/patterns/discovery.py +539 -0
- oscura/analyzers/patterns/learning.py +797 -0
- oscura/analyzers/patterns/matching.py +1091 -0
- oscura/analyzers/patterns/periodic.py +650 -0
- oscura/analyzers/patterns/sequences.py +767 -0
- oscura/analyzers/power/__init__.py +116 -0
- oscura/analyzers/power/ac_power.py +391 -0
- oscura/analyzers/power/basic.py +383 -0
- oscura/analyzers/power/conduction.py +314 -0
- oscura/analyzers/power/efficiency.py +297 -0
- oscura/analyzers/power/ripple.py +356 -0
- oscura/analyzers/power/soa.py +372 -0
- oscura/analyzers/power/switching.py +479 -0
- oscura/analyzers/protocol/__init__.py +150 -0
- oscura/analyzers/protocols/__init__.py +150 -0
- oscura/analyzers/protocols/base.py +500 -0
- oscura/analyzers/protocols/can.py +620 -0
- oscura/analyzers/protocols/can_fd.py +448 -0
- oscura/analyzers/protocols/flexray.py +405 -0
- oscura/analyzers/protocols/hdlc.py +399 -0
- oscura/analyzers/protocols/i2c.py +368 -0
- oscura/analyzers/protocols/i2s.py +296 -0
- oscura/analyzers/protocols/jtag.py +393 -0
- oscura/analyzers/protocols/lin.py +445 -0
- oscura/analyzers/protocols/manchester.py +333 -0
- oscura/analyzers/protocols/onewire.py +501 -0
- oscura/analyzers/protocols/spi.py +334 -0
- oscura/analyzers/protocols/swd.py +325 -0
- oscura/analyzers/protocols/uart.py +393 -0
- oscura/analyzers/protocols/usb.py +495 -0
- oscura/analyzers/signal_integrity/__init__.py +63 -0
- oscura/analyzers/signal_integrity/embedding.py +294 -0
- oscura/analyzers/signal_integrity/equalization.py +370 -0
- oscura/analyzers/signal_integrity/sparams.py +484 -0
- oscura/analyzers/spectral/__init__.py +53 -0
- oscura/analyzers/spectral/chunked.py +273 -0
- oscura/analyzers/spectral/chunked_fft.py +571 -0
- oscura/analyzers/spectral/chunked_wavelet.py +391 -0
- oscura/analyzers/spectral/fft.py +92 -0
- oscura/analyzers/statistical/__init__.py +250 -0
- oscura/analyzers/statistical/checksum.py +923 -0
- oscura/analyzers/statistical/chunked_corr.py +228 -0
- oscura/analyzers/statistical/classification.py +778 -0
- oscura/analyzers/statistical/entropy.py +1113 -0
- oscura/analyzers/statistical/ngrams.py +614 -0
- oscura/analyzers/statistics/__init__.py +119 -0
- oscura/analyzers/statistics/advanced.py +885 -0
- oscura/analyzers/statistics/basic.py +263 -0
- oscura/analyzers/statistics/correlation.py +630 -0
- oscura/analyzers/statistics/distribution.py +298 -0
- oscura/analyzers/statistics/outliers.py +463 -0
- oscura/analyzers/statistics/streaming.py +93 -0
- oscura/analyzers/statistics/trend.py +520 -0
- oscura/analyzers/validation.py +598 -0
- oscura/analyzers/waveform/__init__.py +36 -0
- oscura/analyzers/waveform/measurements.py +943 -0
- oscura/analyzers/waveform/measurements_with_uncertainty.py +371 -0
- oscura/analyzers/waveform/spectral.py +1689 -0
- oscura/analyzers/waveform/wavelets.py +298 -0
- oscura/api/__init__.py +62 -0
- oscura/api/dsl.py +538 -0
- oscura/api/fluent.py +571 -0
- oscura/api/operators.py +498 -0
- oscura/api/optimization.py +392 -0
- oscura/api/profiling.py +396 -0
- oscura/automotive/__init__.py +73 -0
- oscura/automotive/can/__init__.py +52 -0
- oscura/automotive/can/analysis.py +356 -0
- oscura/automotive/can/checksum.py +250 -0
- oscura/automotive/can/correlation.py +212 -0
- oscura/automotive/can/discovery.py +355 -0
- oscura/automotive/can/message_wrapper.py +375 -0
- oscura/automotive/can/models.py +385 -0
- oscura/automotive/can/patterns.py +381 -0
- oscura/automotive/can/session.py +452 -0
- oscura/automotive/can/state_machine.py +300 -0
- oscura/automotive/can/stimulus_response.py +461 -0
- oscura/automotive/dbc/__init__.py +15 -0
- oscura/automotive/dbc/generator.py +156 -0
- oscura/automotive/dbc/parser.py +146 -0
- oscura/automotive/dtc/__init__.py +30 -0
- oscura/automotive/dtc/database.py +3036 -0
- oscura/automotive/j1939/__init__.py +14 -0
- oscura/automotive/j1939/decoder.py +745 -0
- oscura/automotive/loaders/__init__.py +35 -0
- oscura/automotive/loaders/asc.py +98 -0
- oscura/automotive/loaders/blf.py +77 -0
- oscura/automotive/loaders/csv_can.py +136 -0
- oscura/automotive/loaders/dispatcher.py +136 -0
- oscura/automotive/loaders/mdf.py +331 -0
- oscura/automotive/loaders/pcap.py +132 -0
- oscura/automotive/obd/__init__.py +14 -0
- oscura/automotive/obd/decoder.py +707 -0
- oscura/automotive/uds/__init__.py +48 -0
- oscura/automotive/uds/decoder.py +265 -0
- oscura/automotive/uds/models.py +64 -0
- oscura/automotive/visualization.py +369 -0
- oscura/batch/__init__.py +55 -0
- oscura/batch/advanced.py +627 -0
- oscura/batch/aggregate.py +300 -0
- oscura/batch/analyze.py +139 -0
- oscura/batch/logging.py +487 -0
- oscura/batch/metrics.py +556 -0
- oscura/builders/__init__.py +41 -0
- oscura/builders/signal_builder.py +1131 -0
- oscura/cli/__init__.py +14 -0
- oscura/cli/batch.py +339 -0
- oscura/cli/characterize.py +273 -0
- oscura/cli/compare.py +775 -0
- oscura/cli/decode.py +551 -0
- oscura/cli/main.py +247 -0
- oscura/cli/shell.py +350 -0
- oscura/comparison/__init__.py +66 -0
- oscura/comparison/compare.py +397 -0
- oscura/comparison/golden.py +487 -0
- oscura/comparison/limits.py +391 -0
- oscura/comparison/mask.py +434 -0
- oscura/comparison/trace_diff.py +30 -0
- oscura/comparison/visualization.py +481 -0
- oscura/compliance/__init__.py +70 -0
- oscura/compliance/advanced.py +756 -0
- oscura/compliance/masks.py +363 -0
- oscura/compliance/reporting.py +483 -0
- oscura/compliance/testing.py +298 -0
- oscura/component/__init__.py +38 -0
- oscura/component/impedance.py +365 -0
- oscura/component/reactive.py +598 -0
- oscura/component/transmission_line.py +312 -0
- oscura/config/__init__.py +191 -0
- oscura/config/defaults.py +254 -0
- oscura/config/loader.py +348 -0
- oscura/config/memory.py +271 -0
- oscura/config/migration.py +458 -0
- oscura/config/pipeline.py +1077 -0
- oscura/config/preferences.py +530 -0
- oscura/config/protocol.py +875 -0
- oscura/config/schema.py +713 -0
- oscura/config/settings.py +420 -0
- oscura/config/thresholds.py +599 -0
- oscura/convenience.py +457 -0
- oscura/core/__init__.py +299 -0
- oscura/core/audit.py +457 -0
- oscura/core/backend_selector.py +405 -0
- oscura/core/cache.py +590 -0
- oscura/core/cancellation.py +439 -0
- oscura/core/confidence.py +225 -0
- oscura/core/config.py +506 -0
- oscura/core/correlation.py +216 -0
- oscura/core/cross_domain.py +422 -0
- oscura/core/debug.py +301 -0
- oscura/core/edge_cases.py +541 -0
- oscura/core/exceptions.py +535 -0
- oscura/core/gpu_backend.py +523 -0
- oscura/core/lazy.py +832 -0
- oscura/core/log_query.py +540 -0
- oscura/core/logging.py +931 -0
- oscura/core/logging_advanced.py +952 -0
- oscura/core/memoize.py +171 -0
- oscura/core/memory_check.py +274 -0
- oscura/core/memory_guard.py +290 -0
- oscura/core/memory_limits.py +336 -0
- oscura/core/memory_monitor.py +453 -0
- oscura/core/memory_progress.py +465 -0
- oscura/core/memory_warnings.py +315 -0
- oscura/core/numba_backend.py +362 -0
- oscura/core/performance.py +352 -0
- oscura/core/progress.py +524 -0
- oscura/core/provenance.py +358 -0
- oscura/core/results.py +331 -0
- oscura/core/types.py +504 -0
- oscura/core/uncertainty.py +383 -0
- oscura/discovery/__init__.py +52 -0
- oscura/discovery/anomaly_detector.py +672 -0
- oscura/discovery/auto_decoder.py +415 -0
- oscura/discovery/comparison.py +497 -0
- oscura/discovery/quality_validator.py +528 -0
- oscura/discovery/signal_detector.py +769 -0
- oscura/dsl/__init__.py +73 -0
- oscura/dsl/commands.py +246 -0
- oscura/dsl/interpreter.py +455 -0
- oscura/dsl/parser.py +689 -0
- oscura/dsl/repl.py +172 -0
- oscura/exceptions.py +59 -0
- oscura/exploratory/__init__.py +111 -0
- oscura/exploratory/error_recovery.py +642 -0
- oscura/exploratory/fuzzy.py +513 -0
- oscura/exploratory/fuzzy_advanced.py +786 -0
- oscura/exploratory/legacy.py +831 -0
- oscura/exploratory/parse.py +358 -0
- oscura/exploratory/recovery.py +275 -0
- oscura/exploratory/sync.py +382 -0
- oscura/exploratory/unknown.py +707 -0
- oscura/export/__init__.py +25 -0
- oscura/export/wireshark/README.md +265 -0
- oscura/export/wireshark/__init__.py +47 -0
- oscura/export/wireshark/generator.py +312 -0
- oscura/export/wireshark/lua_builder.py +159 -0
- oscura/export/wireshark/templates/dissector.lua.j2 +92 -0
- oscura/export/wireshark/type_mapping.py +165 -0
- oscura/export/wireshark/validator.py +105 -0
- oscura/exporters/__init__.py +94 -0
- oscura/exporters/csv.py +303 -0
- oscura/exporters/exporters.py +44 -0
- oscura/exporters/hdf5.py +219 -0
- oscura/exporters/html_export.py +701 -0
- oscura/exporters/json_export.py +291 -0
- oscura/exporters/markdown_export.py +367 -0
- oscura/exporters/matlab_export.py +354 -0
- oscura/exporters/npz_export.py +219 -0
- oscura/exporters/spice_export.py +210 -0
- oscura/extensibility/__init__.py +131 -0
- oscura/extensibility/docs.py +752 -0
- oscura/extensibility/extensions.py +1125 -0
- oscura/extensibility/logging.py +259 -0
- oscura/extensibility/measurements.py +485 -0
- oscura/extensibility/plugins.py +414 -0
- oscura/extensibility/registry.py +346 -0
- oscura/extensibility/templates.py +913 -0
- oscura/extensibility/validation.py +651 -0
- oscura/filtering/__init__.py +89 -0
- oscura/filtering/base.py +563 -0
- oscura/filtering/convenience.py +564 -0
- oscura/filtering/design.py +725 -0
- oscura/filtering/filters.py +32 -0
- oscura/filtering/introspection.py +605 -0
- oscura/guidance/__init__.py +24 -0
- oscura/guidance/recommender.py +429 -0
- oscura/guidance/wizard.py +518 -0
- oscura/inference/__init__.py +251 -0
- oscura/inference/active_learning/README.md +153 -0
- oscura/inference/active_learning/__init__.py +38 -0
- oscura/inference/active_learning/lstar.py +257 -0
- oscura/inference/active_learning/observation_table.py +230 -0
- oscura/inference/active_learning/oracle.py +78 -0
- oscura/inference/active_learning/teachers/__init__.py +15 -0
- oscura/inference/active_learning/teachers/simulator.py +192 -0
- oscura/inference/adaptive_tuning.py +453 -0
- oscura/inference/alignment.py +653 -0
- oscura/inference/bayesian.py +943 -0
- oscura/inference/binary.py +1016 -0
- oscura/inference/crc_reverse.py +711 -0
- oscura/inference/logic.py +288 -0
- oscura/inference/message_format.py +1305 -0
- oscura/inference/protocol.py +417 -0
- oscura/inference/protocol_dsl.py +1084 -0
- oscura/inference/protocol_library.py +1230 -0
- oscura/inference/sequences.py +809 -0
- oscura/inference/signal_intelligence.py +1509 -0
- oscura/inference/spectral.py +215 -0
- oscura/inference/state_machine.py +634 -0
- oscura/inference/stream.py +918 -0
- oscura/integrations/__init__.py +59 -0
- oscura/integrations/llm.py +1827 -0
- oscura/jupyter/__init__.py +32 -0
- oscura/jupyter/display.py +268 -0
- oscura/jupyter/magic.py +334 -0
- oscura/loaders/__init__.py +526 -0
- oscura/loaders/binary.py +69 -0
- oscura/loaders/configurable.py +1255 -0
- oscura/loaders/csv.py +26 -0
- oscura/loaders/csv_loader.py +473 -0
- oscura/loaders/hdf5.py +9 -0
- oscura/loaders/hdf5_loader.py +510 -0
- oscura/loaders/lazy.py +370 -0
- oscura/loaders/mmap_loader.py +583 -0
- oscura/loaders/numpy_loader.py +436 -0
- oscura/loaders/pcap.py +432 -0
- oscura/loaders/preprocessing.py +368 -0
- oscura/loaders/rigol.py +287 -0
- oscura/loaders/sigrok.py +321 -0
- oscura/loaders/tdms.py +367 -0
- oscura/loaders/tektronix.py +711 -0
- oscura/loaders/validation.py +584 -0
- oscura/loaders/vcd.py +464 -0
- oscura/loaders/wav.py +233 -0
- oscura/math/__init__.py +45 -0
- oscura/math/arithmetic.py +824 -0
- oscura/math/interpolation.py +413 -0
- oscura/onboarding/__init__.py +39 -0
- oscura/onboarding/help.py +498 -0
- oscura/onboarding/tutorials.py +405 -0
- oscura/onboarding/wizard.py +466 -0
- oscura/optimization/__init__.py +19 -0
- oscura/optimization/parallel.py +440 -0
- oscura/optimization/search.py +532 -0
- oscura/pipeline/__init__.py +43 -0
- oscura/pipeline/base.py +338 -0
- oscura/pipeline/composition.py +242 -0
- oscura/pipeline/parallel.py +448 -0
- oscura/pipeline/pipeline.py +375 -0
- oscura/pipeline/reverse_engineering.py +1119 -0
- oscura/plugins/__init__.py +122 -0
- oscura/plugins/base.py +272 -0
- oscura/plugins/cli.py +497 -0
- oscura/plugins/discovery.py +411 -0
- oscura/plugins/isolation.py +418 -0
- oscura/plugins/lifecycle.py +959 -0
- oscura/plugins/manager.py +493 -0
- oscura/plugins/registry.py +421 -0
- oscura/plugins/versioning.py +372 -0
- oscura/py.typed +0 -0
- oscura/quality/__init__.py +65 -0
- oscura/quality/ensemble.py +740 -0
- oscura/quality/explainer.py +338 -0
- oscura/quality/scoring.py +616 -0
- oscura/quality/warnings.py +456 -0
- oscura/reporting/__init__.py +248 -0
- oscura/reporting/advanced.py +1234 -0
- oscura/reporting/analyze.py +448 -0
- oscura/reporting/argument_preparer.py +596 -0
- oscura/reporting/auto_report.py +507 -0
- oscura/reporting/batch.py +615 -0
- oscura/reporting/chart_selection.py +223 -0
- oscura/reporting/comparison.py +330 -0
- oscura/reporting/config.py +615 -0
- oscura/reporting/content/__init__.py +39 -0
- oscura/reporting/content/executive.py +127 -0
- oscura/reporting/content/filtering.py +191 -0
- oscura/reporting/content/minimal.py +257 -0
- oscura/reporting/content/verbosity.py +162 -0
- oscura/reporting/core.py +508 -0
- oscura/reporting/core_formats/__init__.py +17 -0
- oscura/reporting/core_formats/multi_format.py +210 -0
- oscura/reporting/engine.py +836 -0
- oscura/reporting/export.py +366 -0
- oscura/reporting/formatting/__init__.py +129 -0
- oscura/reporting/formatting/emphasis.py +81 -0
- oscura/reporting/formatting/numbers.py +403 -0
- oscura/reporting/formatting/standards.py +55 -0
- oscura/reporting/formatting.py +466 -0
- oscura/reporting/html.py +578 -0
- oscura/reporting/index.py +590 -0
- oscura/reporting/multichannel.py +296 -0
- oscura/reporting/output.py +379 -0
- oscura/reporting/pdf.py +373 -0
- oscura/reporting/plots.py +731 -0
- oscura/reporting/pptx_export.py +360 -0
- oscura/reporting/renderers/__init__.py +11 -0
- oscura/reporting/renderers/pdf.py +94 -0
- oscura/reporting/sections.py +471 -0
- oscura/reporting/standards.py +680 -0
- oscura/reporting/summary_generator.py +368 -0
- oscura/reporting/tables.py +397 -0
- oscura/reporting/template_system.py +724 -0
- oscura/reporting/templates/__init__.py +15 -0
- oscura/reporting/templates/definition.py +205 -0
- oscura/reporting/templates/index.html +649 -0
- oscura/reporting/templates/index.md +173 -0
- oscura/schemas/__init__.py +158 -0
- oscura/schemas/bus_configuration.json +322 -0
- oscura/schemas/device_mapping.json +182 -0
- oscura/schemas/packet_format.json +418 -0
- oscura/schemas/protocol_definition.json +363 -0
- oscura/search/__init__.py +16 -0
- oscura/search/anomaly.py +292 -0
- oscura/search/context.py +149 -0
- oscura/search/pattern.py +160 -0
- oscura/session/__init__.py +34 -0
- oscura/session/annotations.py +289 -0
- oscura/session/history.py +313 -0
- oscura/session/session.py +445 -0
- oscura/streaming/__init__.py +43 -0
- oscura/streaming/chunked.py +611 -0
- oscura/streaming/progressive.py +393 -0
- oscura/streaming/realtime.py +622 -0
- oscura/testing/__init__.py +54 -0
- oscura/testing/synthetic.py +808 -0
- oscura/triggering/__init__.py +68 -0
- oscura/triggering/base.py +229 -0
- oscura/triggering/edge.py +353 -0
- oscura/triggering/pattern.py +344 -0
- oscura/triggering/pulse.py +581 -0
- oscura/triggering/window.py +453 -0
- oscura/ui/__init__.py +48 -0
- oscura/ui/formatters.py +526 -0
- oscura/ui/progressive_display.py +340 -0
- oscura/utils/__init__.py +99 -0
- oscura/utils/autodetect.py +338 -0
- oscura/utils/buffer.py +389 -0
- oscura/utils/lazy.py +407 -0
- oscura/utils/lazy_imports.py +147 -0
- oscura/utils/memory.py +836 -0
- oscura/utils/memory_advanced.py +1326 -0
- oscura/utils/memory_extensions.py +465 -0
- oscura/utils/progressive.py +352 -0
- oscura/utils/windowing.py +362 -0
- oscura/visualization/__init__.py +321 -0
- oscura/visualization/accessibility.py +526 -0
- oscura/visualization/annotations.py +374 -0
- oscura/visualization/axis_scaling.py +305 -0
- oscura/visualization/colors.py +453 -0
- oscura/visualization/digital.py +337 -0
- oscura/visualization/eye.py +420 -0
- oscura/visualization/histogram.py +281 -0
- oscura/visualization/interactive.py +858 -0
- oscura/visualization/jitter.py +702 -0
- oscura/visualization/keyboard.py +394 -0
- oscura/visualization/layout.py +365 -0
- oscura/visualization/optimization.py +1028 -0
- oscura/visualization/palettes.py +446 -0
- oscura/visualization/plot.py +92 -0
- oscura/visualization/power.py +290 -0
- oscura/visualization/power_extended.py +626 -0
- oscura/visualization/presets.py +467 -0
- oscura/visualization/protocols.py +932 -0
- oscura/visualization/render.py +207 -0
- oscura/visualization/rendering.py +444 -0
- oscura/visualization/reverse_engineering.py +791 -0
- oscura/visualization/signal_integrity.py +808 -0
- oscura/visualization/specialized.py +553 -0
- oscura/visualization/spectral.py +811 -0
- oscura/visualization/styles.py +381 -0
- oscura/visualization/thumbnails.py +311 -0
- oscura/visualization/time_axis.py +351 -0
- oscura/visualization/waveform.py +367 -0
- oscura/workflow/__init__.py +13 -0
- oscura/workflow/dag.py +377 -0
- oscura/workflows/__init__.py +58 -0
- oscura/workflows/compliance.py +280 -0
- oscura/workflows/digital.py +272 -0
- oscura/workflows/multi_trace.py +502 -0
- oscura/workflows/power.py +178 -0
- oscura/workflows/protocol.py +492 -0
- oscura/workflows/reverse_engineering.py +639 -0
- oscura/workflows/signal_integrity.py +227 -0
- oscura-0.1.1.dist-info/METADATA +300 -0
- oscura-0.1.1.dist-info/RECORD +463 -0
- oscura-0.1.1.dist-info/entry_points.txt +2 -0
- {oscura-0.0.1.dist-info → oscura-0.1.1.dist-info}/licenses/LICENSE +1 -1
- oscura-0.0.1.dist-info/METADATA +0 -63
- oscura-0.0.1.dist-info/RECORD +0 -5
- {oscura-0.0.1.dist-info → oscura-0.1.1.dist-info}/WHEEL +0 -0
oscura/exploratory/fuzzy_advanced.py (new file)
@@ -0,0 +1,786 @@
"""Advanced fuzzy matching for binary pattern analysis.

This module provides advanced fuzzy matching capabilities including
pattern variant characterization, consensus finding, and multiple
binary sequence alignment.


Example:
    >>> from oscura.exploratory.fuzzy_advanced import (
    ...     characterize_variants,
    ...     align_sequences,
    ... )
    >>> patterns = [b'\\x12\\x34\\x56', b'\\x12\\x35\\x56', b'\\x12\\x34\\x57']
    >>> result = characterize_variants(patterns)
    >>> print(f"Consensus: {result.consensus.hex()}")
"""

from __future__ import annotations

import logging
from dataclasses import dataclass
from enum import Enum
from typing import TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
    from collections.abc import Sequence

logger = logging.getLogger(__name__)

__all__ = [
    "AlignedSequence",
    "AlignmentResult",
    "PositionAnalysis",
    "VariantCharacterization",
    "VariationType",
    "align_sequences",
    "align_two_sequences",
    "characterize_variants",
    "compute_conservation_scores",
]


# =============================================================================
# =============================================================================


class VariationType(Enum):
    """Classification of position variation.

    References:
        FUZZY-004: Binary Pattern Variant Characterization and Consensus
    """

    CONSTANT = "constant"  # Entropy < 0.5 bits, confidence >= 0.95
    LOW_VARIATION = "low_variation"  # Entropy 0.5-2.0 bits
    HIGH_VARIATION = "high_variation"  # Entropy 2.0-6.0 bits
    RANDOM = "random"  # Entropy > 6.0 bits (likely random or encrypted)


@dataclass
class PositionAnalysis:
    """Analysis of a single byte position.

    Attributes:
        position: Byte position index
        consensus_byte: Most common byte at this position
        consensus_confidence: Frequency of consensus byte
        entropy: Shannon entropy in bits
        variation_type: Classification of variation
        value_distribution: Distribution of byte values
        is_error: True if variation likely from errors

    References:
        FUZZY-004: Binary Pattern Variant Characterization and Consensus
    """

    position: int
    consensus_byte: int
    consensus_confidence: float
    entropy: float
    variation_type: VariationType
    value_distribution: dict[int, int]
    is_error: bool = False


@dataclass
class VariantCharacterization:
    """Result of variant characterization.

    Attributes:
        consensus: Consensus pattern (most common byte per position)
        positions: Per-position analysis
        constant_positions: Indices of constant positions
        variable_positions: Indices of variable positions
        suggested_boundaries: Suggested field boundaries
        pattern_count: Number of patterns analyzed
        min_length: Minimum pattern length

    References:
        FUZZY-004: Binary Pattern Variant Characterization and Consensus
    """

    consensus: bytes
    positions: list[PositionAnalysis]
    constant_positions: list[int]
    variable_positions: list[int]
    suggested_boundaries: list[int]
    pattern_count: int
    min_length: int


def _compute_entropy(values: list[int]) -> float:
    """Compute Shannon entropy of byte values.

    Args:
        values: List of byte values

    Returns:
        Entropy in bits
    """
    if not values:
        return 0.0

    # Count frequencies
    counts: dict[int, int] = {}
    for v in values:
        counts[v] = counts.get(v, 0) + 1

    total = len(values)
    entropy = 0.0

    for count in counts.values():
        if count > 0:
            p = count / total
            entropy -= p * np.log2(p)

    return entropy
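# Worked values (illustrative, not part of the released file); these follow
# directly from the Shannon entropy formula above:
#     _compute_entropy([0x12, 0x12, 0x12, 0x12])  -> 0.0  (a single value)
#     _compute_entropy([0x12, 0x12, 0x13, 0x13])  -> 1.0  (two values, 50/50)
#     _compute_entropy([0x12, 0x13, 0x14, 0x15])  -> 2.0  (four equally likely values)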


def _classify_variation(entropy: float, confidence: float) -> VariationType:
    """Classify variation type based on entropy and confidence.

    Args:
        entropy: Shannon entropy in bits
        confidence: Consensus confidence

    Returns:
        Variation type classification
    """
    if entropy < 0.5 and confidence >= 0.95:
        return VariationType.CONSTANT
    elif entropy < 2.0:
        return VariationType.LOW_VARIATION
    elif entropy < 6.0:
        return VariationType.HIGH_VARIATION
    else:
        return VariationType.RANDOM
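# Worked classification (illustrative, not part of the released file) against
# the thresholds above:
#     _classify_variation(0.0, 1.0)    -> VariationType.CONSTANT
#     _classify_variation(0.92, 0.67)  -> VariationType.LOW_VARIATION
#     _classify_variation(4.3, 0.10)   -> VariationType.HIGH_VARIATION
#     _classify_variation(7.9, 0.01)   -> VariationType.RANDOM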


def _detect_error_variation(
    values: list[int],
    consensus: int,
    confidence: float,
) -> bool:
    """Detect if variation is likely from errors vs intentional.

    Args:
        values: List of byte values at position
        consensus: Consensus byte
        confidence: Consensus confidence

    Returns:
        True if variation appears to be from errors
    """
    if confidence >= 0.99:
        # Very rare variations are likely errors
        return True

    # Check if variations are single-bit flips from consensus
    variations = [v for v in set(values) if v != consensus]
    if not variations:
        return False

    single_bit_flips = 0
    for v in variations:
        diff = v ^ consensus
        # Check if exactly one bit different
        if diff != 0 and (diff & (diff - 1)) == 0:
            single_bit_flips += 1

    # If most variations are single-bit flips, likely errors
    return single_bit_flips >= len(variations) * 0.8
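# Worked example of the single-bit-flip test (illustrative, not part of the
# released file): 0x34 ^ 0x35 == 0x01 and 0x01 & 0x00 == 0, so 0x35 is one bit
# away from a 0x34 consensus, while 0x34 ^ 0x43 == 0x77 has several bits set.
#     _detect_error_variation([0x34, 0x34, 0x34, 0x35], 0x34, 0.75)  -> True
#     _detect_error_variation([0x34, 0x34, 0x34, 0x43], 0x34, 0.75)  -> False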


def characterize_variants(
    patterns: Sequence[bytes | bytearray],
    min_confidence: float = 0.95,
) -> VariantCharacterization:
    """Characterize variants in a collection of binary patterns.

    Analyzes a collection of similar patterns to find consensus sequence,
    variable positions, and variant frequencies.

    Args:
        patterns: Collection of binary patterns
        min_confidence: Minimum confidence for constant classification

    Returns:
        VariantCharacterization with analysis results

    Example:
        >>> patterns = [b'\\x12\\x34\\x56', b'\\x12\\x35\\x56', b'\\x12\\x34\\x57']
        >>> result = characterize_variants(patterns)
        >>> print(f"Consensus: {result.consensus.hex()}")
        >>> print(f"Variable positions: {result.variable_positions}")

    References:
        FUZZY-004: Binary Pattern Variant Characterization and Consensus
    """
    if not patterns:
        return VariantCharacterization(
            consensus=b"",
            positions=[],
            constant_positions=[],
            variable_positions=[],
            suggested_boundaries=[],
            pattern_count=0,
            min_length=0,
        )

    pattern_count = len(patterns)
    min_length = min(len(p) for p in patterns)

    positions: list[PositionAnalysis] = []
    consensus_bytes: list[int] = []
    constant_positions: list[int] = []
    variable_positions: list[int] = []

    for pos in range(min_length):
        # Collect values at this position
        values = [p[pos] for p in patterns if pos < len(p)]

        # Count distribution
        distribution: dict[int, int] = {}
        for v in values:
            distribution[v] = distribution.get(v, 0) + 1

        # Find consensus (mode)
        consensus_byte = max(distribution, key=distribution.get)  # type: ignore[arg-type]
        consensus_count = distribution[consensus_byte]
        consensus_confidence = consensus_count / len(values)

        # Compute entropy
        entropy = _compute_entropy(values)

        # Classify variation
        variation_type = _classify_variation(entropy, consensus_confidence)

        # Detect error vs intentional variation
        is_error = _detect_error_variation(values, consensus_byte, consensus_confidence)

        analysis = PositionAnalysis(
            position=pos,
            consensus_byte=consensus_byte,
            consensus_confidence=consensus_confidence,
            entropy=entropy,
            variation_type=variation_type,
            value_distribution=distribution,
            is_error=is_error,
        )

        positions.append(analysis)
        consensus_bytes.append(consensus_byte)

        if variation_type == VariationType.CONSTANT:
            constant_positions.append(pos)
        else:
            variable_positions.append(pos)

    # Suggest field boundaries (transitions between constant/variable)
    boundaries: list[int] = []
    prev_is_constant = None

    for pos, analysis in enumerate(positions):
        is_constant = analysis.variation_type == VariationType.CONSTANT
        if prev_is_constant is not None and is_constant != prev_is_constant:
            boundaries.append(pos)
        prev_is_constant = is_constant

    return VariantCharacterization(
        consensus=bytes(consensus_bytes),
        positions=positions,
        constant_positions=constant_positions,
        variable_positions=variable_positions,
        suggested_boundaries=boundaries,
        pattern_count=pattern_count,
        min_length=min_length,
    )
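# Usage sketch (illustrative, not part of the released file), reusing the
# docstring patterns; positions 1 and 2 each have only a 2-of-3 majority:
#     r = characterize_variants([b"\x12\x34\x56", b"\x12\x35\x56", b"\x12\x34\x57"])
#     r.consensus.hex()       -> '123456'
#     r.constant_positions    -> [0]
#     r.variable_positions    -> [1, 2]
#     r.suggested_boundaries  -> [1]  (the constant-to-variable transition)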


# =============================================================================
# =============================================================================


@dataclass
class AlignedSequence:
    """A sequence with alignment information.

    Attributes:
        original: Original sequence
        aligned: Aligned sequence with gaps
        gaps: Gap positions
        score: Alignment score

    References:
        FUZZY-005: Multiple Binary Sequence Alignment (MSA)
    """

    original: bytes
    aligned: bytes
    gaps: list[int]
    score: float


@dataclass
class AlignmentResult:
    """Result of sequence alignment.

    Attributes:
        sequences: Aligned sequences
        conservation_scores: Per-position conservation scores
        conserved_regions: Indices of highly conserved regions
        gap_positions: Common gap positions
        alignment_score: Overall alignment score

    References:
        FUZZY-005: Multiple Binary Sequence Alignment (MSA)
    """

    sequences: list[AlignedSequence]
    conservation_scores: list[float]
    conserved_regions: list[tuple[int, int]]
    gap_positions: list[int]
    alignment_score: float


# Gap representation
GAP_BYTE = 0xFF  # Using 0xFF as gap marker


def _needleman_wunsch(
    seq1: bytes,
    seq2: bytes,
    match_bonus: int = 1,
    mismatch_penalty: int = -1,
    gap_open: int = -2,
    gap_extend: int = -1,
) -> tuple[bytes, bytes, float]:
    """Needleman-Wunsch global alignment algorithm.

    Args:
        seq1: First sequence
        seq2: Second sequence
        match_bonus: Score for match
        mismatch_penalty: Score for mismatch
        gap_open: Gap opening penalty
        gap_extend: Gap extension penalty

    Returns:
        (aligned_seq1, aligned_seq2, score)
    """
    m, n = len(seq1), len(seq2)

    # Initialize score matrix
    score = np.zeros((m + 1, n + 1), dtype=np.int32)
    traceback = np.zeros((m + 1, n + 1), dtype=np.int8)

    # Direction constants
    DIAG, UP, LEFT = 0, 1, 2

    # Initialize first row and column
    for i in range(1, m + 1):
        score[i, 0] = gap_open + (i - 1) * gap_extend
        traceback[i, 0] = UP

    for j in range(1, n + 1):
        score[0, j] = gap_open + (j - 1) * gap_extend
        traceback[0, j] = LEFT

    # Fill matrix
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            # Match/mismatch
            if seq1[i - 1] == seq2[j - 1]:
                diag_score = score[i - 1, j - 1] + match_bonus
            else:
                diag_score = score[i - 1, j - 1] + mismatch_penalty

            # Gap in seq2 (moving down)
            if traceback[i - 1, j] == UP:
                up_score = score[i - 1, j] + gap_extend
            else:
                up_score = score[i - 1, j] + gap_open

            # Gap in seq1 (moving right)
            if traceback[i, j - 1] == LEFT:
                left_score = score[i, j - 1] + gap_extend
            else:
                left_score = score[i, j - 1] + gap_open

            # Choose best
            best = max(diag_score, up_score, left_score)
            score[i, j] = best

            if best == diag_score:
                traceback[i, j] = DIAG
            elif best == up_score:
                traceback[i, j] = UP
            else:
                traceback[i, j] = LEFT

    # Traceback
    aligned1: list[int] = []
    aligned2: list[int] = []
    i, j = m, n

    while i > 0 or j > 0:
        if i > 0 and j > 0 and traceback[i, j] == DIAG:
            aligned1.append(seq1[i - 1])
            aligned2.append(seq2[j - 1])
            i -= 1
            j -= 1
        elif i > 0 and traceback[i, j] == UP:
            aligned1.append(seq1[i - 1])
            aligned2.append(GAP_BYTE)
            i -= 1
        else:
            aligned1.append(GAP_BYTE)
            aligned2.append(seq2[j - 1])
            j -= 1

    # Reverse (traceback goes backwards)
    aligned1.reverse()
    aligned2.reverse()

    return bytes(aligned1), bytes(aligned2), float(score[m, n])
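# Worked example (illustrative sketch, not part of the released file): with the
# default scores (match +1, mismatch -1, gap open -2, gap extend -1),
#     a1, a2, s = _needleman_wunsch(b"\x12\x34\x56\x78", b"\x12\x34\x78")
# should give a1.hex() == '12345678', a2.hex() == '1234ff78' (0xff is GAP_BYTE,
# so 0x56 sits opposite a gap) and s == 1.0 (three matches, one opened gap).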


def _smith_waterman(
    seq1: bytes,
    seq2: bytes,
    match_bonus: int = 1,
    mismatch_penalty: int = -1,
    gap_penalty: int = -2,
) -> tuple[bytes, bytes, float, int, int]:
    """Smith-Waterman local alignment algorithm.

    Args:
        seq1: First sequence
        seq2: Second sequence
        match_bonus: Score for match
        mismatch_penalty: Score for mismatch
        gap_penalty: Gap penalty

    Returns:
        (aligned_seq1, aligned_seq2, score, start1, start2)
    """
    m, n = len(seq1), len(seq2)

    # Initialize score matrix
    score = np.zeros((m + 1, n + 1), dtype=np.int32)
    traceback = np.zeros((m + 1, n + 1), dtype=np.int8)

    max_score = 0
    max_i, max_j = 0, 0

    # Direction constants
    DIAG, UP, LEFT, STOP = 0, 1, 2, 3

    # Fill matrix
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            # Match/mismatch
            if seq1[i - 1] == seq2[j - 1]:
                diag_score = score[i - 1, j - 1] + match_bonus
            else:
                diag_score = score[i - 1, j - 1] + mismatch_penalty

            up_score = score[i - 1, j] + gap_penalty
            left_score = score[i, j - 1] + gap_penalty

            # Local alignment can restart (score 0)
            best = max(0, diag_score, up_score, left_score)
            score[i, j] = best

            if best == 0:
                traceback[i, j] = STOP
            elif best == diag_score:
                traceback[i, j] = DIAG
            elif best == up_score:
                traceback[i, j] = UP
            else:
                traceback[i, j] = LEFT

            if best > max_score:
                max_score = best
                max_i, max_j = i, j

    # Traceback from max score
    aligned1: list[int] = []
    aligned2: list[int] = []
    i, j = max_i, max_j

    while i > 0 and j > 0 and traceback[i, j] != STOP:
        if traceback[i, j] == DIAG:
            aligned1.append(seq1[i - 1])
            aligned2.append(seq2[j - 1])
            i -= 1
            j -= 1
        elif traceback[i, j] == UP:
            aligned1.append(seq1[i - 1])
            aligned2.append(GAP_BYTE)
            i -= 1
        else:
            aligned1.append(GAP_BYTE)
            aligned2.append(seq2[j - 1])
            j -= 1

    aligned1.reverse()
    aligned2.reverse()

    start1 = i
    start2 = j

    return bytes(aligned1), bytes(aligned2), float(max_score), start1, start2
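# Worked example (illustrative sketch, not part of the released file; the
# 0xaa/0xbb/0xcc/0xdd padding bytes are made-up inputs): only the shared run
# survives a local alignment, and the returned offsets say where it starts in
# each input,
#     _smith_waterman(b"\xaa\x12\x34\x56\xbb", b"\xcc\x12\x34\x56\xdd")
# should give (b"\x12\x34\x56", b"\x12\x34\x56", 3.0, 1, 1).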


def align_two_sequences(
    seq1: bytes,
    seq2: bytes,
    method: str = "global",
    gap_open: int = -2,
    gap_extend: int = -1,
    mismatch_penalty: int = -1,
    match_bonus: int = 1,
) -> tuple[bytes, bytes, float]:
    """Align two binary sequences.

    Args:
        seq1: First sequence
        seq2: Second sequence
        method: 'global' (Needleman-Wunsch) or 'local' (Smith-Waterman)
        gap_open: Gap opening penalty
        gap_extend: Gap extension penalty
        mismatch_penalty: Mismatch penalty
        match_bonus: Match bonus

    Returns:
        (aligned_seq1, aligned_seq2, score)

    Raises:
        ValueError: If method is unknown.

    Example:
        >>> seq1 = b'\\x12\\x34\\x56\\x78'
        >>> seq2 = b'\\x12\\x34\\x78'
        >>> aligned1, aligned2, score = align_two_sequences(seq1, seq2)

    References:
        FUZZY-005: Multiple Binary Sequence Alignment (MSA)
    """
    if method == "global":
        return _needleman_wunsch(seq1, seq2, match_bonus, mismatch_penalty, gap_open, gap_extend)
    elif method == "local":
        aligned1, aligned2, score, _, _ = _smith_waterman(
            seq1, seq2, match_bonus, mismatch_penalty, gap_open
        )
        return aligned1, aligned2, score
    else:
        raise ValueError(f"Unknown alignment method: {method}")
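# Usage sketch (illustrative, not part of the released file), completing the
# docstring example above; method="global" routes to _needleman_wunsch and
# method="local" to _smith_waterman:
#     aligned1, aligned2, score = align_two_sequences(b"\x12\x34\x56\x78", b"\x12\x34\x78")
#     aligned1.hex(), aligned2.hex(), score  -> ('12345678', '1234ff78', 1.0)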


def align_sequences(
    sequences: Sequence[bytes],
    method: str = "progressive",
    gap_open: int = -2,
    gap_extend: int = -1,
) -> AlignmentResult:
    """Align multiple binary sequences.

    Performs multiple sequence alignment (MSA) on a collection of
    binary sequences.

    Args:
        sequences: Sequences to align
        method: 'progressive' or 'iterative'
        gap_open: Gap opening penalty
        gap_extend: Gap extension penalty

    Returns:
        AlignmentResult with aligned sequences

    Raises:
        ValueError: If method is unknown.

    Example:
        >>> seqs = [b'\\x12\\x34\\x56', b'\\x12\\x56', b'\\x12\\x34\\x78']
        >>> result = align_sequences(seqs)
        >>> for seq in result.sequences:
        ...     print(seq.aligned.hex())

    References:
        FUZZY-005: Multiple Binary Sequence Alignment (MSA)
    """
    # Validate method parameter first
    if method not in ("progressive", "iterative"):
        raise ValueError(f"Unknown alignment method: {method}")

    if not sequences:
        return AlignmentResult(
            sequences=[],
            conservation_scores=[],
            conserved_regions=[],
            gap_positions=[],
            alignment_score=0.0,
        )

    if len(sequences) == 1:
        return AlignmentResult(
            sequences=[
                AlignedSequence(
                    original=sequences[0],
                    aligned=sequences[0],
                    gaps=[],
                    score=0.0,
                )
            ],
            conservation_scores=[1.0] * len(sequences[0]),
            conserved_regions=[(0, len(sequences[0]) - 1)] if sequences[0] else [],
            gap_positions=[],
            alignment_score=0.0,
        )

    if method == "progressive":
        return _progressive_alignment(sequences, gap_open, gap_extend)
    elif method == "iterative":
        # Iterative refinement (simplified)
        result = _progressive_alignment(sequences, gap_open, gap_extend)
        # Could add refinement passes here
        return result
    else:
        raise ValueError(f"Unknown alignment method: {method}")
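# Usage sketch (illustrative, not part of the released file), based on the
# docstring example; exact gap placement depends on the progressive heuristic
# implemented below, so only the result structure is shown:
#     result = align_sequences([b"\x12\x34\x56", b"\x12\x56", b"\x12\x34\x78"])
#     [s.aligned.hex() for s in result.sequences]  # 'ff' columns are gaps
#     result.conservation_scores                   # one score per alignment column
#     result.conserved_regions                     # list of (start, end) column spans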


def _progressive_alignment(
    sequences: Sequence[bytes],
    gap_open: int,
    gap_extend: int,
) -> AlignmentResult:
    """Progressive multiple sequence alignment."""
    # Start with first sequence as reference
    ref = sequences[0]
    aligned_seqs: list[bytes] = [ref]
    total_score = 0.0

    # Align each sequence to growing profile
    for seq in sequences[1:]:
        # Align to last aligned sequence (simplified - real MSA uses profiles)
        ref_aligned, seq_aligned, score = _needleman_wunsch(
            aligned_seqs[0],
            seq,
            gap_open=gap_open,
            gap_extend=gap_extend,
        )

        # Update all previous alignments to match new length
        new_aligned: list[bytes] = []
        for prev in aligned_seqs:
            # Insert gaps where ref got new gaps
            new_prev: list[int] = []
            prev_idx = 0
            for byte in ref_aligned:
                if byte == GAP_BYTE:
                    new_prev.append(GAP_BYTE)
                elif prev_idx < len(prev):
                    new_prev.append(prev[prev_idx])
                    prev_idx += 1
            new_aligned.append(bytes(new_prev))

        aligned_seqs = new_aligned
        aligned_seqs.append(seq_aligned)
        total_score += score

    # Build result
    result_seqs: list[AlignedSequence] = []
    for orig, aligned in zip(sequences, aligned_seqs, strict=False):
        gaps = [i for i, b in enumerate(aligned) if b == GAP_BYTE]
        result_seqs.append(
            AlignedSequence(
                original=orig,
                aligned=aligned,
                gaps=gaps,
                score=0.0,  # Individual scores not tracked in progressive
            )
        )

    # Compute conservation scores
    alignment_length = len(aligned_seqs[0]) if aligned_seqs else 0
    conservation = compute_conservation_scores(aligned_seqs)

    # Find conserved regions
    conserved_regions: list[tuple[int, int]] = []
    in_region = False
    region_start = 0

    for i, score in enumerate(conservation):
        if score >= 0.8 and not in_region:
            in_region = True
            region_start = i
        elif score < 0.8 and in_region:
            in_region = False
            conserved_regions.append((region_start, i - 1))

    if in_region:
        conserved_regions.append((region_start, len(conservation) - 1))

    # Find common gap positions
    gap_positions: list[int] = []
    for pos in range(alignment_length):
        gap_count = sum(1 for seq in aligned_seqs if seq[pos] == GAP_BYTE)
        if gap_count > len(aligned_seqs) // 2:
            gap_positions.append(pos)

    return AlignmentResult(
        sequences=result_seqs,
        conservation_scores=conservation,
        conserved_regions=conserved_regions,
        gap_positions=gap_positions,
        alignment_score=total_score,
    )


def compute_conservation_scores(
    aligned_sequences: list[bytes],
) -> list[float]:
    """Compute per-position conservation scores.

    Conservation score at position i = frequency of most common byte
    (1.0 = fully conserved, <0.5 = poorly conserved).

    Args:
        aligned_sequences: Aligned sequences (same length)

    Returns:
        List of conservation scores

    References:
        FUZZY-005: Multiple Binary Sequence Alignment (MSA)
    """
    if not aligned_sequences:
        return []

    alignment_length = len(aligned_sequences[0])
    len(aligned_sequences)
    scores: list[float] = []

    for pos in range(alignment_length):
        # Count byte frequencies (excluding gaps)
        counts: dict[int, int] = {}
        non_gap_count = 0

        for seq in aligned_sequences:
            byte = seq[pos]
            if byte != GAP_BYTE:
                counts[byte] = counts.get(byte, 0) + 1
                non_gap_count += 1

        if non_gap_count == 0:
            scores.append(0.0)
        else:
            max_count = max(counts.values())
            scores.append(max_count / non_gap_count)

    return scores