oscura 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oscura/__init__.py +813 -8
- oscura/__main__.py +392 -0
- oscura/analyzers/__init__.py +37 -0
- oscura/analyzers/digital/__init__.py +177 -0
- oscura/analyzers/digital/bus.py +691 -0
- oscura/analyzers/digital/clock.py +805 -0
- oscura/analyzers/digital/correlation.py +720 -0
- oscura/analyzers/digital/edges.py +632 -0
- oscura/analyzers/digital/extraction.py +413 -0
- oscura/analyzers/digital/quality.py +878 -0
- oscura/analyzers/digital/signal_quality.py +877 -0
- oscura/analyzers/digital/thresholds.py +708 -0
- oscura/analyzers/digital/timing.py +1104 -0
- oscura/analyzers/eye/__init__.py +46 -0
- oscura/analyzers/eye/diagram.py +434 -0
- oscura/analyzers/eye/metrics.py +555 -0
- oscura/analyzers/jitter/__init__.py +83 -0
- oscura/analyzers/jitter/ber.py +333 -0
- oscura/analyzers/jitter/decomposition.py +759 -0
- oscura/analyzers/jitter/measurements.py +413 -0
- oscura/analyzers/jitter/spectrum.py +220 -0
- oscura/analyzers/measurements.py +40 -0
- oscura/analyzers/packet/__init__.py +171 -0
- oscura/analyzers/packet/daq.py +1077 -0
- oscura/analyzers/packet/metrics.py +437 -0
- oscura/analyzers/packet/parser.py +327 -0
- oscura/analyzers/packet/payload.py +2156 -0
- oscura/analyzers/packet/payload_analysis.py +1312 -0
- oscura/analyzers/packet/payload_extraction.py +236 -0
- oscura/analyzers/packet/payload_patterns.py +670 -0
- oscura/analyzers/packet/stream.py +359 -0
- oscura/analyzers/patterns/__init__.py +266 -0
- oscura/analyzers/patterns/clustering.py +1036 -0
- oscura/analyzers/patterns/discovery.py +539 -0
- oscura/analyzers/patterns/learning.py +797 -0
- oscura/analyzers/patterns/matching.py +1091 -0
- oscura/analyzers/patterns/periodic.py +650 -0
- oscura/analyzers/patterns/sequences.py +767 -0
- oscura/analyzers/power/__init__.py +116 -0
- oscura/analyzers/power/ac_power.py +391 -0
- oscura/analyzers/power/basic.py +383 -0
- oscura/analyzers/power/conduction.py +314 -0
- oscura/analyzers/power/efficiency.py +297 -0
- oscura/analyzers/power/ripple.py +356 -0
- oscura/analyzers/power/soa.py +372 -0
- oscura/analyzers/power/switching.py +479 -0
- oscura/analyzers/protocol/__init__.py +150 -0
- oscura/analyzers/protocols/__init__.py +150 -0
- oscura/analyzers/protocols/base.py +500 -0
- oscura/analyzers/protocols/can.py +620 -0
- oscura/analyzers/protocols/can_fd.py +448 -0
- oscura/analyzers/protocols/flexray.py +405 -0
- oscura/analyzers/protocols/hdlc.py +399 -0
- oscura/analyzers/protocols/i2c.py +368 -0
- oscura/analyzers/protocols/i2s.py +296 -0
- oscura/analyzers/protocols/jtag.py +393 -0
- oscura/analyzers/protocols/lin.py +445 -0
- oscura/analyzers/protocols/manchester.py +333 -0
- oscura/analyzers/protocols/onewire.py +501 -0
- oscura/analyzers/protocols/spi.py +334 -0
- oscura/analyzers/protocols/swd.py +325 -0
- oscura/analyzers/protocols/uart.py +393 -0
- oscura/analyzers/protocols/usb.py +495 -0
- oscura/analyzers/signal_integrity/__init__.py +63 -0
- oscura/analyzers/signal_integrity/embedding.py +294 -0
- oscura/analyzers/signal_integrity/equalization.py +370 -0
- oscura/analyzers/signal_integrity/sparams.py +484 -0
- oscura/analyzers/spectral/__init__.py +53 -0
- oscura/analyzers/spectral/chunked.py +273 -0
- oscura/analyzers/spectral/chunked_fft.py +571 -0
- oscura/analyzers/spectral/chunked_wavelet.py +391 -0
- oscura/analyzers/spectral/fft.py +92 -0
- oscura/analyzers/statistical/__init__.py +250 -0
- oscura/analyzers/statistical/checksum.py +923 -0
- oscura/analyzers/statistical/chunked_corr.py +228 -0
- oscura/analyzers/statistical/classification.py +778 -0
- oscura/analyzers/statistical/entropy.py +1113 -0
- oscura/analyzers/statistical/ngrams.py +614 -0
- oscura/analyzers/statistics/__init__.py +119 -0
- oscura/analyzers/statistics/advanced.py +885 -0
- oscura/analyzers/statistics/basic.py +263 -0
- oscura/analyzers/statistics/correlation.py +630 -0
- oscura/analyzers/statistics/distribution.py +298 -0
- oscura/analyzers/statistics/outliers.py +463 -0
- oscura/analyzers/statistics/streaming.py +93 -0
- oscura/analyzers/statistics/trend.py +520 -0
- oscura/analyzers/validation.py +598 -0
- oscura/analyzers/waveform/__init__.py +36 -0
- oscura/analyzers/waveform/measurements.py +943 -0
- oscura/analyzers/waveform/measurements_with_uncertainty.py +371 -0
- oscura/analyzers/waveform/spectral.py +1689 -0
- oscura/analyzers/waveform/wavelets.py +298 -0
- oscura/api/__init__.py +62 -0
- oscura/api/dsl.py +538 -0
- oscura/api/fluent.py +571 -0
- oscura/api/operators.py +498 -0
- oscura/api/optimization.py +392 -0
- oscura/api/profiling.py +396 -0
- oscura/automotive/__init__.py +73 -0
- oscura/automotive/can/__init__.py +52 -0
- oscura/automotive/can/analysis.py +356 -0
- oscura/automotive/can/checksum.py +250 -0
- oscura/automotive/can/correlation.py +212 -0
- oscura/automotive/can/discovery.py +355 -0
- oscura/automotive/can/message_wrapper.py +375 -0
- oscura/automotive/can/models.py +385 -0
- oscura/automotive/can/patterns.py +381 -0
- oscura/automotive/can/session.py +452 -0
- oscura/automotive/can/state_machine.py +300 -0
- oscura/automotive/can/stimulus_response.py +461 -0
- oscura/automotive/dbc/__init__.py +15 -0
- oscura/automotive/dbc/generator.py +156 -0
- oscura/automotive/dbc/parser.py +146 -0
- oscura/automotive/dtc/__init__.py +30 -0
- oscura/automotive/dtc/database.py +3036 -0
- oscura/automotive/j1939/__init__.py +14 -0
- oscura/automotive/j1939/decoder.py +745 -0
- oscura/automotive/loaders/__init__.py +35 -0
- oscura/automotive/loaders/asc.py +98 -0
- oscura/automotive/loaders/blf.py +77 -0
- oscura/automotive/loaders/csv_can.py +136 -0
- oscura/automotive/loaders/dispatcher.py +136 -0
- oscura/automotive/loaders/mdf.py +331 -0
- oscura/automotive/loaders/pcap.py +132 -0
- oscura/automotive/obd/__init__.py +14 -0
- oscura/automotive/obd/decoder.py +707 -0
- oscura/automotive/uds/__init__.py +48 -0
- oscura/automotive/uds/decoder.py +265 -0
- oscura/automotive/uds/models.py +64 -0
- oscura/automotive/visualization.py +369 -0
- oscura/batch/__init__.py +55 -0
- oscura/batch/advanced.py +627 -0
- oscura/batch/aggregate.py +300 -0
- oscura/batch/analyze.py +139 -0
- oscura/batch/logging.py +487 -0
- oscura/batch/metrics.py +556 -0
- oscura/builders/__init__.py +41 -0
- oscura/builders/signal_builder.py +1131 -0
- oscura/cli/__init__.py +14 -0
- oscura/cli/batch.py +339 -0
- oscura/cli/characterize.py +273 -0
- oscura/cli/compare.py +775 -0
- oscura/cli/decode.py +551 -0
- oscura/cli/main.py +247 -0
- oscura/cli/shell.py +350 -0
- oscura/comparison/__init__.py +66 -0
- oscura/comparison/compare.py +397 -0
- oscura/comparison/golden.py +487 -0
- oscura/comparison/limits.py +391 -0
- oscura/comparison/mask.py +434 -0
- oscura/comparison/trace_diff.py +30 -0
- oscura/comparison/visualization.py +481 -0
- oscura/compliance/__init__.py +70 -0
- oscura/compliance/advanced.py +756 -0
- oscura/compliance/masks.py +363 -0
- oscura/compliance/reporting.py +483 -0
- oscura/compliance/testing.py +298 -0
- oscura/component/__init__.py +38 -0
- oscura/component/impedance.py +365 -0
- oscura/component/reactive.py +598 -0
- oscura/component/transmission_line.py +312 -0
- oscura/config/__init__.py +191 -0
- oscura/config/defaults.py +254 -0
- oscura/config/loader.py +348 -0
- oscura/config/memory.py +271 -0
- oscura/config/migration.py +458 -0
- oscura/config/pipeline.py +1077 -0
- oscura/config/preferences.py +530 -0
- oscura/config/protocol.py +875 -0
- oscura/config/schema.py +713 -0
- oscura/config/settings.py +420 -0
- oscura/config/thresholds.py +599 -0
- oscura/convenience.py +457 -0
- oscura/core/__init__.py +299 -0
- oscura/core/audit.py +457 -0
- oscura/core/backend_selector.py +405 -0
- oscura/core/cache.py +590 -0
- oscura/core/cancellation.py +439 -0
- oscura/core/confidence.py +225 -0
- oscura/core/config.py +506 -0
- oscura/core/correlation.py +216 -0
- oscura/core/cross_domain.py +422 -0
- oscura/core/debug.py +301 -0
- oscura/core/edge_cases.py +541 -0
- oscura/core/exceptions.py +535 -0
- oscura/core/gpu_backend.py +523 -0
- oscura/core/lazy.py +832 -0
- oscura/core/log_query.py +540 -0
- oscura/core/logging.py +931 -0
- oscura/core/logging_advanced.py +952 -0
- oscura/core/memoize.py +171 -0
- oscura/core/memory_check.py +274 -0
- oscura/core/memory_guard.py +290 -0
- oscura/core/memory_limits.py +336 -0
- oscura/core/memory_monitor.py +453 -0
- oscura/core/memory_progress.py +465 -0
- oscura/core/memory_warnings.py +315 -0
- oscura/core/numba_backend.py +362 -0
- oscura/core/performance.py +352 -0
- oscura/core/progress.py +524 -0
- oscura/core/provenance.py +358 -0
- oscura/core/results.py +331 -0
- oscura/core/types.py +504 -0
- oscura/core/uncertainty.py +383 -0
- oscura/discovery/__init__.py +52 -0
- oscura/discovery/anomaly_detector.py +672 -0
- oscura/discovery/auto_decoder.py +415 -0
- oscura/discovery/comparison.py +497 -0
- oscura/discovery/quality_validator.py +528 -0
- oscura/discovery/signal_detector.py +769 -0
- oscura/dsl/__init__.py +73 -0
- oscura/dsl/commands.py +246 -0
- oscura/dsl/interpreter.py +455 -0
- oscura/dsl/parser.py +689 -0
- oscura/dsl/repl.py +172 -0
- oscura/exceptions.py +59 -0
- oscura/exploratory/__init__.py +111 -0
- oscura/exploratory/error_recovery.py +642 -0
- oscura/exploratory/fuzzy.py +513 -0
- oscura/exploratory/fuzzy_advanced.py +786 -0
- oscura/exploratory/legacy.py +831 -0
- oscura/exploratory/parse.py +358 -0
- oscura/exploratory/recovery.py +275 -0
- oscura/exploratory/sync.py +382 -0
- oscura/exploratory/unknown.py +707 -0
- oscura/export/__init__.py +25 -0
- oscura/export/wireshark/README.md +265 -0
- oscura/export/wireshark/__init__.py +47 -0
- oscura/export/wireshark/generator.py +312 -0
- oscura/export/wireshark/lua_builder.py +159 -0
- oscura/export/wireshark/templates/dissector.lua.j2 +92 -0
- oscura/export/wireshark/type_mapping.py +165 -0
- oscura/export/wireshark/validator.py +105 -0
- oscura/exporters/__init__.py +94 -0
- oscura/exporters/csv.py +303 -0
- oscura/exporters/exporters.py +44 -0
- oscura/exporters/hdf5.py +219 -0
- oscura/exporters/html_export.py +701 -0
- oscura/exporters/json_export.py +291 -0
- oscura/exporters/markdown_export.py +367 -0
- oscura/exporters/matlab_export.py +354 -0
- oscura/exporters/npz_export.py +219 -0
- oscura/exporters/spice_export.py +210 -0
- oscura/extensibility/__init__.py +131 -0
- oscura/extensibility/docs.py +752 -0
- oscura/extensibility/extensions.py +1125 -0
- oscura/extensibility/logging.py +259 -0
- oscura/extensibility/measurements.py +485 -0
- oscura/extensibility/plugins.py +414 -0
- oscura/extensibility/registry.py +346 -0
- oscura/extensibility/templates.py +913 -0
- oscura/extensibility/validation.py +651 -0
- oscura/filtering/__init__.py +89 -0
- oscura/filtering/base.py +563 -0
- oscura/filtering/convenience.py +564 -0
- oscura/filtering/design.py +725 -0
- oscura/filtering/filters.py +32 -0
- oscura/filtering/introspection.py +605 -0
- oscura/guidance/__init__.py +24 -0
- oscura/guidance/recommender.py +429 -0
- oscura/guidance/wizard.py +518 -0
- oscura/inference/__init__.py +251 -0
- oscura/inference/active_learning/README.md +153 -0
- oscura/inference/active_learning/__init__.py +38 -0
- oscura/inference/active_learning/lstar.py +257 -0
- oscura/inference/active_learning/observation_table.py +230 -0
- oscura/inference/active_learning/oracle.py +78 -0
- oscura/inference/active_learning/teachers/__init__.py +15 -0
- oscura/inference/active_learning/teachers/simulator.py +192 -0
- oscura/inference/adaptive_tuning.py +453 -0
- oscura/inference/alignment.py +653 -0
- oscura/inference/bayesian.py +943 -0
- oscura/inference/binary.py +1016 -0
- oscura/inference/crc_reverse.py +711 -0
- oscura/inference/logic.py +288 -0
- oscura/inference/message_format.py +1305 -0
- oscura/inference/protocol.py +417 -0
- oscura/inference/protocol_dsl.py +1084 -0
- oscura/inference/protocol_library.py +1230 -0
- oscura/inference/sequences.py +809 -0
- oscura/inference/signal_intelligence.py +1509 -0
- oscura/inference/spectral.py +215 -0
- oscura/inference/state_machine.py +634 -0
- oscura/inference/stream.py +918 -0
- oscura/integrations/__init__.py +59 -0
- oscura/integrations/llm.py +1827 -0
- oscura/jupyter/__init__.py +32 -0
- oscura/jupyter/display.py +268 -0
- oscura/jupyter/magic.py +334 -0
- oscura/loaders/__init__.py +526 -0
- oscura/loaders/binary.py +69 -0
- oscura/loaders/configurable.py +1255 -0
- oscura/loaders/csv.py +26 -0
- oscura/loaders/csv_loader.py +473 -0
- oscura/loaders/hdf5.py +9 -0
- oscura/loaders/hdf5_loader.py +510 -0
- oscura/loaders/lazy.py +370 -0
- oscura/loaders/mmap_loader.py +583 -0
- oscura/loaders/numpy_loader.py +436 -0
- oscura/loaders/pcap.py +432 -0
- oscura/loaders/preprocessing.py +368 -0
- oscura/loaders/rigol.py +287 -0
- oscura/loaders/sigrok.py +321 -0
- oscura/loaders/tdms.py +367 -0
- oscura/loaders/tektronix.py +711 -0
- oscura/loaders/validation.py +584 -0
- oscura/loaders/vcd.py +464 -0
- oscura/loaders/wav.py +233 -0
- oscura/math/__init__.py +45 -0
- oscura/math/arithmetic.py +824 -0
- oscura/math/interpolation.py +413 -0
- oscura/onboarding/__init__.py +39 -0
- oscura/onboarding/help.py +498 -0
- oscura/onboarding/tutorials.py +405 -0
- oscura/onboarding/wizard.py +466 -0
- oscura/optimization/__init__.py +19 -0
- oscura/optimization/parallel.py +440 -0
- oscura/optimization/search.py +532 -0
- oscura/pipeline/__init__.py +43 -0
- oscura/pipeline/base.py +338 -0
- oscura/pipeline/composition.py +242 -0
- oscura/pipeline/parallel.py +448 -0
- oscura/pipeline/pipeline.py +375 -0
- oscura/pipeline/reverse_engineering.py +1119 -0
- oscura/plugins/__init__.py +122 -0
- oscura/plugins/base.py +272 -0
- oscura/plugins/cli.py +497 -0
- oscura/plugins/discovery.py +411 -0
- oscura/plugins/isolation.py +418 -0
- oscura/plugins/lifecycle.py +959 -0
- oscura/plugins/manager.py +493 -0
- oscura/plugins/registry.py +421 -0
- oscura/plugins/versioning.py +372 -0
- oscura/py.typed +0 -0
- oscura/quality/__init__.py +65 -0
- oscura/quality/ensemble.py +740 -0
- oscura/quality/explainer.py +338 -0
- oscura/quality/scoring.py +616 -0
- oscura/quality/warnings.py +456 -0
- oscura/reporting/__init__.py +248 -0
- oscura/reporting/advanced.py +1234 -0
- oscura/reporting/analyze.py +448 -0
- oscura/reporting/argument_preparer.py +596 -0
- oscura/reporting/auto_report.py +507 -0
- oscura/reporting/batch.py +615 -0
- oscura/reporting/chart_selection.py +223 -0
- oscura/reporting/comparison.py +330 -0
- oscura/reporting/config.py +615 -0
- oscura/reporting/content/__init__.py +39 -0
- oscura/reporting/content/executive.py +127 -0
- oscura/reporting/content/filtering.py +191 -0
- oscura/reporting/content/minimal.py +257 -0
- oscura/reporting/content/verbosity.py +162 -0
- oscura/reporting/core.py +508 -0
- oscura/reporting/core_formats/__init__.py +17 -0
- oscura/reporting/core_formats/multi_format.py +210 -0
- oscura/reporting/engine.py +836 -0
- oscura/reporting/export.py +366 -0
- oscura/reporting/formatting/__init__.py +129 -0
- oscura/reporting/formatting/emphasis.py +81 -0
- oscura/reporting/formatting/numbers.py +403 -0
- oscura/reporting/formatting/standards.py +55 -0
- oscura/reporting/formatting.py +466 -0
- oscura/reporting/html.py +578 -0
- oscura/reporting/index.py +590 -0
- oscura/reporting/multichannel.py +296 -0
- oscura/reporting/output.py +379 -0
- oscura/reporting/pdf.py +373 -0
- oscura/reporting/plots.py +731 -0
- oscura/reporting/pptx_export.py +360 -0
- oscura/reporting/renderers/__init__.py +11 -0
- oscura/reporting/renderers/pdf.py +94 -0
- oscura/reporting/sections.py +471 -0
- oscura/reporting/standards.py +680 -0
- oscura/reporting/summary_generator.py +368 -0
- oscura/reporting/tables.py +397 -0
- oscura/reporting/template_system.py +724 -0
- oscura/reporting/templates/__init__.py +15 -0
- oscura/reporting/templates/definition.py +205 -0
- oscura/reporting/templates/index.html +649 -0
- oscura/reporting/templates/index.md +173 -0
- oscura/schemas/__init__.py +158 -0
- oscura/schemas/bus_configuration.json +322 -0
- oscura/schemas/device_mapping.json +182 -0
- oscura/schemas/packet_format.json +418 -0
- oscura/schemas/protocol_definition.json +363 -0
- oscura/search/__init__.py +16 -0
- oscura/search/anomaly.py +292 -0
- oscura/search/context.py +149 -0
- oscura/search/pattern.py +160 -0
- oscura/session/__init__.py +34 -0
- oscura/session/annotations.py +289 -0
- oscura/session/history.py +313 -0
- oscura/session/session.py +445 -0
- oscura/streaming/__init__.py +43 -0
- oscura/streaming/chunked.py +611 -0
- oscura/streaming/progressive.py +393 -0
- oscura/streaming/realtime.py +622 -0
- oscura/testing/__init__.py +54 -0
- oscura/testing/synthetic.py +808 -0
- oscura/triggering/__init__.py +68 -0
- oscura/triggering/base.py +229 -0
- oscura/triggering/edge.py +353 -0
- oscura/triggering/pattern.py +344 -0
- oscura/triggering/pulse.py +581 -0
- oscura/triggering/window.py +453 -0
- oscura/ui/__init__.py +48 -0
- oscura/ui/formatters.py +526 -0
- oscura/ui/progressive_display.py +340 -0
- oscura/utils/__init__.py +99 -0
- oscura/utils/autodetect.py +338 -0
- oscura/utils/buffer.py +389 -0
- oscura/utils/lazy.py +407 -0
- oscura/utils/lazy_imports.py +147 -0
- oscura/utils/memory.py +836 -0
- oscura/utils/memory_advanced.py +1326 -0
- oscura/utils/memory_extensions.py +465 -0
- oscura/utils/progressive.py +352 -0
- oscura/utils/windowing.py +362 -0
- oscura/visualization/__init__.py +321 -0
- oscura/visualization/accessibility.py +526 -0
- oscura/visualization/annotations.py +374 -0
- oscura/visualization/axis_scaling.py +305 -0
- oscura/visualization/colors.py +453 -0
- oscura/visualization/digital.py +337 -0
- oscura/visualization/eye.py +420 -0
- oscura/visualization/histogram.py +281 -0
- oscura/visualization/interactive.py +858 -0
- oscura/visualization/jitter.py +702 -0
- oscura/visualization/keyboard.py +394 -0
- oscura/visualization/layout.py +365 -0
- oscura/visualization/optimization.py +1028 -0
- oscura/visualization/palettes.py +446 -0
- oscura/visualization/plot.py +92 -0
- oscura/visualization/power.py +290 -0
- oscura/visualization/power_extended.py +626 -0
- oscura/visualization/presets.py +467 -0
- oscura/visualization/protocols.py +932 -0
- oscura/visualization/render.py +207 -0
- oscura/visualization/rendering.py +444 -0
- oscura/visualization/reverse_engineering.py +791 -0
- oscura/visualization/signal_integrity.py +808 -0
- oscura/visualization/specialized.py +553 -0
- oscura/visualization/spectral.py +811 -0
- oscura/visualization/styles.py +381 -0
- oscura/visualization/thumbnails.py +311 -0
- oscura/visualization/time_axis.py +351 -0
- oscura/visualization/waveform.py +367 -0
- oscura/workflow/__init__.py +13 -0
- oscura/workflow/dag.py +377 -0
- oscura/workflows/__init__.py +58 -0
- oscura/workflows/compliance.py +280 -0
- oscura/workflows/digital.py +272 -0
- oscura/workflows/multi_trace.py +502 -0
- oscura/workflows/power.py +178 -0
- oscura/workflows/protocol.py +492 -0
- oscura/workflows/reverse_engineering.py +639 -0
- oscura/workflows/signal_integrity.py +227 -0
- oscura-0.1.0.dist-info/METADATA +300 -0
- oscura-0.1.0.dist-info/RECORD +463 -0
- oscura-0.1.0.dist-info/entry_points.txt +2 -0
- {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/licenses/LICENSE +1 -1
- oscura-0.0.1.dist-info/METADATA +0 -63
- oscura-0.0.1.dist-info/RECORD +0 -5
- {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,653 @@
|
|
|
1
|
+
"""Sequence alignment algorithms for binary message comparison.
|
|
2
|
+
|
|
3
|
+
Requirements addressed: PSI-003
|
|
4
|
+
|
|
5
|
+
This module applies sequence alignment algorithms to compare binary messages
|
|
6
|
+
for identifying common structures and variations.
|
|
7
|
+
|
|
8
|
+
Key capabilities:
|
|
9
|
+
- Needleman-Wunsch for global alignment
|
|
10
|
+
- Smith-Waterman for local alignment
|
|
11
|
+
- Multiple sequence alignment
|
|
12
|
+
- Conserved/variable region detection
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import Any, Literal
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
from numpy.typing import NDArray
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
|
|
23
|
+
class AlignmentResult:
|
|
24
|
+
"""Result of sequence alignment.
|
|
25
|
+
|
|
26
|
+
: Alignment result representation.
|
|
27
|
+
|
|
28
|
+
Attributes:
|
|
29
|
+
aligned_a: Aligned sequence A (with gaps as -1)
|
|
30
|
+
aligned_b: Aligned sequence B (with gaps as -1)
|
|
31
|
+
score: Alignment score
|
|
32
|
+
similarity: Similarity ratio (0-1)
|
|
33
|
+
identity: Fraction of identical positions
|
|
34
|
+
gaps: Number of gap positions
|
|
35
|
+
conserved_regions: List of (start, end) tuples for conserved regions
|
|
36
|
+
variable_regions: List of (start, end) tuples for variable regions
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
aligned_a: bytes | list[int] # Aligned sequence A (with gaps as -1)
|
|
40
|
+
aligned_b: bytes | list[int] # Aligned sequence B (with gaps as -1)
|
|
41
|
+
score: float
|
|
42
|
+
similarity: float # 0-1
|
|
43
|
+
identity: float # Fraction of identical positions
|
|
44
|
+
gaps: int # Number of gap positions
|
|
45
|
+
conserved_regions: list[tuple[int, int]] # (start, end) of conserved regions
|
|
46
|
+
variable_regions: list[tuple[int, int]] # (start, end) of variable regions
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def align_global(
    seq_a: bytes | NDArray[Any],
    seq_b: bytes | NDArray[Any],
    gap_penalty: float = -1.0,
    match_score: float = 1.0,
    mismatch_penalty: float = -1.0,
) -> AlignmentResult:
    """Global alignment using Needleman-Wunsch algorithm.

    Aligns the two sequences end to end with dynamic programming. Two
    (len(seq_a) + 1) x (len(seq_b) + 1) matrices are filled, so time and
    memory are both O(mn).

    Args:
        seq_a: First sequence (bytes or array); converted to uint8
        seq_b: Second sequence (bytes or array); converted to uint8
        gap_penalty: Penalty for gaps
        match_score: Score for matching positions
        mismatch_penalty: Penalty for mismatches

    Returns:
        AlignmentResult with aligned sequences (gaps as -1) and statistics
    """
    # Convert to arrays
    if isinstance(seq_a, bytes):
        arr_a = np.frombuffer(seq_a, dtype=np.uint8)
    else:
        arr_a = np.array(seq_a, dtype=np.uint8)

    if isinstance(seq_b, bytes):
        arr_b = np.frombuffer(seq_b, dtype=np.uint8)
    else:
        arr_b = np.array(seq_b, dtype=np.uint8)

    n, m = len(arr_a), len(arr_b)

    # Scoring matrix and traceback matrix.
    # Traceback codes: 0 = diagonal, 1 = up (gap in seq_b), 2 = left (gap in seq_a)
    score_matrix = np.zeros((n + 1, m + 1), dtype=np.float32)
    traceback = np.zeros((n + 1, m + 1), dtype=np.int8)

    # First column / first row: a prefix aligned against nothing is all gaps
    score_matrix[1:, 0] = np.arange(1, n + 1) * gap_penalty
    traceback[1:, 0] = 1
    score_matrix[0, 1:] = np.arange(1, m + 1) * gap_penalty
    traceback[0, 1:] = 2

    # Fill the matrices
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            # Match/mismatch
            if arr_a[i - 1] == arr_b[j - 1]:
                diag_score = score_matrix[i - 1, j - 1] + match_score
            else:
                diag_score = score_matrix[i - 1, j - 1] + mismatch_penalty

            # Gap in seq_b (up)
            up_score = score_matrix[i - 1, j] + gap_penalty

            # Gap in seq_a (left)
            left_score = score_matrix[i, j - 1] + gap_penalty

            # Choose best; on ties the order is diagonal, then up, then left
            max_score = max(diag_score, up_score, left_score)
            score_matrix[i, j] = max_score

            if max_score == diag_score:
                traceback[i, j] = 0  # Diagonal
            elif max_score == up_score:
                traceback[i, j] = 1  # Up
            else:
                traceback[i, j] = 2  # Left

    # Traceback from the bottom-right corner to the origin
    aligned_a: list[int] = []
    aligned_b: list[int] = []

    i, j = n, m
    while i > 0 or j > 0:
        direction = traceback[i, j]
        if direction == 0:  # Diagonal
            aligned_a.append(int(arr_a[i - 1]))
            aligned_b.append(int(arr_b[j - 1]))
            i -= 1
            j -= 1
        elif direction == 1:  # Up
            aligned_a.append(int(arr_a[i - 1]))
            aligned_b.append(-1)  # Gap
            i -= 1
        else:  # Left
            aligned_a.append(-1)  # Gap
            aligned_b.append(int(arr_b[j - 1]))
            j -= 1

    # Reverse (we traced backwards)
    aligned_a.reverse()
    aligned_b.reverse()

    # Calculate statistics
    final_score = float(score_matrix[n, m])
    similarity = compute_similarity(aligned_a, aligned_b)

    # Handle empty alignments (both inputs empty) without dividing by zero
    if len(aligned_a) == 0:
        identity = 0.0
        gaps = 0
    else:
        identity = sum(
            1 for a, b in zip(aligned_a, aligned_b, strict=True) if a == b and a != -1
        ) / len(aligned_a)
        gaps = sum(1 for a, b in zip(aligned_a, aligned_b, strict=True) if a == -1 or b == -1)

    # Find conserved and variable regions
    conserved = _find_conserved_simple(aligned_a, aligned_b)
    variable = _find_variable_simple(aligned_a, aligned_b)

    return AlignmentResult(
        aligned_a=aligned_a,
        aligned_b=aligned_b,
        score=final_score,
        similarity=similarity,
        identity=identity,
        gaps=gaps,
        conserved_regions=conserved,
        variable_regions=variable,
    )
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def align_local(
    seq_a: bytes | NDArray[Any],
    seq_b: bytes | NDArray[Any],
    gap_penalty: float = -1.0,
    match_score: float = 2.0,
    mismatch_penalty: float = -1.0,
) -> AlignmentResult:
    """Local alignment using Smith-Waterman algorithm.

    Finds the best-scoring pair of sub-sequences with dynamic programming.
    Cell scores are clamped at zero, and the traceback starts at the
    highest-scoring cell and stops at the first zero-score cell. Two
    (len(seq_a) + 1) x (len(seq_b) + 1) matrices are filled, so time and
    memory are both O(mn).

    Args:
        seq_a: First sequence (bytes or array); converted to uint8
        seq_b: Second sequence (bytes or array); converted to uint8
        gap_penalty: Penalty for gaps
        match_score: Score for matches
        mismatch_penalty: Penalty for mismatches

    Returns:
        AlignmentResult with best local alignment (gaps as -1). When no
        cell scores above zero the aligned sequences are empty and
        similarity, identity and gaps are all zero.
    """
    # Convert to arrays
    if isinstance(seq_a, bytes):
        arr_a = np.frombuffer(seq_a, dtype=np.uint8)
    else:
        arr_a = np.array(seq_a, dtype=np.uint8)

    if isinstance(seq_b, bytes):
        arr_b = np.frombuffer(seq_b, dtype=np.uint8)
    else:
        arr_b = np.array(seq_b, dtype=np.uint8)

    n, m = len(arr_a), len(arr_b)

    # Scoring matrix and traceback matrix.
    # Traceback codes: -1 = stop, 0 = diagonal, 1 = up, 2 = left
    score_matrix = np.zeros((n + 1, m + 1), dtype=np.float32)
    traceback = np.zeros((n + 1, m + 1), dtype=np.int8)

    # Track maximum score position (first cell reaching the maximum wins,
    # because the comparison below is strict)
    max_score = 0.0
    max_i, max_j = 0, 0

    # Fill the matrices (Smith-Waterman: no negative scores)
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            # Match/mismatch
            if arr_a[i - 1] == arr_b[j - 1]:
                diag_score = score_matrix[i - 1, j - 1] + match_score
            else:
                diag_score = score_matrix[i - 1, j - 1] + mismatch_penalty

            # Gap in seq_b (up)
            up_score = score_matrix[i - 1, j] + gap_penalty

            # Gap in seq_a (left)
            left_score = score_matrix[i, j - 1] + gap_penalty

            # Smith-Waterman: can start fresh (score = 0)
            cell_score = max(0.0, diag_score, up_score, left_score)
            score_matrix[i, j] = cell_score

            if cell_score == 0:
                traceback[i, j] = -1  # Stop
            elif cell_score == diag_score:
                traceback[i, j] = 0  # Diagonal
            elif cell_score == up_score:
                traceback[i, j] = 1  # Up
            else:
                traceback[i, j] = 2  # Left

            # Track maximum
            if cell_score > max_score:
                max_score = cell_score
                max_i, max_j = i, j

    # Traceback from max position until a stop cell or a matrix border
    aligned_a: list[int] = []
    aligned_b: list[int] = []

    i, j = max_i, max_j
    while i > 0 and j > 0 and traceback[i, j] != -1:
        direction = traceback[i, j]
        if direction == 0:  # Diagonal
            aligned_a.append(int(arr_a[i - 1]))
            aligned_b.append(int(arr_b[j - 1]))
            i -= 1
            j -= 1
        elif direction == 1:  # Up
            aligned_a.append(int(arr_a[i - 1]))
            aligned_b.append(-1)  # Gap
            i -= 1
        else:  # Left
            aligned_a.append(-1)  # Gap
            aligned_b.append(int(arr_b[j - 1]))
            j -= 1

    # Reverse (we traced backwards)
    aligned_a.reverse()
    aligned_b.reverse()

    # Calculate statistics; an empty alignment gets all-zero statistics
    if len(aligned_a) > 0:
        similarity = compute_similarity(aligned_a, aligned_b)
        identity = sum(
            1 for a, b in zip(aligned_a, aligned_b, strict=True) if a == b and a != -1
        ) / len(aligned_a)
        gaps = sum(1 for a, b in zip(aligned_a, aligned_b, strict=True) if a == -1 or b == -1)
    else:
        similarity = 0.0
        identity = 0.0
        gaps = 0

    # Find conserved and variable regions
    conserved = _find_conserved_simple(aligned_a, aligned_b)
    variable = _find_variable_simple(aligned_a, aligned_b)

    return AlignmentResult(
        aligned_a=aligned_a,
        aligned_b=aligned_b,
        score=float(max_score),
        similarity=similarity,
        identity=identity,
        gaps=gaps,
        conserved_regions=conserved,
        variable_regions=variable,
    )
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def align_multiple(
|
|
304
|
+
sequences: list[bytes | NDArray[Any]],
|
|
305
|
+
method: Literal["progressive", "iterative"] = "progressive",
|
|
306
|
+
) -> list[list[int]]:
|
|
307
|
+
"""Multiple sequence alignment.
|
|
308
|
+
|
|
309
|
+
: Progressive MSA using guide tree and pairwise alignment.
|
|
310
|
+
|
|
311
|
+
Args:
|
|
312
|
+
sequences: List of sequences (bytes or arrays)
|
|
313
|
+
method: Alignment method ('progressive' or 'iterative')
|
|
314
|
+
|
|
315
|
+
Returns:
|
|
316
|
+
List of aligned sequences (as lists with -1 for gaps)
|
|
317
|
+
"""
|
|
318
|
+
if len(sequences) == 0:
|
|
319
|
+
return []
|
|
320
|
+
if len(sequences) == 1:
|
|
321
|
+
# Convert to list
|
|
322
|
+
if isinstance(sequences[0], bytes):
|
|
323
|
+
return [list(np.frombuffer(sequences[0], dtype=np.uint8))]
|
|
324
|
+
else:
|
|
325
|
+
return [list(sequences[0])]
|
|
326
|
+
|
|
327
|
+
# Progressive alignment
|
|
328
|
+
if method == "progressive":
|
|
329
|
+
# Start with first two sequences
|
|
330
|
+
result = align_global(sequences[0], sequences[1])
|
|
331
|
+
# Convert to list[int] if needed
|
|
332
|
+
aligned_a_list = (
|
|
333
|
+
list(result.aligned_a) if isinstance(result.aligned_a, bytes) else result.aligned_a
|
|
334
|
+
)
|
|
335
|
+
aligned_b_list = (
|
|
336
|
+
list(result.aligned_b) if isinstance(result.aligned_b, bytes) else result.aligned_b
|
|
337
|
+
)
|
|
338
|
+
aligned: list[list[int]] = [aligned_a_list, aligned_b_list]
|
|
339
|
+
|
|
340
|
+
# Add remaining sequences one by one
|
|
341
|
+
for seq in sequences[2:]:
|
|
342
|
+
# Align seq to consensus of current alignment
|
|
343
|
+
consensus_seq = _compute_consensus(aligned)
|
|
344
|
+
consensus_bytes = bytes([v if v != -1 else 0 for v in consensus_seq])
|
|
345
|
+
result = align_global(consensus_bytes, seq)
|
|
346
|
+
|
|
347
|
+
# Insert gaps in existing alignments
|
|
348
|
+
new_aligned: list[list[int]] = []
|
|
349
|
+
result_a_list = (
|
|
350
|
+
list(result.aligned_a) if isinstance(result.aligned_a, bytes) else result.aligned_a
|
|
351
|
+
)
|
|
352
|
+
for existing in aligned:
|
|
353
|
+
new_seq = _insert_gaps_from_alignment(existing, result_a_list)
|
|
354
|
+
new_aligned.append(new_seq)
|
|
355
|
+
|
|
356
|
+
# Add new sequence
|
|
357
|
+
result_b_list = (
|
|
358
|
+
list(result.aligned_b) if isinstance(result.aligned_b, bytes) else result.aligned_b
|
|
359
|
+
)
|
|
360
|
+
new_aligned.append(result_b_list)
|
|
361
|
+
aligned = new_aligned
|
|
362
|
+
|
|
363
|
+
return aligned
|
|
364
|
+
else:
|
|
365
|
+
# Iterative not implemented, fall back to progressive
|
|
366
|
+
return align_multiple(sequences, method="progressive")
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def compute_similarity(aligned_a: bytes | list[int], aligned_b: bytes | list[int]) -> float:
|
|
370
|
+
"""Compute similarity between aligned sequences.
|
|
371
|
+
|
|
372
|
+
: Similarity calculation.
|
|
373
|
+
|
|
374
|
+
Args:
|
|
375
|
+
aligned_a: First aligned sequence
|
|
376
|
+
aligned_b: Second aligned sequence
|
|
377
|
+
|
|
378
|
+
Returns:
|
|
379
|
+
Similarity ratio (0-1)
|
|
380
|
+
|
|
381
|
+
Raises:
|
|
382
|
+
ValueError: If aligned sequences have different lengths.
|
|
383
|
+
"""
|
|
384
|
+
if len(aligned_a) != len(aligned_b):
|
|
385
|
+
raise ValueError("Aligned sequences must have same length")
|
|
386
|
+
|
|
387
|
+
if len(aligned_a) == 0:
|
|
388
|
+
return 0.0
|
|
389
|
+
|
|
390
|
+
matches = 0
|
|
391
|
+
total = 0
|
|
392
|
+
|
|
393
|
+
for a, b in zip(aligned_a, aligned_b, strict=True):
|
|
394
|
+
# Skip double gaps
|
|
395
|
+
if a == -1 and b == -1:
|
|
396
|
+
continue
|
|
397
|
+
|
|
398
|
+
total += 1
|
|
399
|
+
if a == b and a != -1:
|
|
400
|
+
matches += 1
|
|
401
|
+
|
|
402
|
+
if total == 0:
|
|
403
|
+
return 0.0
|
|
404
|
+
|
|
405
|
+
return matches / total
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def find_conserved_regions(
    aligned_sequences: list[list[int]], min_conservation: float = 0.9, min_length: int = 4
) -> list[tuple[int, int]]:
    """Locate runs of columns in which the aligned sequences mostly agree.

    A column's conservation is the share of its non-gap values that equal the
    most frequent non-gap value. Columns holding only gaps (``-1``) score 0.0.
    The column count is taken from the first sequence; shorter sequences
    simply do not contribute to columns beyond their end.

    Args:
        aligned_sequences: List of aligned sequences
        min_conservation: Minimum conservation ratio (0-1)
        min_length: Minimum region length

    Returns:
        List of (start, end) tuples for conserved regions, ``end`` exclusive
    """
    from collections import Counter

    if not aligned_sequences:
        return []

    width = len(aligned_sequences[0])

    def column_score(pos: int) -> float:
        # Gaps are ignored when measuring agreement within a column.
        symbols = [row[pos] for row in aligned_sequences if pos < len(row) and row[pos] != -1]
        if not symbols:
            return 0.0
        return max(Counter(symbols).values()) / len(symbols)

    scores = [column_score(pos) for pos in range(width)]

    found = []
    run_start = None
    # The extra iteration at ``width`` acts as a sentinel that closes a run
    # still open at the end of the alignment.
    for idx in range(width + 1):
        if idx < width and scores[idx] >= min_conservation:
            if run_start is None:
                run_start = idx
            continue
        if run_start is not None:
            if idx - run_start >= min_length:
                found.append((run_start, idx))
            run_start = None

    return found
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def find_variable_regions(
    aligned_sequences: list[list[int]], max_conservation: float = 0.5, min_length: int = 2
) -> list[tuple[int, int]]:
    """Locate runs of columns in which the aligned sequences mostly disagree.

    A column's conservation is the share of its non-gap values that equal the
    most frequent non-gap value. Columns holding only gaps (``-1``) are
    treated as fully conserved (1.0). The column count is taken from the
    first sequence.

    Args:
        aligned_sequences: List of aligned sequences
        max_conservation: Maximum conservation ratio (0-1)
        min_length: Minimum region length

    Returns:
        List of (start, end) tuples for variable regions, ``end`` exclusive
    """
    from collections import Counter

    if not aligned_sequences:
        return []

    width = len(aligned_sequences[0])

    scores = []
    for pos in range(width):
        # Only real (non-gap) symbols take part in the vote.
        symbols = [row[pos] for row in aligned_sequences if pos < len(row) and row[pos] != -1]
        if symbols:
            scores.append(max(Counter(symbols).values()) / len(symbols))
        else:
            scores.append(1.0)  # All gaps = conserved

    found = []
    run_start = None
    # Index ``width`` is a sentinel step that flushes a run reaching the end.
    for idx in range(width + 1):
        is_variable = idx < width and scores[idx] <= max_conservation
        if is_variable:
            if run_start is None:
                run_start = idx
        elif run_start is not None:
            if idx - run_start >= min_length:
                found.append((run_start, idx))
            run_start = None

    return found
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def _find_conserved_simple(aligned_a: list[int], aligned_b: list[int]) -> list[tuple[int, int]]:
|
|
534
|
+
"""Find conserved regions in pairwise alignment.
|
|
535
|
+
|
|
536
|
+
Args:
|
|
537
|
+
aligned_a: First aligned sequence
|
|
538
|
+
aligned_b: Second aligned sequence
|
|
539
|
+
|
|
540
|
+
Returns:
|
|
541
|
+
List of (start, end) tuples
|
|
542
|
+
"""
|
|
543
|
+
regions = []
|
|
544
|
+
start = None
|
|
545
|
+
|
|
546
|
+
for i, (a, b) in enumerate(zip(aligned_a, aligned_b, strict=True)):
|
|
547
|
+
if a == b and a != -1:
|
|
548
|
+
if start is None:
|
|
549
|
+
start = i
|
|
550
|
+
else:
|
|
551
|
+
if start is not None:
|
|
552
|
+
if i - start >= 4: # Min length 4
|
|
553
|
+
regions.append((start, i))
|
|
554
|
+
start = None
|
|
555
|
+
|
|
556
|
+
# Handle region at end
|
|
557
|
+
if start is not None and len(aligned_a) - start >= 4:
|
|
558
|
+
regions.append((start, len(aligned_a)))
|
|
559
|
+
|
|
560
|
+
return regions
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
def _find_variable_simple(aligned_a: list[int], aligned_b: list[int]) -> list[tuple[int, int]]:
|
|
564
|
+
"""Find variable regions in pairwise alignment.
|
|
565
|
+
|
|
566
|
+
Args:
|
|
567
|
+
aligned_a: First aligned sequence
|
|
568
|
+
aligned_b: Second aligned sequence
|
|
569
|
+
|
|
570
|
+
Returns:
|
|
571
|
+
List of (start, end) tuples
|
|
572
|
+
"""
|
|
573
|
+
regions = []
|
|
574
|
+
start = None
|
|
575
|
+
|
|
576
|
+
for i, (a, b) in enumerate(zip(aligned_a, aligned_b, strict=True)):
|
|
577
|
+
if a != b:
|
|
578
|
+
if start is None:
|
|
579
|
+
start = i
|
|
580
|
+
else:
|
|
581
|
+
if start is not None:
|
|
582
|
+
if i - start >= 2: # Min length 2
|
|
583
|
+
regions.append((start, i))
|
|
584
|
+
start = None
|
|
585
|
+
|
|
586
|
+
# Handle region at end
|
|
587
|
+
if start is not None and len(aligned_a) - start >= 2:
|
|
588
|
+
regions.append((start, len(aligned_a)))
|
|
589
|
+
|
|
590
|
+
return regions
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def _compute_consensus(aligned_sequences: list[list[int]]) -> list[int]:
|
|
594
|
+
"""Compute consensus sequence from multiple aligned sequences.
|
|
595
|
+
|
|
596
|
+
Args:
|
|
597
|
+
aligned_sequences: List of aligned sequences
|
|
598
|
+
|
|
599
|
+
Returns:
|
|
600
|
+
Consensus sequence
|
|
601
|
+
"""
|
|
602
|
+
if not aligned_sequences:
|
|
603
|
+
return []
|
|
604
|
+
|
|
605
|
+
length = max(len(seq) for seq in aligned_sequences)
|
|
606
|
+
consensus = []
|
|
607
|
+
|
|
608
|
+
for pos in range(length):
|
|
609
|
+
values = [seq[pos] for seq in aligned_sequences if pos < len(seq)]
|
|
610
|
+
|
|
611
|
+
# Skip gaps when computing consensus
|
|
612
|
+
non_gap_values = [v for v in values if v != -1]
|
|
613
|
+
|
|
614
|
+
if non_gap_values:
|
|
615
|
+
# Most common value
|
|
616
|
+
from collections import Counter
|
|
617
|
+
|
|
618
|
+
counts = Counter(non_gap_values)
|
|
619
|
+
consensus_val = counts.most_common(1)[0][0]
|
|
620
|
+
consensus.append(consensus_val)
|
|
621
|
+
else:
|
|
622
|
+
# All gaps
|
|
623
|
+
consensus.append(-1)
|
|
624
|
+
|
|
625
|
+
return consensus
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
def _insert_gaps_from_alignment(sequence: list[int], alignment_template: list[int]) -> list[int]:
|
|
629
|
+
"""Insert gaps into sequence based on alignment template.
|
|
630
|
+
|
|
631
|
+
Args:
|
|
632
|
+
sequence: Original sequence
|
|
633
|
+
alignment_template: Template showing where gaps should be
|
|
634
|
+
|
|
635
|
+
Returns:
|
|
636
|
+
Sequence with gaps inserted
|
|
637
|
+
"""
|
|
638
|
+
result = []
|
|
639
|
+
seq_idx = 0
|
|
640
|
+
|
|
641
|
+
for template_val in alignment_template:
|
|
642
|
+
if template_val == -1:
|
|
643
|
+
# Gap in template, insert gap
|
|
644
|
+
result.append(-1)
|
|
645
|
+
else:
|
|
646
|
+
# Non-gap, copy from sequence
|
|
647
|
+
if seq_idx < len(sequence):
|
|
648
|
+
result.append(sequence[seq_idx])
|
|
649
|
+
seq_idx += 1
|
|
650
|
+
else:
|
|
651
|
+
result.append(-1)
|
|
652
|
+
|
|
653
|
+
return result
|