pyseqalignment 0.1.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {pyseqalignment-0.1.2/src/pyseqalignment.egg-info → pyseqalignment-0.1.4}/PKG-INFO +50 -5
  2. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/README.md +49 -4
  3. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/pyproject.toml +9 -2
  4. pyseqalignment-0.1.4/setup.py +58 -0
  5. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/__init__.py +9 -2
  6. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/core/__init__.py +8 -1
  7. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/core/alignment.py +13 -0
  8. pyseqalignment-0.1.4/src/pyseqalign/core/nw_affine.py +202 -0
  9. pyseqalignment-0.1.4/src/pyseqalign/logo/__init__.py +17 -0
  10. pyseqalignment-0.1.4/src/pyseqalign/logo/probability.py +192 -0
  11. pyseqalignment-0.1.4/src/pyseqalign/logo/profile.py +249 -0
  12. pyseqalignment-0.1.4/src/pyseqalign/logo/render.py +254 -0
  13. pyseqalignment-0.1.4/src/pyseqalign/msa/__init__.py +17 -0
  14. pyseqalignment-0.1.4/src/pyseqalign/msa/consensus.py +72 -0
  15. pyseqalignment-0.1.4/src/pyseqalign/msa/distance_matrix.py +118 -0
  16. pyseqalignment-0.1.4/src/pyseqalign/msa/guide_tree.py +191 -0
  17. pyseqalignment-0.1.4/src/pyseqalign/msa/progressive.py +221 -0
  18. pyseqalignment-0.1.4/src/pyseqalign/scoring/protocols.py +28 -0
  19. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4/src/pyseqalignment.egg-info}/PKG-INFO +50 -5
  20. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalignment.egg-info/SOURCES.txt +13 -0
  21. pyseqalignment-0.1.4/tests/test_msa_logo.py +62 -0
  22. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/LICENSE +0 -0
  23. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/setup.cfg +0 -0
  24. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/accel.py +0 -0
  25. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/core/needleman_wunsch.py +0 -0
  26. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/core/smith_waterman.py +0 -0
  27. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/cpp/build_cpp_aligner.sh +0 -0
  28. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/cpp/cpp_aligner.cpp +0 -0
  29. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/learning/__init__.py +0 -0
  30. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/learning/aleph.py +0 -0
  31. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/learning/aleph_files/__init__.py +0 -0
  32. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/learning/aleph_files/aleph_swi_ak.pl +0 -0
  33. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/learning/base.py +0 -0
  34. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/learning/popper.py +0 -0
  35. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/learning/task_builder.py +0 -0
  36. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/prolog/__init__.py +0 -0
  37. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/prolog/engine.py +0 -0
  38. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/prolog/knowledge/__init__.py +0 -0
  39. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/prolog/knowledge/amino_acids.pl +0 -0
  40. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/prolog/knowledge/blosum50.pl +0 -0
  41. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/prolog/knowledge/defaults.pl +0 -0
  42. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/prolog/knowledge/distances.pl +0 -0
  43. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/__init__.py +0 -0
  44. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/distance.py +0 -0
  45. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/matrices.py +0 -0
  46. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/matrix_data/BLOSUM100 +0 -0
  47. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/matrix_data/BLOSUM50 +0 -0
  48. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/matrix_data/BLOSUM60 +0 -0
  49. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/matrix_data/BLOSUM62 +0 -0
  50. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/matrix_data/BLOSUM70 +0 -0
  51. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/matrix_data/BLOSUM80 +0 -0
  52. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/matrix_data/BLOSUM90 +0 -0
  53. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/matrix_data/PAM150 +0 -0
  54. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/matrix_data/PAM200 +0 -0
  55. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/matrix_data/PAM250 +0 -0
  56. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/matrix_data/PAM50 +0 -0
  57. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/scoring/matrix_data/__init__.py +0 -0
  58. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/utils/__init__.py +0 -0
  59. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalign/utils/helpers.py +0 -0
  60. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalignment.egg-info/dependency_links.txt +0 -0
  61. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalignment.egg-info/requires.txt +0 -0
  62. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/src/pyseqalignment.egg-info/top_level.txt +0 -0
  63. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/tests/test_learning.py +0 -0
  64. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/tests/test_needleman_wunsch.py +0 -0
  65. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/tests/test_scoring.py +0 -0
  66. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/tests/test_smith_waterman.py +0 -0
  67. {pyseqalignment-0.1.2 → pyseqalignment-0.1.4}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyseqalignment
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: pySeqAlign -- sequence alignment with Prolog-style distance functions and ILP learning
5
5
  Author-email: Andreas Karwath <a.karwath@bham.ac.uk>
6
6
  License-Expression: MIT
@@ -56,7 +56,9 @@ pySeqAlign provides Smith-Waterman (local) and Needleman-Wunsch (global) sequenc
56
56
  ## Features
57
57
 
58
58
  - **Smith-Waterman** local alignment with k-best non-overlapping results
59
- - **Needleman-Wunsch** global alignment
59
+ - **Needleman-Wunsch** global alignment (linear and **affine** gap costs)
60
+ - **Multiple sequence alignment** -- progressive MSA over a neighbor-joining guide tree, parameterised by any scoring function (`pyseqalign.msa`)
61
+ - **Relational sequence logos** -- information-content logos of aligned logical atoms, with per-column least-general generalisation, after Karwath & Kersting (ILP 2006) (`pyseqalign.logo`)
60
62
  - **Prolog-based distance functions** via SWI-Prolog integration (optional)
61
63
  - **Substitution matrices** -- BLOSUM (50, 60, 62, 70, 80, 90, 100) and PAM (50, 150, 200, 250) bundled; any NCBI-format matrix loadable from file or downloaded at runtime
62
64
  - **Nienhuys-Cheng distance** for recursive structural comparison of logical atoms
@@ -309,19 +311,62 @@ For reference, other notable systems in the field include:
309
311
  - [Metagol](https://github.com/metagol/metagol) -- Meta-Interpretive Learning
310
312
  - [DeepStochLog](https://github.com/ML-KULeuven/deepstochlog) -- Neural-symbolic ILP combining logic and neural networks
311
313
 
314
+ ## Multiple alignment & relational logos
315
+
316
+ pySeqAlign can align *and* summarise sequences of structured logical atoms,
317
+ reproducing Karwath & Kersting, *Relational Sequence Alignments and Logos*
318
+ (ILP 2006) — with no learning involved:
319
+
320
+ ```python
321
+ from pyseqalign.msa import progressive_msa
322
+ from pyseqalign.logo import relational_logo
323
+ from pyseqalign.scoring.distance import AtomDistance
324
+
325
+ # atoms as structured tuples: id -> (predicate, *args); 0 = gap
326
+ atom_store = {1: ('h', 'a', 'r', 'm'), 2: ('h', 'a', 'r', 'l'), 3: ('s', 'p', 'm')}
327
+ seqs = {'d1': [1, 2, 3], 'd2': [1, 3, 2], 'd3': [2, 3]}
328
+
329
+ scoring = AtomDistance(atom_store=atom_store, gap_score=-0.5) # Nienhuys-Cheng
330
+ msa = progressive_msa(seqs, scoring, gap_open=-1.0, gap_extend=-0.1)
331
+ rows = list(msa.aligned_sequences.values())
332
+
333
+ relational_logo(rows, atom_store, 'logo.png', title='example fold')
334
+ ```
335
+
336
+ `progressive_msa` accepts **any** scoring function, so a reward matrix learned
337
+ by [pyREAL](https://github.com/athro/pyREAL)'s boosting can drive the alignment
338
+ in place of the fixed distance. Runnable reproductions of the paper's SCOP and
339
+ balloon logos are in [`examples/`](examples/).
340
+
312
341
  ## Fast C++ aligner (optional)
313
342
 
314
343
  The pure-Python aligners are fine for typical use. For heavy workloads (e.g.
315
344
  boosting that re-aligns thousands of sequence pairs per iteration), an optional
316
- **C++ affine-gap Needleman-Wunsch** kernel is provided. It is NOT built by default
317
- (the core stays pure Python); build it once with a C++ compiler + pybind11:
345
+ **C++ affine-gap Needleman-Wunsch** kernel is provided.
346
+
347
+ **It is compiled automatically at install time** (best effort). The package is
348
+ distributed as an sdist, so `pip install pyseqalignment` builds from source and
349
+ tries to compile the accelerator for your Python/ABI/platform using a C++
350
+ compiler + pybind11 (pulled in as a build dependency). On macOS/Linux with a
351
+ compiler present this "just works"; if no compiler is available the install
352
+ still succeeds and the library falls back to the pure-Python aligner (on Windows the build is always skipped). Check with:
353
+
354
+ ```python
355
+ from pyseqalign.accel import cpp_available
356
+ print(cpp_available()) # True if the accelerator compiled at install
357
+ ```
358
+
359
+ If you installed without a compiler and later want the accelerator, install one
360
+ (Xcode Command Line Tools on macOS, `build-essential` on Debian/Ubuntu) and
361
+ either reinstall (`pip install --force-reinstall --no-cache-dir --no-binary pyseqalignment pyseqalignment`)
362
+ or build it in place once:
318
363
 
319
364
  ```bash
320
365
  pip install pybind11
321
366
  src/pyseqalign/cpp/build_cpp_aligner.sh # or: PY=$(which python) src/.../build_cpp_aligner.sh
322
367
  ```
323
368
 
324
- This compiles the extension into the `pyseqalign` package. Then:
369
+ Either way it compiles the extension into the `pyseqalign` package. Then:
325
370
 
326
371
  ```python
327
372
  from pyseqalign.accel import cpp_available, load
@@ -18,7 +18,9 @@ pySeqAlign provides Smith-Waterman (local) and Needleman-Wunsch (global) sequenc
18
18
  ## Features
19
19
 
20
20
  - **Smith-Waterman** local alignment with k-best non-overlapping results
21
- - **Needleman-Wunsch** global alignment
21
+ - **Needleman-Wunsch** global alignment (linear and **affine** gap costs)
22
+ - **Multiple sequence alignment** -- progressive MSA over a neighbor-joining guide tree, parameterised by any scoring function (`pyseqalign.msa`)
23
+ - **Relational sequence logos** -- information-content logos of aligned logical atoms, with per-column least-general generalisation, after Karwath & Kersting (ILP 2006) (`pyseqalign.logo`)
22
24
  - **Prolog-based distance functions** via SWI-Prolog integration (optional)
23
25
  - **Substitution matrices** -- BLOSUM (50, 60, 62, 70, 80, 90, 100) and PAM (50, 150, 200, 250) bundled; any NCBI-format matrix loadable from file or downloaded at runtime
24
26
  - **Nienhuys-Cheng distance** for recursive structural comparison of logical atoms
@@ -271,19 +273,62 @@ For reference, other notable systems in the field include:
271
273
  - [Metagol](https://github.com/metagol/metagol) -- Meta-Interpretive Learning
272
274
  - [DeepStochLog](https://github.com/ML-KULeuven/deepstochlog) -- Neural-symbolic ILP combining logic and neural networks
273
275
 
276
+ ## Multiple alignment & relational logos
277
+
278
+ pySeqAlign can align *and* summarise sequences of structured logical atoms,
279
+ reproducing Karwath & Kersting, *Relational Sequence Alignments and Logos*
280
+ (ILP 2006) — with no learning involved:
281
+
282
+ ```python
283
+ from pyseqalign.msa import progressive_msa
284
+ from pyseqalign.logo import relational_logo
285
+ from pyseqalign.scoring.distance import AtomDistance
286
+
287
+ # atoms as structured tuples: id -> (predicate, *args); 0 = gap
288
+ atom_store = {1: ('h', 'a', 'r', 'm'), 2: ('h', 'a', 'r', 'l'), 3: ('s', 'p', 'm')}
289
+ seqs = {'d1': [1, 2, 3], 'd2': [1, 3, 2], 'd3': [2, 3]}
290
+
291
+ scoring = AtomDistance(atom_store=atom_store, gap_score=-0.5) # Nienhuys-Cheng
292
+ msa = progressive_msa(seqs, scoring, gap_open=-1.0, gap_extend=-0.1)
293
+ rows = list(msa.aligned_sequences.values())
294
+
295
+ relational_logo(rows, atom_store, 'logo.png', title='example fold')
296
+ ```
297
+
298
+ `progressive_msa` accepts **any** scoring function, so a reward matrix learned
299
+ by [pyREAL](https://github.com/athro/pyREAL)'s boosting can drive the alignment
300
+ in place of the fixed distance. Runnable reproductions of the paper's SCOP and
301
+ balloon logos are in [`examples/`](examples/).
302
+
274
303
  ## Fast C++ aligner (optional)
275
304
 
276
305
  The pure-Python aligners are fine for typical use. For heavy workloads (e.g.
277
306
  boosting that re-aligns thousands of sequence pairs per iteration), an optional
278
- **C++ affine-gap Needleman-Wunsch** kernel is provided. It is NOT built by default
279
- (the core stays pure Python); build it once with a C++ compiler + pybind11:
307
+ **C++ affine-gap Needleman-Wunsch** kernel is provided.
308
+
309
+ **It is compiled automatically at install time** (best effort). The package is
310
+ distributed as an sdist, so `pip install pyseqalignment` builds from source and
311
+ tries to compile the accelerator for your Python/ABI/platform using a C++
312
+ compiler + pybind11 (pulled in as a build dependency). On macOS/Linux with a
313
+ compiler present this "just works"; if no compiler is available the install
314
+ still succeeds and the library falls back to the pure-Python aligner (on Windows the build is always skipped). Check with:
315
+
316
+ ```python
317
+ from pyseqalign.accel import cpp_available
318
+ print(cpp_available()) # True if the accelerator compiled at install
319
+ ```
320
+
321
+ If you installed without a compiler and later want the accelerator, install one
322
+ (Xcode Command Line Tools on macOS, `build-essential` on Debian/Ubuntu) and
323
+ either reinstall (`pip install --force-reinstall --no-cache-dir --no-binary pyseqalignment pyseqalignment`)
324
+ or build it in place once:
280
325
 
281
326
  ```bash
282
327
  pip install pybind11
283
328
  src/pyseqalign/cpp/build_cpp_aligner.sh # or: PY=$(which python) src/.../build_cpp_aligner.sh
284
329
  ```
285
330
 
286
- This compiles the extension into the `pyseqalign` package. Then:
331
+ Either way it compiles the extension into the `pyseqalign` package. Then:
287
332
 
288
333
  ```python
289
334
  from pyseqalign.accel import cpp_available, load
@@ -1,12 +1,14 @@
1
1
  [build-system]
2
- requires = ["setuptools>=68.0"]
2
+ # pybind11 is a build-time dep so the OPTIONAL C++ aligner can be compiled at
3
+ # install time (best-effort; see setup.py). It is not a runtime dependency.
4
+ requires = ["setuptools>=68.0", "pybind11>=2.10"]
3
5
  build-backend = "setuptools.build_meta"
4
6
 
5
7
  [project]
6
8
  # PyPI distribution name (the import package is `pyseqalign`; the name
7
9
  # `pyseqalign` was blocked by PyPI's similarity guard vs. an existing project).
8
10
  name = "pyseqalignment"
9
- version = "0.1.2"
11
+ version = "0.1.4"
10
12
  description = "pySeqAlign -- sequence alignment with Prolog-style distance functions and ILP learning"
11
13
  readme = "README.md"
12
14
  license = "MIT"
@@ -78,6 +80,11 @@ line-length = 100
78
80
 
79
81
  [tool.ruff.lint]
80
82
  select = ["E", "F", "W", "I", "N", "UP"]
83
+ ignore = [
84
+ "N803", # uppercase argument names (matrix math convention: M, Ix, Iy)
85
+ "N806", # uppercase local variables in functions (same reason)
86
+ "E741", # ambiguous variable name 'l' (used in tree (m, l, r) unpacking)
87
+ ]
81
88
 
82
89
  [tool.mypy]
83
90
  python_version = "3.10"
@@ -0,0 +1,58 @@
1
+ """Best-effort build of the optional C++ affine-gap aligner.
2
+
3
+ pySeqAlign's core is pure Python. The C++ aligner (``pyseqalign.cpp_aligner``)
4
+ is an OPTIONAL accelerator (~100-270x faster, identical results). We try to
5
+ compile it at install time on any platform that has a C++ compiler + pybind11;
6
+ if that fails (no compiler, no pybind11, unsupported platform, ...) the install
7
+ STILL SUCCEEDS and the library transparently falls back to the pure-Python
8
+ aligner -- see ``pyseqalign.accel``.
9
+
10
+ This is why the project publishes an sdist (not a pure-Python wheel): pip builds
11
+ from source on the target machine, giving every install a chance to compile the
12
+ accelerator locally for its own Python/ABI/platform.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import sys
18
+
19
+ from setuptools import Extension, setup
20
+ from setuptools.command.build_ext import build_ext
21
+
22
+ # Only build on POSIX (macOS/Linux); the hand-tuned flags below are GCC/Clang.
23
+ _CPP = 'src/pyseqalign/cpp/cpp_aligner.cpp'
24
+ ext_modules: list[Extension] = []
25
+ if sys.platform != 'win32':
26
+ try:
27
+ import pybind11
28
+ ext_modules = [
29
+ Extension(
30
+ 'pyseqalign.cpp_aligner',
31
+ [_CPP],
32
+ include_dirs=[pybind11.get_include()],
33
+ language='c++',
34
+ optional=True, # setuptools won't fail the build if this ext won't compile
35
+ extra_compile_args=['-O3', '-std=c++14'],
36
+ )
37
+ ]
38
+ except Exception as exc: # pybind11 missing -> skip the accelerator
39
+ print(f'pyseqalign: skipping optional C++ aligner ({exc}); pure-Python fallback.')
40
+
41
+
42
+ class BestEffortBuildExt(build_ext):
43
+ """Compile the accelerator if possible; never break the install if not."""
44
+
45
+ def run(self) -> None:
46
+ try:
47
+ super().run()
48
+ except Exception as exc: # pragma: no cover - depends on build env
49
+ print(f'pyseqalign: optional C++ aligner not built ({exc}); pure-Python fallback.')
50
+
51
+ def build_extension(self, ext) -> None:
52
+ try:
53
+ super().build_extension(ext)
54
+ except Exception as exc: # pragma: no cover - depends on build env
55
+ print(f'pyseqalign: optional C++ aligner not built ({exc}); pure-Python fallback.')
56
+
57
+
58
+ setup(ext_modules=ext_modules, cmdclass={'build_ext': BestEffortBuildExt})
@@ -1,14 +1,21 @@
1
1
  """pySeqAlign -- Sequence alignment with Prolog-style distance functions and ILP learning."""
2
2
 
3
- from pyseqalign.core.alignment import AlignmentResult, LocalAlignmentResult
3
+ from pyseqalign.core.alignment import (
4
+ AffineAlignmentResult,
5
+ AlignmentResult,
6
+ LocalAlignmentResult,
7
+ )
4
8
  from pyseqalign.core.needleman_wunsch import NeedlemanWunsch
9
+ from pyseqalign.core.nw_affine import NeedlemanWunschAffine
5
10
  from pyseqalign.core.smith_waterman import SmithWaterman
6
11
 
7
- __version__ = "0.1.0"
12
+ __version__ = "0.1.4"
8
13
 
9
14
  __all__ = [
10
15
  "SmithWaterman",
11
16
  "NeedlemanWunsch",
17
+ "NeedlemanWunschAffine",
12
18
  "AlignmentResult",
19
+ "AffineAlignmentResult",
13
20
  "LocalAlignmentResult",
14
21
  ]
@@ -1,12 +1,19 @@
1
1
  """Core alignment algorithms."""
2
2
 
3
- from pyseqalign.core.alignment import AlignmentResult, LocalAlignmentResult
3
+ from pyseqalign.core.alignment import (
4
+ AffineAlignmentResult,
5
+ AlignmentResult,
6
+ LocalAlignmentResult,
7
+ )
4
8
  from pyseqalign.core.needleman_wunsch import NeedlemanWunsch
9
+ from pyseqalign.core.nw_affine import NeedlemanWunschAffine
5
10
  from pyseqalign.core.smith_waterman import SmithWaterman
6
11
 
7
12
  __all__ = [
8
13
  "SmithWaterman",
9
14
  "NeedlemanWunsch",
15
+ "NeedlemanWunschAffine",
10
16
  "AlignmentResult",
17
+ "AffineAlignmentResult",
11
18
  "LocalAlignmentResult",
12
19
  ]
@@ -22,6 +22,19 @@ class AlignmentResult:
22
22
  length: int
23
23
 
24
24
 
25
+ @dataclass
26
+ class AffineAlignmentResult(AlignmentResult):
27
+ """Extended result from affine-gap alignment.
28
+
29
+ Attributes:
30
+ gap_opens: Number of gap-open events in both sequences combined.
31
+ gap_extensions: Number of gap-extension events.
32
+ """
33
+
34
+ gap_opens: int = 0
35
+ gap_extensions: int = 0
36
+
37
+
25
38
  @dataclass
26
39
  class LocalAlignmentResult:
27
40
  """Result of a single local (Smith-Waterman) alignment.
@@ -0,0 +1,202 @@
1
+ """Needleman-Wunsch global alignment with affine gap penalties.
2
+
3
+ Translated from the legacy C++ AlignerAffine::_align() implementation.
4
+ Uses three DP matrices (M, Ix, Iy) to distinguish gap-open from gap-extend.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import numpy as np
10
+
11
+ from pyseqalign.core.alignment import AffineAlignmentResult
12
+ from pyseqalign.scoring.protocols import ScoringFunction
13
+
14
+ # Matrix indices.
15
+ _M = 0 # match/mismatch
16
+ _IX = 1 # gap in target (consuming query element)
17
+ _IY = 2 # gap in query (consuming target element)
18
+
19
+
20
+ class NeedlemanWunschAffine:
21
+ """Needleman-Wunsch with affine gap penalties.
22
+
23
+ Recurrences (similarity mode):
24
+ M[i][j] = score(q[i], t[j]) + max(M[i-1][j-1], Ix[i-1][j-1], Iy[i-1][j-1])
25
+ Ix[i][j] = max(M[i-1][j] + gap_open, Ix[i-1][j] + gap_extend, Iy[i-1][j] + gap_open)
26
+ Iy[i][j] = max(M[i][j-1] + gap_open, Iy[i][j-1] + gap_extend, Ix[i][j-1] + gap_open)
27
+
28
+ Args:
29
+ scoring: Scoring function (element ID 0 = gap).
30
+ gap_open: Cost for opening a new gap (should be negative for penalties).
31
+ gap_extend: Cost for extending an existing gap (should be negative,
32
+ typically less severe than gap_open).
33
+ """
34
+
35
+ def __init__(
36
+ self,
37
+ scoring: ScoringFunction,
38
+ gap_open: float = -2.5,
39
+ gap_extend: float = -0.25,
40
+ ) -> None:
41
+ self.scoring = scoring
42
+ self.gap_open = gap_open
43
+ self.gap_extend = gap_extend
44
+
45
+ def align(self, seq1: list[int], seq2: list[int]) -> AffineAlignmentResult:
46
+ """Compute the optimal global alignment with affine gap penalties.
47
+
48
+ Args:
49
+ seq1: Query sequence (list of integer element IDs).
50
+ seq2: Target sequence.
51
+
52
+ Returns:
53
+ An ``AffineAlignmentResult`` with aligned sequences and gap statistics.
54
+ """
55
+ n = len(seq1)
56
+ m = len(seq2)
57
+
58
+ NEG_INF = -np.inf
59
+
60
+ # F[k, i, j] for k in {M=0, Ix=1, Iy=2}
61
+ F = np.full((3, n + 1, m + 1), NEG_INF, dtype=np.float64)
62
+ # Traceback: B[k, i, j, :] = (from_k, from_i, from_j)
63
+ B = np.full((3, n + 1, m + 1, 3), -1, dtype=np.int32)
64
+
65
+ F[_M, 0, 0] = 0.0
66
+
67
+ d = self.gap_open
68
+ e = self.gap_extend
69
+
70
+ # --- Border init, column j=0: query elements aligned to target gaps (Ix) ---
71
+ for i0 in range(n):
72
+ i = i0 + 1
73
+ if i > 1:
74
+ F[_IX, i, 0] = F[_IX, i - 1, 0] + e
75
+ else:
76
+ F[_IX, i, 0] = d
77
+ B[_IX, i, 0] = [_IX, i - 1, 0]
78
+ # M and Iy are -inf along this border (already set).
79
+
80
+ # --- Border init, row i=0: target elements aligned to query gaps (Iy) ---
81
+ for j0 in range(m):
82
+ j = j0 + 1
83
+ if j > 1:
84
+ F[_IY, 0, j] = F[_IY, 0, j - 1] + e
85
+ else:
86
+ F[_IY, 0, j] = d
87
+ B[_IY, 0, j] = [_IY, 0, j - 1]
88
+ # M and Ix are -inf along this border (already set).
89
+
90
+ # --- Main DP fill ---
91
+ for i0 in range(n):
92
+ i = i0 + 1
93
+ for j0 in range(m):
94
+ j = j0 + 1
95
+
96
+ # Match/mismatch: diagonal transition.
97
+ s = self.scoring.score(seq1[i - 1], seq2[j - 1])
98
+ candidates_m = (
99
+ F[_M, i - 1, j - 1] + s,
100
+ F[_IX, i - 1, j - 1] + s,
101
+ F[_IY, i - 1, j - 1] + s,
102
+ )
103
+ best_k = _argmax3(candidates_m)
104
+ F[_M, i, j] = candidates_m[best_k]
105
+ B[_M, i, j] = [best_k, i - 1, j - 1]
106
+
107
+ # Ix: gap in target (consume query[i], skip target).
108
+ candidates_ix = (
109
+ F[_M, i - 1, j] + d, # new gap
110
+ F[_IX, i - 1, j] + e, # extend gap
111
+ F[_IY, i - 1, j] + d, # new gap
112
+ )
113
+ best_k = _argmax3(candidates_ix)
114
+ F[_IX, i, j] = candidates_ix[best_k]
115
+ B[_IX, i, j] = [best_k, i - 1, j]
116
+
117
+ # Iy: gap in query (skip query, consume target[j]).
118
+ candidates_iy = (
119
+ F[_M, i, j - 1] + d, # new gap
120
+ F[_IY, i, j - 1] + e, # extend gap
121
+ F[_IX, i, j - 1] + d, # new gap
122
+ )
123
+ best_k = _argmax3(candidates_iy)
124
+ F[_IY, i, j] = candidates_iy[best_k]
125
+ B[_IY, i, j] = [best_k, i, j - 1]
126
+
127
+ # --- Find best endpoint ---
128
+ end_scores = (F[_M, n, m], F[_IX, n, m], F[_IY, n, m])
129
+ best_end = _argmax3(end_scores)
130
+ score = end_scores[best_end]
131
+
132
+ # --- Traceback ---
133
+ align1, align2, gap_opens, gap_extensions = self._traceback(B, seq1, seq2, best_end, n, m)
134
+
135
+ return AffineAlignmentResult(
136
+ query=align1,
137
+ target=align2,
138
+ score=float(score),
139
+ length=len(align1),
140
+ gap_opens=gap_opens,
141
+ gap_extensions=gap_extensions,
142
+ )
143
+
144
+ @staticmethod
145
+ def _traceback(
146
+ B: np.ndarray,
147
+ seq1: list[int],
148
+ seq2: list[int],
149
+ start_k: int,
150
+ start_i: int,
151
+ start_j: int,
152
+ ) -> tuple[list[int], list[int], int, int]:
153
+ """Walk the traceback matrix to produce aligned sequences."""
154
+ align1: list[int] = []
155
+ align2: list[int] = []
156
+ gap_opens = 0
157
+ gap_extensions = 0
158
+
159
+ k, i, j = start_k, start_i, start_j
160
+ prev_k = -1
161
+
162
+ while i > 0 or j > 0:
163
+ from_k, from_i, from_j = int(B[k, i, j, 0]), int(B[k, i, j, 1]), int(B[k, i, j, 2])
164
+
165
+ if from_i < 0 or from_j < 0:
166
+ # Reached uninitialised border — shouldn't happen.
167
+ break
168
+
169
+ if k == _M:
170
+ # Diagonal: match/mismatch.
171
+ align1.append(seq1[i - 1])
172
+ align2.append(seq2[j - 1])
173
+ elif k == _IX:
174
+ # Gap in target.
175
+ align1.append(seq1[i - 1])
176
+ align2.append(0)
177
+ if prev_k != _IX:
178
+ gap_opens += 1
179
+ else:
180
+ gap_extensions += 1
181
+ else: # _IY
182
+ # Gap in query.
183
+ align1.append(0)
184
+ align2.append(seq2[j - 1])
185
+ if prev_k != _IY:
186
+ gap_opens += 1
187
+ else:
188
+ gap_extensions += 1
189
+
190
+ prev_k = k
191
+ k, i, j = from_k, from_i, from_j
192
+
193
+ align1.reverse()
194
+ align2.reverse()
195
+ return align1, align2, gap_opens, gap_extensions
196
+
197
+
198
+ def _argmax3(vals: tuple[float, float, float]) -> int:
199
+ """Return the index of the largest of exactly three values; ties go to the lowest index."""
200
+ if vals[0] >= vals[1]:
201
+ return 0 if vals[0] >= vals[2] else 2
202
+ return 1 if vals[1] >= vals[2] else 2
@@ -0,0 +1,17 @@
1
+ """Relational sequence logos — position-specific profiles of logical atoms."""
2
+
3
+ from pyseqalign.logo.probability import FreqDist, LidstoneProbDist, MLEProbDist
4
+ from pyseqalign.logo.profile import PositionProfile, RelationalProfile
5
+ from pyseqalign.logo.render import column_ic, lgg_atoms, relational_logo, term_str
6
+
7
+ __all__ = [
8
+ 'FreqDist',
9
+ 'MLEProbDist',
10
+ 'LidstoneProbDist',
11
+ 'PositionProfile',
12
+ 'RelationalProfile',
13
+ 'relational_logo',
14
+ 'column_ic',
15
+ 'lgg_atoms',
16
+ 'term_str',
17
+ ]