pyseqalignment 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyseqalign/__init__.py +14 -0
- pyseqalign/core/__init__.py +12 -0
- pyseqalign/core/alignment.py +67 -0
- pyseqalign/core/needleman_wunsch.py +122 -0
- pyseqalign/core/smith_waterman.py +173 -0
- pyseqalign/learning/__init__.py +20 -0
- pyseqalign/learning/aleph.py +212 -0
- pyseqalign/learning/aleph_files/__init__.py +0 -0
- pyseqalign/learning/aleph_files/aleph_swi_ak.pl +10420 -0
- pyseqalign/learning/base.py +68 -0
- pyseqalign/learning/popper.py +215 -0
- pyseqalign/learning/task_builder.py +213 -0
- pyseqalign/prolog/__init__.py +5 -0
- pyseqalign/prolog/engine.py +102 -0
- pyseqalign/prolog/knowledge/__init__.py +0 -0
- pyseqalign/prolog/knowledge/amino_acids.pl +53 -0
- pyseqalign/prolog/knowledge/blosum50.pl +800 -0
- pyseqalign/prolog/knowledge/defaults.pl +15 -0
- pyseqalign/prolog/knowledge/distances.pl +119 -0
- pyseqalign/scoring/__init__.py +11 -0
- pyseqalign/scoring/distance.py +100 -0
- pyseqalign/scoring/matrices.py +362 -0
- pyseqalign/scoring/matrix_data/BLOSUM100 +31 -0
- pyseqalign/scoring/matrix_data/BLOSUM50 +31 -0
- pyseqalign/scoring/matrix_data/BLOSUM60 +31 -0
- pyseqalign/scoring/matrix_data/BLOSUM62 +31 -0
- pyseqalign/scoring/matrix_data/BLOSUM70 +31 -0
- pyseqalign/scoring/matrix_data/BLOSUM80 +31 -0
- pyseqalign/scoring/matrix_data/BLOSUM90 +31 -0
- pyseqalign/scoring/matrix_data/PAM150 +34 -0
- pyseqalign/scoring/matrix_data/PAM200 +34 -0
- pyseqalign/scoring/matrix_data/PAM250 +34 -0
- pyseqalign/scoring/matrix_data/PAM50 +34 -0
- pyseqalign/scoring/matrix_data/__init__.py +0 -0
- pyseqalign/utils/__init__.py +9 -0
- pyseqalign/utils/helpers.py +47 -0
- pyseqalignment-0.1.0.dist-info/METADATA +317 -0
- pyseqalignment-0.1.0.dist-info/RECORD +41 -0
- pyseqalignment-0.1.0.dist-info/WHEEL +5 -0
- pyseqalignment-0.1.0.dist-info/licenses/LICENSE +21 -0
- pyseqalignment-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Base types and protocol for ILP learning backends.
|
|
2
|
+
|
|
3
|
+
Defines the common interface that both Aleph and Popper backends implement.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Protocol, runtime_checkable
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class ILPTask:
    """Container describing a single inductive logic programming problem.

    Groups the textual ingredients an ILP backend consumes: background
    knowledge, positive and negative examples, and the language bias that
    restricts which hypotheses may be proposed.

    Attributes:
        background: Prolog facts and rules available as background knowledge.
        positive: Example facts that should succeed under the learned program.
        negative: Example facts that should fail under the learned program.
        bias: Hypothesis-language declarations (mode declarations for Aleph,
            ``head_pred``/``body_pred`` facts for Popper).
        settings: Extra ILP system parameters as string key/value pairs.
        work_dir: Directory for intermediate files, or ``None`` to have one
            created automatically.
    """

    # Every collection field gets its own fresh container per instance.
    background: list[str] = field(default_factory=list)
    positive: list[str] = field(default_factory=list)
    negative: list[str] = field(default_factory=list)
    bias: list[str] = field(default_factory=list)
    settings: dict[str, str] = field(default_factory=dict)
    work_dir: Path | None = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class LearnedProgram:
    """Outcome of one ILP learning run.

    Attributes:
        clauses: Learned Prolog clauses, one string per rule.
        score: Quality score reported by the learner; its meaning depends on
            the backend (accuracy for Popper, coverage for Aleph).
        stats: Backend-specific statistics such as runtime or nodes explored.
        raw_output: Complete textual output produced by the ILP system.
    """

    clauses: list[str] = field(default_factory=list)
    score: float = 0.0
    stats: dict[str, object] = field(default_factory=dict)
    raw_output: str = ""

    @property
    def program_text(self) -> str:
        """Return all learned clauses as one newline-separated program."""
        line_break = "\n"
        return line_break.join(self.clauses)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@runtime_checkable
class ILPLearner(Protocol):
    """Structural interface every ILP backend has to satisfy.

    The protocol is runtime-checkable, so ``isinstance(obj, ILPLearner)``
    reports whether *obj* exposes a ``learn`` method.
    """

    def learn(self, task: ILPTask) -> LearnedProgram:
        """Solve *task* with this backend and return the learned program."""
        ...
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""Popper ILP backend.
|
|
2
|
+
|
|
3
|
+
Popper (Cropper & Morel, 2021) is a modern ILP system that learns from
|
|
4
|
+
failures. It combines answer set programming (ASP) with Prolog to search
|
|
5
|
+
the hypothesis space efficiently.
|
|
6
|
+
|
|
7
|
+
Popper is the **recommended** backend for new projects. It can learn
|
|
8
|
+
recursive and optimal programs, handles noise, and does not require
|
|
9
|
+
metarules.
|
|
10
|
+
|
|
11
|
+
Install Popper with::
|
|
12
|
+
|
|
13
|
+
pip install popper-ilp
|
|
14
|
+
|
|
15
|
+
Or from the repository::
|
|
16
|
+
|
|
17
|
+
pip install git+https://github.com/logic-and-learning-lab/Popper@main
|
|
18
|
+
|
|
19
|
+
Requires SWI-Prolog (>=9.2.0) and Clingo (>=5.6.2).
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import importlib
|
|
25
|
+
import tempfile
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
from pyseqalign.learning.base import ILPTask, LearnedProgram
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PopperLearner:
    """Popper ILP backend.

    Uses the Popper Python API to learn Prolog clauses from alignment
    examples: the task is written to ``bk.pl`` / ``exs.pl`` / ``bias.pl``,
    Popper's learning loop is run on that directory, and the outcome is
    wrapped in a :class:`LearnedProgram`.

    Args:
        timeout: Maximum learning time in seconds (default 300).
        max_literals: Maximum number of literals per clause (default 6).
        max_vars: Maximum number of variables per clause (default 6).
        noisy: Enable noise-tolerant learning (learns MDL programs).
        eval_timeout: Timeout for evaluating each hypothesis in seconds.
        extra_args: Additional keyword arguments passed to Popper's
            ``Settings`` constructor.

    Raises:
        ImportError: If the ``popper`` package cannot be imported.
    """

    def __init__(
        self,
        timeout: int = 300,
        max_literals: int = 6,
        max_vars: int = 6,
        noisy: bool = False,
        eval_timeout: int = 10,
        **extra_args: object,
    ) -> None:
        self.timeout = timeout
        # NOTE(review): max_literals, max_vars and noisy are stored here, but
        # learn() only forwards timeout, eval_timeout and extra_args to
        # Settings -- confirm whether these three should be forwarded as well.
        self.max_literals = max_literals
        self.max_vars = max_vars
        self.noisy = noisy
        self.eval_timeout = eval_timeout
        self.extra_args = extra_args
        # Fail at construction time rather than on the first learn() call.
        self._check_available()

    @staticmethod
    def _check_available() -> None:
        """Verify that Popper is installed.

        Raises:
            ImportError: With installation instructions, chained to the
                original import failure.
        """
        try:
            importlib.import_module("popper")
        except ImportError as exc:
            raise ImportError(
                "Popper is not installed. Install with:\n"
                "  pip install popper-ilp\n"
                "or:\n"
                "  pip install git+https://github.com/logic-and-learning-lab/Popper@main"
            ) from exc

    def learn(self, task: ILPTask) -> LearnedProgram:
        """Run Popper on the given task.

        Writes the task to Popper's expected directory format, calls the
        Popper learning loop, and returns the result.

        Args:
            task: The ILP task. When ``task.work_dir`` is unset a fresh
                temporary directory is created for the input files.

        Returns:
            The learned clauses, score, statistics and the solution text
            Popper prints. ``clauses`` is empty when Popper finds no program.
        """
        # Imported lazily so this module can be imported without Popper.
        from popper.loop import learn_solution
        from popper.util import Settings

        work_dir = task.work_dir or Path(tempfile.mkdtemp(prefix="pyseqalign_popper_"))
        work_dir = Path(work_dir)
        work_dir.mkdir(parents=True, exist_ok=True)

        # Write Popper-format files.
        self._write_popper_files(task, work_dir)

        # Configure and run Popper.
        settings = Settings(
            kbpath=str(work_dir),
            timeout=self.timeout,
            eval_timeout=self.eval_timeout,
            **self.extra_args,
        )

        prog, score, stats = learn_solution(settings)

        # Format results.
        clauses: list[str] = []
        raw_output = ""
        if prog is not None:
            import contextlib
            import io

            # Capture whatever print_prog_score writes to stdout. Redirecting
            # stdout does not rely on the method accepting a ``file`` keyword
            # (Popper's documented call is ``print_prog_score(prog, score)``).
            buf = io.StringIO()
            with contextlib.redirect_stdout(buf):
                settings.print_prog_score(prog, score)
            raw_output = buf.getvalue()
            clauses = [str(clause) for clause in prog]

        # NOTE(review): score is passed through unchanged; confirm that the
        # value Popper returns fits LearnedProgram.score's float annotation.
        return LearnedProgram(
            clauses=clauses,
            score=score if score is not None else 0.0,
            stats=dict(stats) if stats else {},
            raw_output=raw_output,
        )

    # ------------------------------------------------------------------
    # File writing
    # ------------------------------------------------------------------

    @staticmethod
    def _write_popper_files(task: ILPTask, directory: Path) -> None:
        """Write Popper-format input files to *directory*.

        Creates ``bk.pl`` (background), ``exs.pl`` (examples wrapped in
        ``pos(...)`` / ``neg(...)``) and ``bias.pl`` (language bias).

        Args:
            task: Source of the background, example and bias lines.
            directory: Existing directory that receives the three files.
        """

        def wrap(kind: str, example: str) -> str:
            # Drop surrounding whitespace before removing the final period so
            # "foo(x). " or "foo(x).\n" does not end up as "pos(foo(x). ).".
            return f"{kind}({example.strip().rstrip('.')})."

        # Background knowledge (bk.pl).
        (directory / "bk.pl").write_text("\n".join(task.background) + "\n")

        # Examples (exs.pl) -- Popper uses pos()/neg() wrappers.
        exs_lines = [wrap("pos", p) for p in task.positive]
        exs_lines.extend(wrap("neg", n) for n in task.negative)
        (directory / "exs.pl").write_text("\n".join(exs_lines) + "\n")

        # Bias (bias.pl) -- Popper's hypothesis language spec.
        (directory / "bias.pl").write_text("\n".join(task.bias) + "\n")
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class PopperFallbackLearner:
    """Fallback Popper learner that calls Popper via subprocess.

    Use this when you cannot import Popper directly (e.g., version
    conflicts) but have the ``popper.py`` script available on PATH.

    Args:
        popper_cmd: Command to run Popper (default ``"python -m popper"``).
            It is split on whitespace and the work directory is appended as
            the final argument.
        timeout: Maximum learning time in seconds.
    """

    def __init__(
        self,
        popper_cmd: str = "python -m popper",
        timeout: int = 300,
    ) -> None:
        self.popper_cmd = popper_cmd
        self.timeout = timeout

    def learn(self, task: ILPTask) -> LearnedProgram:
        """Run Popper via subprocess.

        Args:
            task: The ILP task. When ``task.work_dir`` is unset a fresh
                temporary directory is created for the input files.

        Returns:
            A :class:`LearnedProgram` whose ``raw_output`` is the process's
            stdout followed by its stderr and whose ``stats`` holds the exit
            status under ``"returncode"``. On timeout, a program with no
            clauses and ``stats == {"timeout": True}`` is returned instead.

        Raises:
            RuntimeError: If the Popper command cannot be found.
        """
        import subprocess

        work_dir = task.work_dir or Path(tempfile.mkdtemp(prefix="pyseqalign_popper_"))
        work_dir = Path(work_dir)
        work_dir.mkdir(parents=True, exist_ok=True)

        PopperLearner._write_popper_files(task, work_dir)

        try:
            result = subprocess.run(
                self.popper_cmd.split() + [str(work_dir)],
                capture_output=True,
                text=True,
                timeout=self.timeout,
            )
        except FileNotFoundError as exc:
            # Chain the original error so the missing executable stays visible.
            raise RuntimeError(
                f"Popper not found via '{self.popper_cmd}'. "
                "Install with: pip install popper-ilp"
            ) from exc
        except subprocess.TimeoutExpired:
            return LearnedProgram(
                raw_output=f"Popper timed out after {self.timeout}s",
                stats={"timeout": True},
            )

        # stderr is appended so diagnostics are preserved in raw_output.
        raw_output = result.stdout + result.stderr
        return LearnedProgram(
            clauses=self._parse_output(raw_output),
            # Expose the exit status so callers can detect a failed run.
            stats={"returncode": result.returncode},
            raw_output=raw_output,
        )

    @staticmethod
    def _parse_output(raw_output: str) -> list[str]:
        """Extract clauses from Popper's textual output.

        A line counts as a clause when it contains ``:-`` or ends with a
        period. Blank lines, ``%`` comment lines (with or without a space
        after the ``%``) and echoed ``pos(...)`` / ``neg(...)`` examples are
        ignored.

        Args:
            raw_output: Text captured from the Popper process.

        Returns:
            The clause lines, whitespace-stripped, in output order.
        """
        clauses: list[str] = []
        for line in raw_output.splitlines():
            stripped = line.strip()
            if not stripped or stripped.startswith("%"):
                continue
            if stripped.startswith(("pos(", "neg(")):
                continue
            if ":-" in stripped or stripped.endswith("."):
                clauses.append(stripped)
        return clauses
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
"""Helpers for constructing ILP tasks from alignment data.
|
|
2
|
+
|
|
3
|
+
Converts alignment examples into the background knowledge, positive/negative
|
|
4
|
+
examples, and bias declarations needed by ILP learners.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from collections.abc import Sequence
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from pyseqalign.learning.base import ILPTask
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _seq_to_prolog_list(seq: Sequence[int]) -> str:
|
|
16
|
+
"""Convert a Python integer list to a Prolog list string."""
|
|
17
|
+
return "[" + ",".join(str(x) for x in seq) + "]"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class AlignmentTaskBuilder:
    """Build ILP tasks from alignment training data.

    This builder generates the Prolog facts and bias declarations required
    to learn distance/scoring predicates or alignment classification rules
    from labelled sequence pairs. Every mutating method returns the builder
    itself so calls can be chained.

    Example usage::

        builder = AlignmentTaskBuilder()
        builder.add_positive_pair([1, 2, 3], [1, 2, 4], label="similar")
        builder.add_negative_pair([1, 2, 3], [10, 11, 12], label="similar")
        builder.add_background_fact("amino_acid(1, a).")
        task = builder.build()
    """

    def __init__(self) -> None:
        self._positive: list[str] = []
        self._negative: list[str] = []
        self._background: list[str] = []
        self._bias: list[str] = []
        self._settings: dict[str, str] = {}
        # Counter shared by positive and negative pairs so every pair gets a
        # unique ``p<N>`` identifier.
        self._pair_id: int = 0

    # ------------------------------------------------------------------
    # Adding examples
    # ------------------------------------------------------------------

    def _add_pair(
        self,
        examples: list[str],
        seq1: Sequence[int],
        seq2: Sequence[int],
        label: str,
    ) -> AlignmentTaskBuilder:
        """Register one labelled pair in *examples* plus its seq1/seq2 facts."""
        self._pair_id += 1
        pid = self._pair_id
        # Render both sequences before touching any list so a failure here
        # cannot leave an example without its background facts.
        s1 = _seq_to_prolog_list(seq1)
        s2 = _seq_to_prolog_list(seq2)
        examples.append(f"{label}(p{pid}).")
        self._background.append(f"seq1(p{pid},{s1}).")
        self._background.append(f"seq2(p{pid},{s2}).")
        return self

    def add_positive_pair(
        self,
        seq1: Sequence[int],
        seq2: Sequence[int],
        label: str = "similar",
    ) -> AlignmentTaskBuilder:
        """Add a positive example pair (sequences that *should* be related).

        Args:
            seq1: First sequence as integer IDs.
            seq2: Second sequence as integer IDs.
            label: Predicate name for the relation being learned.

        Returns:
            This builder, for chaining.
        """
        return self._add_pair(self._positive, seq1, seq2, label)

    def add_negative_pair(
        self,
        seq1: Sequence[int],
        seq2: Sequence[int],
        label: str = "similar",
    ) -> AlignmentTaskBuilder:
        """Add a negative example pair (sequences that should *not* be related).

        Args:
            seq1: First sequence as integer IDs.
            seq2: Second sequence as integer IDs.
            label: Predicate name for the relation being learned.

        Returns:
            This builder, for chaining.
        """
        return self._add_pair(self._negative, seq1, seq2, label)

    def add_background_fact(self, fact: str) -> AlignmentTaskBuilder:
        """Add a raw Prolog fact to the background knowledge."""
        self._background.append(fact)
        return self

    def add_background_rule(self, rule: str) -> AlignmentTaskBuilder:
        """Add a raw Prolog rule to the background knowledge."""
        self._background.append(rule)
        return self

    # ------------------------------------------------------------------
    # Bias / language declarations
    # ------------------------------------------------------------------

    def add_bias(self, declaration: str) -> AlignmentTaskBuilder:
        """Add a raw bias declaration (mode, determination, head_pred, etc.)."""
        self._bias.append(declaration)
        return self

    def set_setting(self, key: str, value: str) -> AlignmentTaskBuilder:
        """Set an ILP system parameter."""
        self._settings[key] = value
        return self

    def use_default_alignment_bias_aleph(
        self, label: str = "similar"
    ) -> AlignmentTaskBuilder:
        """Add default Aleph mode/determination declarations for sequence alignment.

        Sets up modes to learn rules about when two sequences are *label*.
        """
        self._bias.extend([
            f":- modeh(1, {label}(+pair)).",
            ":- modeb(*, seq1(+pair, -list)).",
            ":- modeb(*, seq2(+pair, -list)).",
            ":- modeb(*, member(-int, +list)).",
            ":- modeb(1, length(+list, -int)).",
            ":- modeb(1, sw_score(+pair, -float)).",
            ":- modeb(1, nw_score(+pair, -float)).",
            ":- modeb(1, score_above(+float, #float)).",
            f":- determination({label}/1, seq1/2).",
            f":- determination({label}/1, seq2/2).",
            f":- determination({label}/1, member/2).",
            f":- determination({label}/1, length/2).",
            f":- determination({label}/1, sw_score/2).",
            f":- determination({label}/1, nw_score/2).",
            f":- determination({label}/1, score_above/2).",
        ])
        return self

    def use_default_alignment_bias_popper(
        self, label: str = "similar"
    ) -> AlignmentTaskBuilder:
        """Add default Popper bias declarations for sequence alignment."""
        self._bias.extend([
            f"head_pred({label},1).",
            "body_pred(seq1,2).",
            "body_pred(seq2,2).",
            "body_pred(member,2).",
            "body_pred(length,2).",
            "body_pred(sw_score,2).",
            "body_pred(nw_score,2).",
            "body_pred(score_above,2).",
            "max_body(6).",
            "max_vars(6).",
        ])
        return self

    # ------------------------------------------------------------------
    # Build
    # ------------------------------------------------------------------

    def build(self, work_dir: Path | None = None) -> ILPTask:
        """Construct the ILP task from the accumulated data.

        The task receives copies of the builder's lists and settings, so
        later builder calls do not alter an already built task.
        """
        return ILPTask(
            background=list(self._background),
            positive=list(self._positive),
            negative=list(self._negative),
            bias=list(self._bias),
            settings=dict(self._settings),
            work_dir=work_dir,
        )

    @staticmethod
    def _aleph_background_lines(
        settings: dict[str, str],
        bias: list[str],
        background: list[str],
    ) -> list[str]:
        """Return the lines of the ``.b`` file: settings, bias, blank, background.

        ``:- set(...)`` directives are emitted in the order the settings were
        added (inserting each at index 0 would reverse them).
        """
        setting_lines = [f":- set({k},{v})." for k, v in settings.items()]
        return setting_lines + list(bias) + [""] + list(background)

    def write_files(self, directory: Path, name: str = "alignment") -> ILPTask:
        """Write .b, .f, .n, and bias files to *directory* and return the task.

        This produces files in the format expected by both Aleph and Popper:
        ``<name>.b`` / ``<name>.f`` / ``<name>.n`` plus ``bias.pl``,
        ``exs.pl`` and ``bk.pl``.

        Args:
            directory: Target directory (created if it doesn't exist).
            name: Base filename (without extension).

        Returns:
            The built task, with ``work_dir`` set to *directory*.
        """
        directory = Path(directory)
        directory.mkdir(parents=True, exist_ok=True)

        task = self.build(work_dir=directory)

        # Background knowledge (settings first, then bias, then facts/rules).
        bk_lines = self._aleph_background_lines(
            task.settings, task.bias, task.background
        )
        (directory / f"{name}.b").write_text("\n".join(bk_lines) + "\n")

        # Positive examples
        (directory / f"{name}.f").write_text("\n".join(task.positive) + "\n")

        # Negative examples
        (directory / f"{name}.n").write_text("\n".join(task.negative) + "\n")

        # Popper-style separate bias file (also useful for Aleph via .b)
        (directory / "bias.pl").write_text("\n".join(task.bias) + "\n")

        # Popper exs.pl (combines pos/neg)
        popper_exs = [f"pos({p.rstrip('.')})." for p in task.positive]
        popper_exs.extend(f"neg({n.rstrip('.')})." for n in task.negative)
        (directory / "exs.pl").write_text("\n".join(popper_exs) + "\n")

        # Popper bk.pl (background only, no bias)
        (directory / "bk.pl").write_text("\n".join(task.background) + "\n")

        return task
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""SWI-Prolog engine wrapper for computing distance/scoring via Prolog predicates.
|
|
2
|
+
|
|
3
|
+
This replaces the legacy YAP Prolog bindings (yapBind.c) with SWI-Prolog
|
|
4
|
+
via the ``janus_swi`` package (the modern Python↔SWI-Prolog bridge).
|
|
5
|
+
The engine implements the ``ScoringFunction`` protocol so it can be passed
|
|
6
|
+
directly to ``SmithWaterman`` or ``NeedlemanWunsch``.
|
|
7
|
+
|
|
8
|
+
Requires the ``prolog`` optional dependency::
|
|
9
|
+
|
|
10
|
+
pip install pyseqalign[prolog]
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import importlib
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PrologEngine:
    """SWI-Prolog based scoring function using Janus.

    Args:
        dist_mode: ``"sym"`` for similarity mode (1 - distance) or
            ``"dist"`` for raw distance. Matches the legacy ``dist/7``
            predicate convention.
        distance_type: Name of the distance type predicate (default
            ``"atomDistance"``).
        distance_method: Distance method identifier (default ``"nc"`` for
            Nienhuys-Cheng).
        iteration: Iteration parameter passed to ``dist/7`` (default ``0``).
        gap_default: Default score returned for gap characters.
        failure_score: Score returned when the ``dist/7`` query has no
            solution (default ``-100.0``).

    Raises:
        ImportError: If the ``janus_swi`` package cannot be imported.
    """

    def __init__(
        self,
        dist_mode: str = "sym",
        distance_type: str = "atomDistance",
        distance_method: str = "nc",
        iteration: int = 0,
        gap_default: float = -8.0,
        failure_score: float = -100.0,
    ) -> None:
        self.dist_mode = dist_mode
        self.distance_type = distance_type
        self.distance_method = distance_method
        self.iteration = iteration
        self.gap_default = gap_default
        self.failure_score = failure_score

        # Lazy import so the package works without janus_swi installed.
        try:
            self._janus = importlib.import_module("janus_swi")
        except ImportError as exc:
            raise ImportError(
                "janus_swi is required for Prolog support. "
                "Install with: pip install pyseqalign[prolog]"
            ) from exc

    def consult(self, path: str | Path) -> None:
        """Load a Prolog file into the engine."""
        self._janus.consult(str(path))

    def assert_fact(self, term: str) -> None:
        """Assert a Prolog fact.

        *term* is interpolated verbatim into an ``assert(...)`` goal.
        """
        self._janus.query_once(f"assert({term})")

    def retract_fact(self, term: str) -> None:
        """Retract a Prolog fact.

        *term* is interpolated verbatim into a ``retract(...)`` goal.
        """
        self._janus.query_once(f"retract({term})")

    def call(self, goal: str) -> list[dict]:
        """Execute a Prolog goal and return all solution bindings."""
        return list(self._janus.query(goal))

    def call_once(self, goal: str) -> dict:
        """Execute a Prolog goal and return the first solution."""
        return self._janus.query_once(goal)

    def score(self, a: int, b: int) -> float:
        """Compute the score between element IDs *a* and *b* via Prolog.

        Calls the ``dist/7`` predicate:
            ``dist(Mode, Type, Method, Iteration, A, B, D)``
        and returns the bound value of ``D``.

        Args:
            a: First element ID; ``0`` is treated as a gap.
            b: Second element ID; ``0`` is treated as a gap.

        Returns:
            ``gap_default`` when either ID is ``0``, the value bound to ``D``
            as a float when the query succeeds, and ``failure_score``
            otherwise.
        """
        if a == 0 or b == 0:
            # Gaps are scored locally without consulting Prolog.
            return self.gap_default

        query = (
            f"dist({self.dist_mode},{self.distance_type},"
            f"{self.distance_method},{self.iteration},{a},{b},D)"
        )
        result = self._janus.query_once(query)
        if result["truth"]:
            return float(result["D"])
        return self.failure_score

    def consult_knowledge_base(self) -> None:
        """Load the bundled Prolog knowledge base files (amino acids, BLOSUM50, distances).

        Files are looked up in the ``knowledge`` directory next to this
        module; a file that does not exist is skipped.
        """
        kb_dir = Path(__file__).parent / "knowledge"
        for pl_file in ["amino_acids.pl", "distances.pl", "blosum50.pl"]:
            path = kb_dir / pl_file
            if path.exists():
                self.consult(path)
|
|
File without changes
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
% example(?Id, ?Symbol)
% Lookup table between integer element IDs and one-letter symbol atoms.
% ID 0 maps to '-'; IDs 1-20 map to the letters listed below.
example(0, '-').
example(1, a).
example(2, r).
example(3, n).
example(4, d).
example(5, c).
example(6, q).
example(7, e).
example(8, g).
example(9, h).
example(10, i).
example(11, l).
example(12, k).
example(13, m).
example(14, f).
example(15, p).
example(16, s).
example(17, t).
example(18, w).
example(19, y).
example(20, v).
|
|
22
|
+
|
|
23
|
+
% getExampleIDs(?Symbols, ?Ids)
% Relates a list of symbols to the list of their integer IDs, element by
% element, using example/2. Both lists always have the same length.
getExampleIDs([], []).
getExampleIDs([Symbol|Symbols], [Id|Ids]) :-
    example(Id, Symbol),
    getExampleIDs(Symbols, Ids).
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
% sequence(?N, ?Ids)
% Sample sequences stored as lists of example/2 IDs.

% Sequence 1: MAPFQSNKDL
%   A = [m,a,p,f,q,s,n,k,d,l],
%   S1 = [13,1,15,14,6,16,3,12,4,11] ?
%   A = ['m', 'a', 'p', 'f', 'q', 's', 'n', 'k', 'd', 'l'], getExampleIDs(A,S1).
sequence(1, [13,1,15,14,6,16,3,12,4,11]).

% Sequence 2: MLAPFEKTAAARSII
%   getExampleIDs(['m', 'l', 'a', 'p', 'f', 'e', 'k', 't', 'a', 'a', 'a', 'r', 's', 'i', 'i'],Seq2).
sequence(2, [13,11,1,15,14,7,12,17,1,1,1,2,16,10,10]).

% Sequence 3:
%   getExampleIDs(['h', 'e', 'a', 'g', 'a', 'w', 'g', 'h', 'e', 'e'],Seq3)
sequence(3, [9,7,1,8,1,18,8,9,7,7]).

% Sequence 4:
%   getExampleIDs(['p', 'a', 'w', 'h', 'e', 'a', 'e'],Seq4)
sequence(4, [15,1,18,9,7,1,7]).
|
|
43
|
+
|
|
44
|
+
% ppAAList(+Items)
% Write every element of Items back to back, then end the line.
ppAAList([]) :-
    nl.
ppAAList([Item|Rest]) :-
    write(Item),
    ppAAList(Rest).
|
|
48
|
+
|
|
49
|
+
% convertToString(+Ids)
% Translate a list of IDs to their symbols via getExampleIDs/2 and print
% the symbols on one line with ppAAList/1.
convertToString(Ids) :-
    getExampleIDs(Symbols, Ids),
    ppAAList(Symbols).
|
|
52
|
+
|
|
53
|
+
|