abi-agent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abi/__init__.py +57 -0
- abi/_compat/__init__.py +1 -0
- abi/_compat/errors.py +7 -0
- abi/_compat/filesystem.py +7 -0
- abi/_compat/logger.py +105 -0
- abi/_compat/progress.py +219 -0
- abi/_compat/skills/__init__.py +1 -0
- abi/_compat/skills/base.py +307 -0
- abi/_compat/skills/registry.py +154 -0
- abi/agent/__init__.py +5 -0
- abi/agent/interface.py +588 -0
- abi/cli.py +604 -0
- abi/config.py +73 -0
- abi/dag.py +150 -0
- abi/dev_setup.py +59 -0
- abi/errors.py +26 -0
- abi/executor.py +416 -0
- abi/exporters/__init__.py +5 -0
- abi/exporters/nextflow.py +362 -0
- abi/filesystem.py +20 -0
- abi/interfaces.py +58 -0
- abi/openai_contracts.py +186 -0
- abi/plugins/__init__.py +131 -0
- abi/plugins/metagenomic_plasmid.py +130 -0
- abi/plugins/metatranscriptomics.py +273 -0
- abi/provenance.py +19 -0
- abi/py.typed +0 -0
- abi/report.py +106 -0
- abi/results.py +230 -0
- abi/runtimes/__init__.py +14 -0
- abi/runtimes/base.py +36 -0
- abi/runtimes/local.py +46 -0
- abi/runtimes/nextflow.py +316 -0
- abi/schemas.py +99 -0
- abi/tables.py +97 -0
- abi/testing.py +42 -0
- abi/tools.py +13 -0
- abi_agent-0.1.0.dist-info/METADATA +269 -0
- abi_agent-0.1.0.dist-info/RECORD +51 -0
- abi_agent-0.1.0.dist-info/WHEEL +4 -0
- abi_agent-0.1.0.dist-info/entry_points.txt +7 -0
- abi_agent-0.1.0.dist-info/licenses/LICENSE +21 -0
- plugins/metagenomic_plasmid/README.md +12 -0
- plugins/metatranscriptomics/config_default.yaml +21 -0
- plugins/metatranscriptomics/sample_sheet_template.tsv +2 -0
- plugins/metatranscriptomics/skills/fastp/SKILL.md +33 -0
- plugins/metatranscriptomics/skills/featurecounts/SKILL.md +32 -0
- plugins/metatranscriptomics/skills/hisat2/SKILL.md +32 -0
- plugins/metatranscriptomics/skills/star/SKILL.md +31 -0
- plugins/metatranscriptomics/standard_tables.yaml +22 -0
- plugins/metatranscriptomics/tool_registry.yaml +41 -0
abi/__init__.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Agent-Bioinformatics Interface package."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
__all__ = ["__version__"]
|
|
10
|
+
|
|
11
|
+
__version__ = "0.1.0"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _warn_if_wrong_location() -> None:
    """Emit a warning when a conflicting ``abi`` package shadows this one.

    This can happen when another project that also contains an ``abi``
    package (e.g. an older editable install of ``autoplasm`` from
    PlasimSkillsForAgent) appears earlier on ``sys.path``.

    Two checks are performed:

    * If the ``ABI_SRC_ROOT`` environment variable is set, the module is
      expected to live at ``$ABI_SRC_ROOT/abi/__init__.py``; a warning is
      emitted on mismatch. This explicit check is authoritative, so the
      heuristic below is skipped whenever the variable is set.
    * Otherwise, ``sys.path`` is scanned for a ``src`` directory whose path
      mentions ``PlasimSkillsForAgent`` and whose ``abi/__init__.py``
      resolves to this very file.

    Run ``python scripts/dev_setup.py`` (or ``abi-dev-setup``) to
    install a priority ``.pth`` file that guarantees the correct
    package is found first.
    """
    import warnings

    loaded = Path(__file__).resolve()

    expected_marker = os.environ.get("ABI_SRC_ROOT", "")
    if expected_marker:
        expected = Path(expected_marker) / "abi" / "__init__.py"
        if expected.resolve() != loaded:
            warnings.warn(
                f"abi package loaded from unexpected location:\n"
                f" loaded : {loaded}\n"
                f" expected: {expected}\n"
                f"Run: python scripts/dev_setup.py (or: abi-dev-setup)",
                stacklevel=2,
            )
        # The user stated where the package should live; do not follow up
        # with the heuristic (it would warn twice on mismatch, or warn
        # spuriously when the configured root is itself the matched path).
        return

    # Best-effort detection without ABI_SRC_ROOT: look for PlasimSkillsForAgent shadowing.
    for entry in sys.path:
        try:
            entry_path = Path(entry)
        except TypeError:
            # sys.path may hold entries that cannot become a Path (e.g. bytes);
            # they cannot match, and must not break package import.
            continue
        if entry_path.name != "src" or "PlasimSkillsForAgent" not in str(entry_path):
            continue
        candidate_init = entry_path / "abi" / "__init__.py"
        if candidate_init.resolve() == loaded:
            warnings.warn(
                f"abi was loaded from PlasimSkillsForAgent ({loaded}), "
                f"not from the standalone abi-agent project.\n"
                f"Run: python scripts/dev_setup.py (or: abi-dev-setup)",
                stacklevel=2,
            )
            break
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
_warn_if_wrong_location()
|
abi/_compat/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Internal compatibility modules for ABI."""
|
abi/_compat/errors.py
ADDED
abi/_compat/logger.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Structured logging and provenance helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import shlex
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Dict, Iterable, Mapping
|
|
10
|
+
|
|
11
|
+
from abi.filesystem import ensure_directory
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RunLogger:
    """Append structured, one-JSON-object-per-line records to a run log file.

    The log file is named ``log_abi_<YYYYmmdd_HHMMSS>.log`` (local time at
    construction) and lives inside the directory returned by
    ``ensure_directory``.
    """

    def __init__(self, log_dir: str | Path) -> None:
        """Resolve the log directory and choose this run's log file name.

        Args:
            log_dir: Directory that should hold the log file. It is passed
                through ``ensure_directory``; presumably that helper creates
                or validates the directory and returns a ``Path`` — confirm
                against ``abi.filesystem``.
        """
        self.log_dir = ensure_directory(log_dir, label="Log directory")
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.log_file = self.log_dir / f"log_abi_{timestamp}.log"

    def log_event(self, event: str, payload: Mapping[str, Any]) -> None:
        """Append one JSON record with a timestamp, event name and payload.

        Args:
            event: Event name stored under the ``"event"`` key.
            payload: Event data; copied into a plain ``dict`` before writing.

        Values that are not natively JSON-serialisable (for example
        ``pathlib.Path`` objects among a step's inputs or outputs) are written
        as their ``str()`` form instead of raising ``TypeError``.
        """
        record = {
            "timestamp": datetime.now().isoformat(timespec="seconds"),
            "event": event,
            "payload": dict(payload),
        }
        # Serialise before opening the file so a serialisation failure can
        # never leave a partially written line behind.
        line = json.dumps(record, ensure_ascii=False, sort_keys=True, default=str)
        with self.log_file.open("a", encoding="utf-8") as handle:
            handle.write(line + "\n")

    def log_step(
        self,
        step: Any,
        *,
        command: Iterable[str] | str,
        status: str,
        error_message: str | None = None,
        duration: float = 0,
    ) -> None:
        """Record a ``pipeline_step`` event describing one executed step.

        Args:
            step: Object describing the step. ``sample_id``, ``step_name``,
                ``tool_id``, ``inputs``, ``outputs`` and ``params`` are read
                with ``getattr``; missing attributes fall back to ``None`` or
                an empty dict.
            command: The command as a ready-made string, or as an iterable of
                tokens that is rendered with ``_display_command``.
            status: Outcome label stored verbatim.
            error_message: Optional failure description.
            duration: Value stored under ``"duration"``. Defaults to ``0``,
                which is what was always recorded before this parameter
                existed.
        """
        command_text = command if isinstance(command, str) else _display_command(command)
        payload: Dict[str, Any] = {
            "sample_id": getattr(step, "sample_id", None),
            "step_name": getattr(step, "step_name", None),
            "tool_name": getattr(step, "tool_id", None),
            "command": command_text,
            "input_files": getattr(step, "inputs", {}),
            "output_files": getattr(step, "outputs", {}),
            "parameters": getattr(step, "params", {}),
            "status": status,
            "duration": duration,
            "error_message": error_message,
        }
        self.log_event("pipeline_step", payload)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def write_commands_tsv(rows: Iterable[Mapping[str, Any]], path: str | Path) -> Path:
    """Write one tab-separated line per executed command and return the file path.

    The parent directory is created when missing. The file starts with a
    header row; every row mapping is then projected onto the fixed column
    list, with absent keys rendered from an empty string via ``_tsv_value``.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    columns = (
        "step_id",
        "sample_id",
        "step_name",
        "tool_id",
        "category",
        "command",
        "status",
        "return_code",
        "reason",
        "parsed_status",
        "standard_tables",
    )
    with target.open("w", encoding="utf-8") as out:
        out.write("\t".join(columns) + "\n")
        for record in rows:
            cells = [_tsv_value(record.get(column, "")) for column in columns]
            out.write("\t".join(cells) + "\n")
    return target
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def write_tool_versions(rows: Iterable[Mapping[str, Any]], path: str | Path) -> Path:
    """Write the tool-version table as TSV and return the file path.

    The parent directory is created when missing. A header row is written
    first; each row mapping is then projected onto the fixed column list,
    with absent keys rendered from an empty string via ``_tsv_value``.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    columns = ("tool_id", "executable", "env_name", "version", "status")
    with target.open("w", encoding="utf-8") as out:
        out.write("\t".join(columns) + "\n")
        for record in rows:
            cells = [_tsv_value(record.get(column, "")) for column in columns]
            out.write("\t".join(cells) + "\n")
    return target
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def write_resolved_inputs_tsv(rows: Iterable[Mapping[str, Any]], path: str | Path) -> Path:
    """Write the resolved-inputs table as TSV and return the file path.

    The parent directory is created when missing. A header row is written
    first; each row mapping is then projected onto the fixed column list,
    with absent keys rendered from an empty string via ``_tsv_value``.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    columns = ("step_id", "tool_id", "sample_id", "input_name", "path", "exists", "source")
    with target.open("w", encoding="utf-8") as out:
        out.write("\t".join(columns) + "\n")
        for record in rows:
            cells = [_tsv_value(record.get(column, "")) for column in columns]
            out.write("\t".join(cells) + "\n")
    return target
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _tsv_value(value: Any) -> str:
    """Render *value* as the text of a single TSV cell.

    ``None`` becomes an empty string; everything else is converted with
    ``str()``. Tab, newline and carriage-return characters are replaced by
    the two-character sequences ``\\t``, ``\\n`` and ``\\r`` so that a value
    (for example a multi-line command) can never split a row or shift
    columns in the written table.
    """
    if value is None:
        return ""
    # Only the characters that carry structural meaning in TSV are escaped.
    escapes = str.maketrans({"\t": "\\t", "\n": "\\n", "\r": "\\r"})
    return str(value).translate(escapes)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _display_command(command: Iterable[str]) -> str:
    """Render an argv-style command as one space-separated display string.

    Every token is quoted with ``shlex.quote``, except a bare ``>`` token,
    which is kept verbatim so output redirection stays readable. Tokens are
    first coerced with ``str()`` so path-like or numeric arguments are
    rendered instead of making ``shlex.quote`` raise ``TypeError``.
    """
    rendered = []
    for token in command:
        text = str(token)
        rendered.append(text if text == ">" else shlex.quote(text))
    return " ".join(rendered)
|
abi/_compat/progress.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""Progress event recording for pipeline execution."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import threading
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Dict, Mapping
|
|
10
|
+
|
|
11
|
+
from abi.filesystem import ensure_directory
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PipelineProgressRecorder:
    """Record live pipeline progress as an event stream plus a state snapshot.

    Each event is appended to ``progress.jsonl`` and the aggregated state is
    rewritten to ``progress.json``. Both writes, together with the in-memory
    state update, happen while holding a single lock in :meth:`record`.
    """

    def __init__(self, provenance_dir: str | Path) -> None:
        """Set up the output paths, the lock and an empty snapshot."""
        self.provenance_dir = ensure_directory(provenance_dir, label="Provenance directory")
        self.events_path = self.provenance_dir / "progress.jsonl"
        self.snapshot_path = self.provenance_dir / "progress.json"
        self._lock = threading.Lock()
        self._snapshot: Dict[str, Any] = {}

    @property
    def paths(self) -> Dict[str, Path]:
        """Return the event-stream and snapshot paths keyed by role."""
        locations: Dict[str, Path] = {}
        locations["events"] = self.events_path
        locations["snapshot"] = self.snapshot_path
        return locations

    def start_run(
        self,
        plan: Any,
        *,
        dry_run: bool,
        parallel: bool,
        workers: int,
    ) -> None:
        """Initialise the snapshot from *plan* and emit ``run_started``.

        Every step and sample of the plan starts out as ``"pending"``.
        """
        step_entries = []
        for planned in plan.steps:
            step_entries.append(
                {
                    "step_id": planned.step_id,
                    "sample_id": planned.sample_id or "",
                    "step_name": planned.step_name,
                    "tool_id": planned.tool_id,
                    "category": planned.category,
                    "status": "pending",
                    "reason": planned.reason or "",
                    "return_code": "",
                    "parsed_status": "",
                    "standard_tables": "",
                    "started_at": "",
                    "finished_at": "",
                }
            )

        sample_entries = {}
        for planned_sample in plan.samples:
            sample_entries[planned_sample.sample_id] = {
                "sample_id": planned_sample.sample_id,
                "platform": planned_sample.platform,
                "status": "pending",
                "current_step_id": "",
                "completed_step_count": 0,
                "failed_step_count": 0,
            }

        self._snapshot = {
            "project_name": plan.project_name,
            "status": "running",
            "dry_run": dry_run,
            "parallel": parallel,
            "workers": workers,
            "started_at": _timestamp(),
            "finished_at": "",
            "total_step_count": len(plan.steps),
            "completed_step_count": 0,
            "failed_step_count": 0,
            "running_step_count": 0,
            "current_steps": [],
            "samples": sample_entries,
            "steps": step_entries,
            "last_event": {},
        }
        run_options = {"dry_run": dry_run, "parallel": parallel, "workers": workers}
        self.record("run_started", run_options)

    def step_started(self, step: Any) -> None:
        """Emit a ``step_started`` event for *step*."""
        details = _step_payload(step)
        self.record("step_started", details)

    def step_completed(
        self,
        step: Any,
        *,
        status: str,
        reason: str = "",
        return_code: int | str = "",
        parsed_status: str = "",
        standard_tables: str = "",
    ) -> None:
        """Emit ``step_failed`` when *status* is ``"failed"``, else ``step_completed``."""
        event_name = "step_failed" if status == "failed" else "step_completed"
        details = _step_payload(step)
        details.update(
            {
                "status": status,
                "reason": reason,
                "return_code": return_code,
                "parsed_status": parsed_status,
                "standard_tables": standard_tables,
            }
        )
        self.record(event_name, details)

    def finish_run(self, *, status: str) -> None:
        """Emit the final ``run_completed`` event carrying *status*."""
        self.record("run_completed", {"status": status})

    def record(self, event: str, payload: Mapping[str, Any]) -> None:
        """Apply *event* to the snapshot, append it to the stream, rewrite the snapshot."""
        stamp = _timestamp()
        entry = {
            "timestamp": stamp,
            "event": event,
            "payload": dict(payload),
        }
        with self._lock:
            self._apply_event(event, payload, stamp)
            with self.events_path.open("a", encoding="utf-8") as stream:
                stream.write(json.dumps(entry, ensure_ascii=False, sort_keys=True) + "\n")
            self._snapshot["last_event"] = entry
            self._write_snapshot()

    def _apply_event(self, event: str, payload: Mapping[str, Any], timestamp: str) -> None:
        """Fold one event into the in-memory snapshot.

        Only ``run_completed`` and the three step events change state; step
        events that name a step absent from the snapshot are ignored.
        """
        state = self._snapshot

        if event == "run_completed":
            state["status"] = str(payload.get("status", "completed"))
            state["finished_at"] = timestamp
            state["running_step_count"] = 0
            state["current_steps"] = []
            # Samples still marked as running are closed out with the run.
            for sample_state in state.get("samples", {}).values():
                if sample_state.get("status") == "running":
                    sample_state["status"] = "completed"
                    sample_state["current_step_id"] = ""
            return

        if event not in {"step_started", "step_completed", "step_failed"}:
            return

        step_id = str(payload.get("step_id", ""))
        sample_id = str(payload.get("sample_id", ""))
        step_state = self._step_state(step_id)
        if not step_state:
            return

        if event == "step_started":
            step_state["status"] = "running"
            step_state["started_at"] = timestamp
            active = list(state.get("current_steps", []))
            if step_id not in active:
                active.append(step_id)
            state["current_steps"] = active
            state["running_step_count"] = len(active)
            if sample_id and sample_id in state.get("samples", {}):
                sample_state = state["samples"][sample_id]
                sample_state["status"] = "running"
                sample_state["current_step_id"] = step_id
            return

        # Remaining events: step_completed / step_failed.
        outcome = str(payload.get("status", "success"))
        failed = outcome == "failed"
        step_state["status"] = outcome
        step_state["reason"] = str(payload.get("reason", ""))
        step_state["return_code"] = payload.get("return_code", "")
        step_state["parsed_status"] = str(payload.get("parsed_status", ""))
        step_state["standard_tables"] = str(payload.get("standard_tables", ""))
        step_state["finished_at"] = timestamp

        still_active = []
        for active_id in state.get("current_steps", []):
            if active_id != step_id:
                still_active.append(active_id)
        state["current_steps"] = still_active
        state["running_step_count"] = len(still_active)

        # The completed counter includes failed steps; failures are counted again separately.
        state["completed_step_count"] = int(state.get("completed_step_count", 0)) + 1
        if failed:
            state["failed_step_count"] = int(state.get("failed_step_count", 0)) + 1
            state["status"] = "failed"

        if not sample_id or sample_id not in state.get("samples", {}):
            return
        sample_state = state["samples"][sample_id]
        sample_state["completed_step_count"] = int(sample_state.get("completed_step_count", 0)) + 1
        if failed:
            sample_state["failed_step_count"] = int(sample_state.get("failed_step_count", 0)) + 1
            sample_state["status"] = "failed"
        elif sample_state.get("status") == "running":
            sample_state["current_step_id"] = ""

    def _step_state(self, step_id: str) -> Dict[str, Any] | None:
        """Return the snapshot entry whose ``step_id`` matches, or ``None``."""
        known_steps = self._snapshot.get("steps", [])
        return next(
            (
                candidate
                for candidate in known_steps
                if isinstance(candidate, dict) and candidate.get("step_id") == step_id
            ),
            None,
        )

    def _write_snapshot(self) -> None:
        """Write the snapshot to a sibling ``.json.tmp`` file, then move it into place."""
        staging_path = self.snapshot_path.with_suffix(".json.tmp")
        rendered = json.dumps(self._snapshot, indent=2, ensure_ascii=False, sort_keys=True)
        staging_path.write_text(rendered + "\n", encoding="utf-8")
        staging_path.replace(self.snapshot_path)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _step_payload(step: Any) -> Dict[str, Any]:
    """Collect the identifying attributes of *step* into a fresh dict.

    Reads ``step_id``, ``sample_id``, ``step_name``, ``tool_id`` and
    ``category``; a falsy ``sample_id`` is reported as an empty string.
    """
    attribute_names = ("step_id", "sample_id", "step_name", "tool_id", "category")
    details: Dict[str, Any] = {name: getattr(step, name) for name in attribute_names}
    details["sample_id"] = details["sample_id"] or ""
    return details
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _timestamp() -> str:
    """Return the current local time as an ISO-8601 string with second precision."""
    current_time = datetime.now()
    return current_time.isoformat(timespec="seconds")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Skill base classes and tool registry."""
|