abi-agent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. abi/__init__.py +57 -0
  2. abi/_compat/__init__.py +1 -0
  3. abi/_compat/errors.py +7 -0
  4. abi/_compat/filesystem.py +7 -0
  5. abi/_compat/logger.py +105 -0
  6. abi/_compat/progress.py +219 -0
  7. abi/_compat/skills/__init__.py +1 -0
  8. abi/_compat/skills/base.py +307 -0
  9. abi/_compat/skills/registry.py +154 -0
  10. abi/agent/__init__.py +5 -0
  11. abi/agent/interface.py +588 -0
  12. abi/cli.py +604 -0
  13. abi/config.py +73 -0
  14. abi/dag.py +150 -0
  15. abi/dev_setup.py +59 -0
  16. abi/errors.py +26 -0
  17. abi/executor.py +416 -0
  18. abi/exporters/__init__.py +5 -0
  19. abi/exporters/nextflow.py +362 -0
  20. abi/filesystem.py +20 -0
  21. abi/interfaces.py +58 -0
  22. abi/openai_contracts.py +186 -0
  23. abi/plugins/__init__.py +131 -0
  24. abi/plugins/metagenomic_plasmid.py +130 -0
  25. abi/plugins/metatranscriptomics.py +273 -0
  26. abi/provenance.py +19 -0
  27. abi/py.typed +0 -0
  28. abi/report.py +106 -0
  29. abi/results.py +230 -0
  30. abi/runtimes/__init__.py +14 -0
  31. abi/runtimes/base.py +36 -0
  32. abi/runtimes/local.py +46 -0
  33. abi/runtimes/nextflow.py +316 -0
  34. abi/schemas.py +99 -0
  35. abi/tables.py +97 -0
  36. abi/testing.py +42 -0
  37. abi/tools.py +13 -0
  38. abi_agent-0.1.0.dist-info/METADATA +269 -0
  39. abi_agent-0.1.0.dist-info/RECORD +51 -0
  40. abi_agent-0.1.0.dist-info/WHEEL +4 -0
  41. abi_agent-0.1.0.dist-info/entry_points.txt +7 -0
  42. abi_agent-0.1.0.dist-info/licenses/LICENSE +21 -0
  43. plugins/metagenomic_plasmid/README.md +12 -0
  44. plugins/metatranscriptomics/config_default.yaml +21 -0
  45. plugins/metatranscriptomics/sample_sheet_template.tsv +2 -0
  46. plugins/metatranscriptomics/skills/fastp/SKILL.md +33 -0
  47. plugins/metatranscriptomics/skills/featurecounts/SKILL.md +32 -0
  48. plugins/metatranscriptomics/skills/hisat2/SKILL.md +32 -0
  49. plugins/metatranscriptomics/skills/star/SKILL.md +31 -0
  50. plugins/metatranscriptomics/standard_tables.yaml +22 -0
  51. plugins/metatranscriptomics/tool_registry.yaml +41 -0
abi/__init__.py ADDED
@@ -0,0 +1,57 @@
1
+ """Agent-Bioinformatics Interface package."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ __all__ = ["__version__"]
10
+
11
+ __version__ = "0.1.0"
12
+
13
+
14
+ def _warn_if_wrong_location() -> None:
15
+ """Emit a warning when a conflicting ``abi`` package shadows this one.
16
+
17
+ This can happen when another project that also contains an ``abi``
18
+ package (e.g. an older editable install of ``autoplasm`` from
19
+ PlasimSkillsForAgent) appears earlier on ``sys.path``.
20
+
21
+ Run ``python scripts/dev_setup.py`` (or ``abi-dev-setup``) to
22
+ install a priority ``.pth`` file that guarantees the correct
23
+ package is found first.
24
+ """
25
+ expected_marker = os.environ.get("ABI_SRC_ROOT", "")
26
+ if expected_marker:
27
+ expected = Path(expected_marker) / "abi" / "__init__.py"
28
+ actual = Path(__file__).resolve()
29
+ if expected.resolve() != actual:
30
+ import warnings
31
+
32
+ warnings.warn(
33
+ f"abi package loaded from unexpected location:\n"
34
+ f" loaded : {actual}\n"
35
+ f" expected: {expected}\n"
36
+ f"Run: python scripts/dev_setup.py (or: abi-dev-setup)",
37
+ stacklevel=2,
38
+ )
39
+ # Best-effort detection without ABI_SRC_ROOT: look for PlasimSkillsForAgent shadowing.
40
+ current = Path(__file__).resolve()
41
+ for entry in sys.path:
42
+ entry_path = Path(entry)
43
+ if entry_path.name == "src" and "PlasimSkillsForAgent" in str(entry_path):
44
+ candidate_init = entry_path / "abi" / "__init__.py"
45
+ if candidate_init.resolve() == current:
46
+ import warnings
47
+
48
+ warnings.warn(
49
+ f"abi was loaded from PlasimSkillsForAgent ({current}), "
50
+ f"not from the standalone abi-agent project.\n"
51
+ f"Run: python scripts/dev_setup.py (or: abi-dev-setup)",
52
+ stacklevel=2,
53
+ )
54
+ break
55
+
56
+
57
+ _warn_if_wrong_location()
@@ -0,0 +1 @@
1
+ """Internal compatibility modules for ABI."""
abi/_compat/errors.py ADDED
@@ -0,0 +1,7 @@
1
+ """Compatibility re-exports for legacy internal ABI imports."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abi.errors import ABIError, ConfigError, SampleSheetError, ToolError
6
+
7
+ __all__ = ["ABIError", "ConfigError", "SampleSheetError", "ToolError"]
@@ -0,0 +1,7 @@
1
+ """Compatibility wrapper for filesystem helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abi.filesystem import ensure_directory
6
+
7
+ __all__ = ["ensure_directory"]
abi/_compat/logger.py ADDED
@@ -0,0 +1,105 @@
1
+ """Structured logging and provenance helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import shlex
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import Any, Dict, Iterable, Mapping
10
+
11
+ from abi.filesystem import ensure_directory
12
+
13
+
14
class RunLogger:
    """Append structured JSON-lines records describing a pipeline run.

    Each instance writes to a file named ``log_abi_<YYYYmmdd_HHMMSS>.log``
    inside ``log_dir``; the timestamp is taken when the logger is created.
    Every record is a single JSON object with ``timestamp``, ``event`` and
    ``payload`` keys.
    """

    def __init__(self, log_dir: str | Path) -> None:
        """Prepare the log file path inside ``log_dir``.

        ``log_dir`` is passed through ``ensure_directory`` and the value it
        returns is used as the directory for the log file.
        """
        self.log_dir = ensure_directory(log_dir, label="Log directory")
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.log_file = self.log_dir / f"log_abi_{timestamp}.log"

    def log_event(self, event: str, payload: Mapping[str, Any]) -> None:
        """Append one record for ``event`` with a copy of ``payload``.

        The payload must be JSON-serializable; ``json.dumps`` raises
        ``TypeError`` otherwise.
        """
        record = {
            "timestamp": datetime.now().isoformat(timespec="seconds"),
            "event": event,
            "payload": dict(payload),
        }
        with self.log_file.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n")

    def log_step(
        self,
        step: Any,
        *,
        command: Iterable[str] | str,
        status: str,
        error_message: str | None = None,
        duration: float = 0,
    ) -> None:
        """Append a ``pipeline_step`` record describing one executed step.

        Args:
            step: Object describing the step. ``sample_id``, ``step_name``,
                ``tool_id``, ``inputs``, ``outputs`` and ``params`` are read
                with ``getattr`` and fall back to ``None`` / ``{}`` when the
                attribute is missing.
            command: Either the command already rendered as a string, or an
                iterable of tokens that is rendered with ``_display_command``.
            status: Outcome label stored verbatim in the record.
            error_message: Optional failure description.
            duration: Value stored in the record's ``duration`` field.
                Defaults to ``0``, which is what was always recorded before
                this parameter existed.
        """
        command_text = command if isinstance(command, str) else _display_command(command)
        payload: Dict[str, Any] = {
            "sample_id": getattr(step, "sample_id", None),
            "step_name": getattr(step, "step_name", None),
            "tool_name": getattr(step, "tool_id", None),
            "command": command_text,
            "input_files": getattr(step, "inputs", {}),
            "output_files": getattr(step, "outputs", {}),
            "parameters": getattr(step, "params", {}),
            "status": status,
            "duration": duration,
            "error_message": error_message,
        }
        self.log_event("pipeline_step", payload)
51
+
52
+
53
def write_commands_tsv(rows: Iterable[Mapping[str, Any]], path: str | Path) -> Path:
    """Write the per-step command table to ``path`` as tab-separated text.

    The parent directory is created when missing and an existing file is
    overwritten. A header line is written first, followed by one line per
    entry of ``rows``; keys absent from a row become empty cells. Returns
    ``path`` as a ``Path``.
    """
    columns = (
        "step_id",
        "sample_id",
        "step_name",
        "tool_id",
        "category",
        "command",
        "status",
        "return_code",
        "reason",
        "parsed_status",
        "standard_tables",
    )
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as out:
        out.write("\t".join(columns) + "\n")
        for record in rows:
            cells = [_tsv_value(record.get(column, "")) for column in columns]
            out.write("\t".join(cells) + "\n")
    return target
74
+
75
+
76
def write_tool_versions(rows: Iterable[Mapping[str, Any]], path: str | Path) -> Path:
    """Write the tool-version table to ``path`` as tab-separated text.

    The parent directory is created when missing and an existing file is
    overwritten. Columns are ``tool_id``, ``executable``, ``env_name``,
    ``version`` and ``status``; keys absent from a row become empty cells.
    Returns ``path`` as a ``Path``.
    """
    columns = ("tool_id", "executable", "env_name", "version", "status")
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as out:
        out.write("\t".join(columns) + "\n")
        for record in rows:
            cells = [_tsv_value(record.get(column, "")) for column in columns]
            out.write("\t".join(cells) + "\n")
    return target
85
+
86
+
87
def write_resolved_inputs_tsv(rows: Iterable[Mapping[str, Any]], path: str | Path) -> Path:
    """Write the resolved-inputs table to ``path`` as tab-separated text.

    The parent directory is created when missing and an existing file is
    overwritten. Columns are ``step_id``, ``tool_id``, ``sample_id``,
    ``input_name``, ``path``, ``exists`` and ``source``; keys absent from a
    row become empty cells. Returns ``path`` as a ``Path``.
    """
    columns = ("step_id", "tool_id", "sample_id", "input_name", "path", "exists", "source")
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as out:
        out.write("\t".join(columns) + "\n")
        for record in rows:
            cells = [_tsv_value(record.get(column, "")) for column in columns]
            out.write("\t".join(cells) + "\n")
    return target
96
+
97
+
98
+ def _tsv_value(value: Any) -> str:
99
+ if value is None:
100
+ return ""
101
+ return str(value)
102
+
103
+
104
+ def _display_command(command: Iterable[str]) -> str:
105
+ return " ".join(">" if token == ">" else shlex.quote(token) for token in command)
@@ -0,0 +1,219 @@
1
+ """Progress event recording for pipeline execution."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import threading
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import Any, Dict, Mapping
10
+
11
+ from abi.filesystem import ensure_directory
12
+
13
+
14
class PipelineProgressRecorder:
    """Record pipeline progress as an event stream plus a current-state snapshot.

    Every call to :meth:`record` appends one JSON object to
    ``progress.jsonl`` and then rewrites ``progress.json`` with the
    aggregated state; both files live in the provenance directory. Updates
    made through :meth:`record` are serialized with an internal lock.
    """

    def __init__(self, provenance_dir: str | Path) -> None:
        """Set up output paths under the directory returned by ``ensure_directory``."""
        base_dir = ensure_directory(provenance_dir, label="Provenance directory")
        self.provenance_dir = base_dir
        self.events_path = base_dir / "progress.jsonl"
        self.snapshot_path = base_dir / "progress.json"
        self._lock = threading.Lock()
        self._snapshot: Dict[str, Any] = {}

    @property
    def paths(self) -> Dict[str, Path]:
        """Locations of the event stream (``events``) and the snapshot (``snapshot``)."""
        return {"events": self.events_path, "snapshot": self.snapshot_path}

    def start_run(
        self,
        plan: Any,
        *,
        dry_run: bool,
        parallel: bool,
        workers: int,
    ) -> None:
        """Reset the snapshot from ``plan`` and emit a ``run_started`` event.

        ``plan`` must expose ``steps``, ``samples`` and ``project_name``;
        every step and sample starts in the ``pending`` state.
        """
        step_rows = []
        for planned in plan.steps:
            step_rows.append(
                {
                    "step_id": planned.step_id,
                    "sample_id": planned.sample_id or "",
                    "step_name": planned.step_name,
                    "tool_id": planned.tool_id,
                    "category": planned.category,
                    "status": "pending",
                    "reason": planned.reason or "",
                    "return_code": "",
                    "parsed_status": "",
                    "standard_tables": "",
                    "started_at": "",
                    "finished_at": "",
                }
            )

        sample_rows = {}
        for entry in plan.samples:
            sample_rows[entry.sample_id] = {
                "sample_id": entry.sample_id,
                "platform": entry.platform,
                "status": "pending",
                "current_step_id": "",
                "completed_step_count": 0,
                "failed_step_count": 0,
            }

        settings = {"dry_run": dry_run, "parallel": parallel, "workers": workers}
        self._snapshot = {
            "project_name": plan.project_name,
            "status": "running",
            **settings,
            "started_at": _timestamp(),
            "finished_at": "",
            "total_step_count": len(plan.steps),
            "completed_step_count": 0,
            "failed_step_count": 0,
            "running_step_count": 0,
            "current_steps": [],
            "samples": sample_rows,
            "steps": step_rows,
            "last_event": {},
        }
        self.record("run_started", settings)

    def step_started(self, step: Any) -> None:
        """Emit a ``step_started`` event for ``step``."""
        self.record("step_started", _step_payload(step))

    def step_completed(
        self,
        step: Any,
        *,
        status: str,
        reason: str = "",
        return_code: int | str = "",
        parsed_status: str = "",
        standard_tables: str = "",
    ) -> None:
        """Emit the terminal event for ``step``.

        The event is named ``step_failed`` when ``status`` is ``"failed"``
        and ``step_completed`` for every other status.
        """
        event_name = "step_failed" if status == "failed" else "step_completed"
        details = dict(_step_payload(step))
        details.update(
            status=status,
            reason=reason,
            return_code=return_code,
            parsed_status=parsed_status,
            standard_tables=standard_tables,
        )
        self.record(event_name, details)

    def finish_run(self, *, status: str) -> None:
        """Emit a ``run_completed`` event carrying the final ``status``."""
        self.record("run_completed", {"status": status})

    def record(self, event: str, payload: Mapping[str, Any]) -> None:
        """Apply ``event`` to the snapshot, append it to the stream, rewrite the snapshot file."""
        stamp = _timestamp()
        entry = {"timestamp": stamp, "event": event, "payload": dict(payload)}
        with self._lock:
            self._apply_event(event, payload, stamp)
            with self.events_path.open("a", encoding="utf-8") as handle:
                handle.write(json.dumps(entry, ensure_ascii=False, sort_keys=True) + "\n")
            self._snapshot["last_event"] = entry
            self._write_snapshot()

    def _apply_event(self, event: str, payload: Mapping[str, Any], timestamp: str) -> None:
        """Fold one event into the in-memory snapshot.

        Only ``run_completed``, ``step_started``, ``step_completed`` and
        ``step_failed`` change state; step events that name an unknown step
        are ignored.
        """
        if event == "run_completed":
            self._close_run(payload, timestamp)
            return
        if event not in ("step_started", "step_completed", "step_failed"):
            return

        step_id = str(payload.get("step_id", ""))
        sample_id = str(payload.get("sample_id", ""))
        step_entry = self._step_state(step_id)
        if not step_entry:
            return

        if event == "step_started":
            self._mark_step_started(step_entry, step_id, sample_id, timestamp)
        else:
            self._mark_step_finished(step_entry, step_id, sample_id, payload, timestamp)

    def _close_run(self, payload: Mapping[str, Any], timestamp: str) -> None:
        """Record the final run status and retire anything still marked running."""
        self._snapshot["status"] = str(payload.get("status", "completed"))
        self._snapshot["finished_at"] = timestamp
        self._snapshot["running_step_count"] = 0
        self._snapshot["current_steps"] = []
        for sample in self._snapshot.get("samples", {}).values():
            if sample.get("status") != "running":
                continue
            sample["status"] = "completed"
            sample["current_step_id"] = ""

    def _mark_step_started(
        self,
        step_entry: Dict[str, Any],
        step_id: str,
        sample_id: str,
        timestamp: str,
    ) -> None:
        """Flag a step (and its sample, when known) as running."""
        step_entry["status"] = "running"
        step_entry["started_at"] = timestamp

        active = list(self._snapshot.get("current_steps", []))
        if step_id not in active:
            active.append(step_id)
        self._set_active_steps(active)

        sample = self._sample_state(sample_id)
        if sample is not None:
            sample["status"] = "running"
            sample["current_step_id"] = step_id

    def _mark_step_finished(
        self,
        step_entry: Dict[str, Any],
        step_id: str,
        sample_id: str,
        payload: Mapping[str, Any],
        timestamp: str,
    ) -> None:
        """Store a step's outcome and update run-level and sample-level counters."""
        status = str(payload.get("status", "success"))
        failed = status == "failed"

        step_entry["status"] = status
        step_entry["reason"] = str(payload.get("reason", ""))
        step_entry["return_code"] = payload.get("return_code", "")
        step_entry["parsed_status"] = str(payload.get("parsed_status", ""))
        step_entry["standard_tables"] = str(payload.get("standard_tables", ""))
        step_entry["finished_at"] = timestamp

        still_active = [
            other for other in self._snapshot.get("current_steps", []) if other != step_id
        ]
        self._set_active_steps(still_active)

        # "completed" counts every finished step, including failed ones.
        self._increment(self._snapshot, "completed_step_count")
        if failed:
            self._increment(self._snapshot, "failed_step_count")
            self._snapshot["status"] = "failed"

        sample = self._sample_state(sample_id)
        if sample is None:
            return
        self._increment(sample, "completed_step_count")
        if failed:
            self._increment(sample, "failed_step_count")
            sample["status"] = "failed"
        elif sample.get("status") == "running":
            sample["current_step_id"] = ""

    def _set_active_steps(self, active: list) -> None:
        """Store the list of running step ids together with its length."""
        self._snapshot["current_steps"] = active
        self._snapshot["running_step_count"] = len(active)

    def _sample_state(self, sample_id: str) -> Dict[str, Any] | None:
        """Return the snapshot entry for ``sample_id``, or ``None`` when empty or unknown."""
        if sample_id and sample_id in self._snapshot.get("samples", {}):
            return self._snapshot["samples"][sample_id]
        return None

    @staticmethod
    def _increment(counters: Dict[str, Any], key: str) -> None:
        """Add one to ``counters[key]``, treating a missing key as zero."""
        counters[key] = int(counters.get(key, 0)) + 1

    def _step_state(self, step_id: str) -> Dict[str, Any] | None:
        """Return the first snapshot step entry whose ``step_id`` matches, else ``None``."""
        candidates = (
            item
            for item in self._snapshot.get("steps", [])
            if isinstance(item, dict) and item.get("step_id") == step_id
        )
        return next(candidates, None)

    def _write_snapshot(self) -> None:
        """Serialize the snapshot to a temporary file, then move it over ``progress.json``."""
        staging = self.snapshot_path.with_suffix(".json.tmp")
        body = json.dumps(self._snapshot, indent=2, ensure_ascii=False, sort_keys=True)
        staging.write_text(body + "\n", encoding="utf-8")
        staging.replace(self.snapshot_path)
206
+
207
+
208
+ def _step_payload(step: Any) -> Dict[str, Any]:
209
+ return {
210
+ "step_id": step.step_id,
211
+ "sample_id": step.sample_id or "",
212
+ "step_name": step.step_name,
213
+ "tool_id": step.tool_id,
214
+ "category": step.category,
215
+ }
216
+
217
+
218
+ def _timestamp() -> str:
219
+ return datetime.now().isoformat(timespec="seconds")
@@ -0,0 +1 @@
1
+ """Skill base classes and tool registry."""