matensemble 0.4.2__tar.gz → 0.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {matensemble-0.4.2 → matensemble-0.4.4}/PKG-INFO +2 -2
- {matensemble-0.4.2 → matensemble-0.4.4}/README.md +1 -1
- {matensemble-0.4.2 → matensemble-0.4.4}/pyproject.toml +4 -1
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/TODO.md +18 -11
- matensemble-0.4.4/src/matensemble/__main__.py +3 -0
- matensemble-0.4.4/src/matensemble/cli.py +45 -0
- matensemble-0.4.4/src/matensemble/dashboard/__init__.py +6 -0
- matensemble-0.4.4/src/matensemble/dashboard/app.py +312 -0
- matensemble-0.4.4/src/matensemble/dashboard/discovery.py +363 -0
- matensemble-0.4.4/src/matensemble/dashboard/models.py +42 -0
- matensemble-0.4.4/src/matensemble/dashboard/static/dashboard.css +198 -0
- matensemble-0.4.4/src/matensemble/dashboard/static/dashboard.js +325 -0
- matensemble-0.4.4/src/matensemble/dashboard/static/index.html +16 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/fluxlet.py +2 -0
- matensemble-0.4.4/src/matensemble/logger.py +368 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/manager.py +3 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/runtime_worker.py +3 -2
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/strategy.py +10 -2
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/utils.py +63 -28
- matensemble-0.4.2/src/matensemble/dash/assets/index-1X2cLUgt.js +0 -50
- matensemble-0.4.2/src/matensemble/dash/assets/index-DRkGfWlx.css +0 -1
- matensemble-0.4.2/src/matensemble/dash/index.html +0 -14
- matensemble-0.4.2/src/matensemble/dash/vite.svg +0 -1
- matensemble-0.4.2/src/matensemble/logger.py +0 -131
- {matensemble-0.4.2 → matensemble-0.4.4}/LICENSE +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/.python-version +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/README.md +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/__init__.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/chore.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/__init__.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/driver.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/ensemble.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/postprocessors/__init__.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/postprocessors/bispectrum_calculator.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/postprocessors/compute_diffraction.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/postprocessors/compute_order_from_pairs.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/postprocessors/compute_twist.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/postprocessors/correlations.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/postprocessors/ovito_calculators.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/task_lib/AnalysisSubprocess.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/task_lib/AnalysysDescriptor.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/task_lib/MDSubprocess.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/task_lib/__init__.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/task_lib/analysis_registry.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/utils/__init__.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/utils/lammps_init.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/utils/preprocessors.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/utils/stat.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/dynopro/utils/stress_rotate_z_theta.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/model.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/pipeline.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/redis/__init__.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/redis/service.py +0 -0
- {matensemble-0.4.2 → matensemble-0.4.4}/src/matensemble/redis/test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: matensemble
|
|
3
|
-
Version: 0.4.2
|
|
3
|
+
Version: 0.4.4
|
|
4
4
|
Summary: An adaptive and highly asynchronous ensemble simulation workflow manager MatEnsemble (https://github.com/Q-CAD/MatEnsemble) built jointly on top of the hierarchical graph based scheduler FLUX and concurrent-futures infrastructure of python
|
|
5
5
|
Author: Soumendu Bagchi, Kaleb Duchesneau
|
|
6
6
|
Author-email: Soumendu Bagchi <soumendubagchi@gmail.com>, Kaleb Duchesneau <kalebduchesneau@gmail.com>
|
|
@@ -44,7 +44,7 @@ An optional in-tree **dynopro** stack supports streaming dynamics and on-the-fly
|
|
|
44
44
|
- **Adaptive scheduling** that back-fills the allocation as chores finish (a non-adaptive mode is also available)
|
|
45
45
|
- **Two chore types**: Python chores (remotely unpickled and executed by `matensemble.runtime_worker`) and argv-style **executable** chores
|
|
46
46
|
- **Resource requests**: tasks, cores per task, GPUs per task, optional MPI (`pmi2`) via Flux
|
|
47
|
-
- **Observability**: `status.json`, `matensemble_workflow.log`, per-chore `stdout` / `stderr`, pickle and JSON result artifacts; optional **web dashboard**
|
|
47
|
+
- **Observability**: `status.json` summaries, append-only `status_history.jsonl`, `matensemble_workflow.log`, per-chore `stdout` / `stderr`, pickle and JSON result artifacts; optional **web dashboard**
|
|
48
48
|
|
|
49
49
|
<p align="center">
|
|
50
50
|
<img src="media/Cap_1_adaptive_task_management.png" alt="Adaptive task management" width="620" />
|
|
@@ -19,7 +19,7 @@ An optional in-tree **dynopro** stack supports streaming dynamics and on-the-fly
|
|
|
19
19
|
- **Adaptive scheduling** that back-fills the allocation as chores finish (a non-adaptive mode is also available)
|
|
20
20
|
- **Two chore types**: Python chores (remotely unpickled and executed by `matensemble.runtime_worker`) and argv-style **executable** chores
|
|
21
21
|
- **Resource requests**: tasks, cores per task, GPUs per task, optional MPI (`pmi2`) via Flux
|
|
22
|
-
- **Observability**: `status.json`, `matensemble_workflow.log`, per-chore `stdout` / `stderr`, pickle and JSON result artifacts; optional **web dashboard**
|
|
22
|
+
- **Observability**: `status.json` summaries, append-only `status_history.jsonl`, `matensemble_workflow.log`, per-chore `stdout` / `stderr`, pickle and JSON result artifacts; optional **web dashboard**
|
|
23
23
|
|
|
24
24
|
<p align="center">
|
|
25
25
|
<img src="media/Cap_1_adaptive_task_management.png" alt="Adaptive task management" width="620" />
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "matensemble"
|
|
3
|
-
version = "0.4.2"
|
|
3
|
+
version = "0.4.4"
|
|
4
4
|
description = "An adaptive and highly asynchronous ensemble simulation workflow manager MatEnsemble (https://github.com/Q-CAD/MatEnsemble) built jointly on top of the hierarchical graph based scheduler FLUX and concurrent-futures infrastructure of python"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license-files = ["LICENSE"]
|
|
@@ -25,6 +25,9 @@ dependencies = [
|
|
|
25
25
|
"uvicorn>=0.42.0",
|
|
26
26
|
]
|
|
27
27
|
|
|
28
|
+
[project.scripts]
|
|
29
|
+
matensemble = "matensemble.cli:main"
|
|
30
|
+
|
|
28
31
|
[project.optional-dependencies]
|
|
29
32
|
flux = [
|
|
30
33
|
"flux-python==0.66.0",
|
|
@@ -131,12 +131,12 @@
|
|
|
131
131
|
- [x] If that doesn't work break it down into smaller pieces
|
|
132
132
|
## --- Smaller pieces ---
|
|
133
133
|
- [x] Maybe start with an nvidia image rather than Neil's image
|
|
134
|
-
- [ ] Make sure that flux works in the container
|
|
135
|
-
- [ ] Create a container that just has flux and have some different tests for that
|
|
136
|
-
- [ ] Create a container that has just MPI and test that make sure it works
|
|
137
|
-
- [ ] Combine flux and MPI and see if that works
|
|
138
|
-
- [ ] Create a container that has lammps and make sure that that is working
|
|
139
|
-
- [ ] Combine all the pieces
|
|
134
|
+
- [x] Make sure that flux works in the container
|
|
135
|
+
- [x] Create a container that just has flux and have some different tests for that
|
|
136
|
+
- [x] Create a container that has just MPI and test that make sure it works
|
|
137
|
+
- [x] Combine flux and MPI and see if that works
|
|
138
|
+
- [x] Create a container that has lammps and make sure that that is working
|
|
139
|
+
- [x] Combine all the pieces
|
|
140
140
|
|
|
141
141
|
## --- Test Frontier Apptainer container ---
|
|
142
142
|
- [x] Need lots more help here
|
|
@@ -219,7 +219,7 @@
|
|
|
219
219
|
- [x] ???
|
|
220
220
|
- [x] Test the server locally
|
|
221
221
|
- [x] Test the server on an HPC cluster
|
|
222
|
-
- [ ] Create documentation for setting it up
|
|
222
|
+
- [x] Create documentation for setting it up
|
|
223
223
|
|
|
224
224
|
## --- Frontier Dynopro Fix ---
|
|
225
225
|
|
|
@@ -227,7 +227,8 @@
|
|
|
227
227
|
- [x] Patch MatEnsemble to ignore the double free error: 134 or 137 idk whichever one it is
|
|
228
228
|
- [x] Update the Base image or retag the one that I used to build this john
|
|
229
229
|
- [x] The double free is likely from symmetrix so forget fixing it
|
|
230
|
-
- [ ] Run the release script and do a PR
|
|
230
|
+
- [x] Run the release script and do a PR
|
|
231
|
+
- [ ] Make sure that the Pathfinder CLI tool works
|
|
231
232
|
|
|
232
233
|
## --- Demo ---
|
|
233
234
|
- [ ] Video demonstrating the MCP server
|
|
@@ -236,10 +237,16 @@
|
|
|
236
237
|
- [ ] Dashboard connection
|
|
237
238
|
- [ ] Creation of the environment
|
|
238
239
|
|
|
240
|
+
## --- Dynopro Upgrades ---
|
|
241
|
+
|
|
242
|
+
- [ ] Add things into the MatEnsemble API to allow users to define the two subprocesses
|
|
243
|
+
- [ ] Split the jobs between the ranks
|
|
244
|
+
- [ ] Define them as chores
|
|
245
|
+
|
|
239
246
|
|
|
240
247
|
## --- Reading List ---
|
|
241
|
-
- [ ] [Agentic Orchestration of HPC Applications](https://vsoch.github.io/assets/posts/agentic-orchestration-hpc-workloads-cloud-sochat-milroy.pdf)
|
|
242
248
|
- [x] [Container Training Slides](https://drive.google.com/drive/folders/1_mTBBc98TEX3XFpNp0rqoqj1VjN9TKoO)
|
|
243
|
-
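- [ ] [Containers as Jupyter Kernels](https://docs.nersc.gov/services/jupyter/how-to-guides/#how-to-use-a-container-to-run-a-jupyter-kernel)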
|
|
249
|
+
- [x] [Containers as Jupyter Kernels](https://docs.nersc.gov/services/jupyter/how-to-guides/#how-to-use-a-container-to-run-a-jupyter-kernel)
|
|
250
|
+
- [x] [Using uv to package lammps and flux into pip install???](https://sgoel.dev/posts/building-cython-or-c-extensions-using-uv/)
|
|
251
|
+
- [ ] [Agentic Orchestration of HPC Applications](https://vsoch.github.io/assets/posts/agentic-orchestration-hpc-workloads-cloud-sochat-milroy.pdf)
|
|
244
252
|
- [ ] [Using SPIN to Run Persistent Containers](https://docs.nersc.gov/services/spin/)
|
|
245
|
-
- [ ] [Using uv to package lammps and flux into pip install???](https://sgoel.dev/posts/building-cython-or-c-extensions-using-uv/)
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Sequence
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the ``matensemble`` command-line parser.

    A subcommand is required. The only subcommand defined here is
    ``dashboard``, which takes an optional positional ``root`` (default
    ``"."``) plus ``--host``, ``--port``, ``--scan-interval`` and
    ``--stale-after``.

    Returns:
        The configured top-level parser.
    """
    top_level = argparse.ArgumentParser(prog="matensemble")
    commands = top_level.add_subparsers(dest="command", required=True)

    dash = commands.add_parser(
        "dashboard",
        help="serve the multi-workflow monitoring dashboard",
    )
    dash.add_argument("root", nargs="?", default=".")

    # Optional settings, registered in the order they appear in --help.
    options = {
        "--host": {"default": "127.0.0.1"},
        "--port": {"type": int, "default": 8000},
        "--scan-interval": {"type": float, "default": 5.0},
        "--stale-after": {"type": float, "default": 30.0},
    }
    for flag, settings in options.items():
        dash.add_argument(flag, **settings)

    return top_level
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Run the ``matensemble`` command-line interface.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        ``0`` after the dashboard server returns, ``2`` if the parsed
        command is not one this function handles.

    Raises:
        RuntimeError: if ``uvicorn`` cannot be imported for the
            ``dashboard`` command.
    """
    options = build_parser().parse_args(argv)
    if options.command != "dashboard":
        # Fallback status for any command without a handler here.
        return 2

    # Imported lazily so that only the dashboard command needs uvicorn.
    try:
        import uvicorn
    except ImportError as exc:
        raise RuntimeError(
            "The dashboard command requires the uvicorn dependency."
        ) from exc
    from matensemble.dashboard import create_dashboard_app

    workflow_root = Path(options.root).expanduser().resolve()
    dashboard_app = create_dashboard_app(
        workflow_root,
        scan_interval=options.scan_interval,
        stale_after=options.stale_after,
    )
    uvicorn.run(dashboard_app, host=options.host, port=options.port)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
from contextlib import asynccontextmanager
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from .discovery import WorkflowCatalog
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Accepted chore IDs: 1-128 characters, starting with an ASCII letter or digit,
# followed only by letters, digits, ".", "_" or "-".
CHORE_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,127}$")

# Number of history points returned when the request does not ask for a limit.
DEFAULT_MAX_POINTS = 1000

# Largest ``max_points`` value a history request may ask for.
MAX_HISTORY_POINTS = 5000
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _error(code: str, message: str, status_code: int):
    """Build a JSON error response.

    Args:
        code: Machine-readable error identifier.
        message: Human-readable description.
        status_code: HTTP status to send.

    Returns:
        A starlette ``JSONResponse`` whose body is
        ``{"error": {"code": code, "message": message}}``.
    """
    # Imported here rather than at module level, so importing this module
    # does not itself require starlette.
    from starlette.responses import JSONResponse

    body = {"error": {"code": code, "message": message}}
    return JSONResponse(body, status_code=status_code)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _downsample(records: list[dict[str, Any]], limit: int) -> list[dict[str, Any]]:
|
|
28
|
+
if len(records) <= limit:
|
|
29
|
+
return records
|
|
30
|
+
if limit == 1:
|
|
31
|
+
return [records[-1]]
|
|
32
|
+
last = len(records) - 1
|
|
33
|
+
indexes = {round(index * last / (limit - 1)) for index in range(limit)}
|
|
34
|
+
return [records[index] for index in sorted(indexes)]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _sequence_of(row: dict[str, Any]) -> int:
    """Return the integer ``sequence`` of a history row (``-1`` when absent).

    Raises:
        ValueError: if the stored value cannot be converted to an integer.
    """
    try:
        return int(row.get("sequence", -1))
    except (TypeError, ValueError, OverflowError) as exc:
        # int(None) raises TypeError and int(float("inf")) raises
        # OverflowError. Normalise both to ValueError, which is the error
        # type this function documents for malformed history data.
        raise ValueError("history record has a non-integer sequence") from exc


def read_history(
    workflow_path: Path,
    status: dict[str, Any],
    *,
    after_sequence: int | None = None,
    max_points: int = DEFAULT_MAX_POINTS,
) -> dict[str, Any]:
    """Load the history records of one workflow.

    When *status* names no ``history_file``, a single record is synthesized
    from the ``current`` and ``workflow`` sections of *status*. Otherwise the
    named JSON-lines file inside *workflow_path* is parsed, sorted by
    ``sequence``, filtered and downsampled.

    Args:
        workflow_path: Directory of the workflow.
        status: Parsed status document of the workflow.
        after_sequence: When given, keep only records whose sequence is
            strictly greater than this value.
        max_points: Maximum number of records returned from a history file.

    Returns:
        A dict with the keys ``records``, ``first_sequence``,
        ``last_sequence``, ``truncated`` and
        ``ignored_incomplete_final_line``.

    Raises:
        ValueError: if ``history_file`` is not a plain file name, resolves
            outside *workflow_path*, or the file contains an invalid record.
        OSError: if the history file cannot be read.
        UnicodeError: if the history file is not valid UTF-8.
    """
    history_name = status.get("history_file")
    if not history_name:
        # Tolerate sections that are present but null or malformed.
        current = status.get("current")
        if not isinstance(current, dict):
            current = {}
        workflow = status.get("workflow")
        if not isinstance(workflow, dict):
            workflow = {}
        records = [
            {
                "sequence": current.get("sequence", 0),
                "timestamp": workflow.get("updated_at"),
                "elapsed_seconds": workflow.get("elapsed_seconds"),
                "state": workflow.get("state"),
                **{
                    key: current.get(key, 0)
                    for key in (
                        "pending",
                        "ready",
                        "blocked",
                        "running",
                        "completed",
                        "failed",
                        "free_cores",
                        "free_gpus",
                    )
                },
            }
        ]
        if after_sequence is not None:
            records = [row for row in records if _sequence_of(row) > after_sequence]
        return {
            "records": records,
            "first_sequence": records[0]["sequence"] if records else None,
            "last_sequence": records[-1]["sequence"] if records else None,
            "truncated": False,
            "ignored_incomplete_final_line": False,
        }

    # The name comes from the status document, so reject anything that is
    # not a bare file name before joining it onto the workflow directory.
    if (
        not isinstance(history_name, str)
        or Path(history_name).name != history_name
        or history_name in {".", ".."}
    ):
        raise ValueError("status history_file must be a file name")
    history_path = workflow_path / history_name
    try:
        # Resolving follows symlinks, so a link pointing elsewhere is caught.
        resolved_history = history_path.resolve()
        resolved_history.relative_to(workflow_path.resolve())
    except (OSError, ValueError) as exc:
        raise ValueError(
            "history file resolves outside the workflow directory"
        ) from exc

    records: list[dict[str, Any]] = []
    ignored = False
    if resolved_history.is_file():
        text = resolved_history.read_text(encoding="utf-8")
        # Split on "\n" only: str.splitlines() also breaks on characters
        # such as U+2028 that may appear unescaped inside a JSON string.
        # read_text() has already translated "\r\n" to "\n".
        lines = text.split("\n")
        final_index = len(lines) - 1
        for index, line in enumerate(lines):
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as exc:
                # A non-blank last element means the text has no trailing
                # newline, so this is the unterminated final line.
                if index == final_index:
                    ignored = True
                    continue
                raise ValueError(
                    f"invalid JSON history record on line {index + 1}"
                ) from exc
            if not isinstance(record, dict):
                raise ValueError(f"history record on line {index + 1} is not an object")
            records.append(record)

    # Parse each sequence once and reuse it for both sorting and filtering.
    keyed = [(_sequence_of(row), row) for row in records]
    keyed.sort(key=lambda pair: pair[0])
    if after_sequence is not None:
        keyed = [pair for pair in keyed if pair[0] > after_sequence]
    records = [row for _, row in keyed]

    truncated = len(records) > max_points
    records = _downsample(records, max_points)
    return {
        "records": records,
        "first_sequence": records[0].get("sequence") if records else None,
        "last_sequence": records[-1].get("sequence") if records else None,
        "truncated": truncated,
        "ignored_incomplete_final_line": ignored,
    }
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def create_dashboard_app(
    root: str | Path,
    *,
    scan_interval: float = 5.0,
    stale_after: float = 30.0,
    compatibility_workflow_id: str | None = None,
):
    """Create the Starlette application serving the monitoring dashboard.

    The app exposes ``/api/catalog`` and, per workflow, ``status``,
    ``history`` and ``artifacts/{chore_id}/stderr`` endpoints, and mounts the
    bundled ``static`` directory at ``/``. While the app is running, a
    background task refreshes the workflow catalog periodically.

    Args:
        root: Passed to ``WorkflowCatalog`` as the directory to scan.
        scan_interval: Seconds between catalog refreshes; values below 0.1
            are raised to 0.1.
        stale_after: Passed through to ``WorkflowCatalog``.
        compatibility_workflow_id: When set, also registers the un-prefixed
            ``/api/status``, ``/api/history`` and
            ``/api/artifacts/{chore_id}/stderr`` routes bound to this
            workflow ID.

    Returns:
        The configured Starlette app; the catalog is stored on
        ``app.state.catalog``.

    Raises:
        RuntimeError: if starlette cannot be imported.
    """
    import logging

    try:
        from starlette.applications import Starlette
        from starlette.responses import FileResponse, JSONResponse
        from starlette.routing import Mount, Route
        from starlette.staticfiles import StaticFiles
    except ImportError as exc:
        raise RuntimeError(
            "The MatEnsemble dashboard requires starlette and uvicorn."
        ) from exc

    log = logging.getLogger(__name__)
    catalog = WorkflowCatalog(root, stale_after=stale_after)
    interval = max(0.1, float(scan_interval))

    async def scanner() -> None:
        """Refresh the catalog forever, once per interval."""
        while True:
            await asyncio.sleep(interval)
            try:
                await asyncio.to_thread(catalog.refresh)
            except Exception:
                # One failed scan must not end periodic refreshing for the
                # rest of the server's life. Cancellation is a BaseException
                # and still propagates.
                log.exception(
                    "workflow catalog refresh failed; retrying in %.1f s", interval
                )

    @asynccontextmanager
    async def lifespan(_app):
        # Populate the catalog before the first request is served.
        await asyncio.to_thread(catalog.refresh)
        task = asyncio.create_task(scanner())
        try:
            yield
        finally:
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass

    async def get_catalog(_request):
        return JSONResponse(catalog.catalog())

    async def get_status(request):
        identifier = request.path_params["workflow_id"]
        record = await asyncio.to_thread(catalog.status, identifier)
        if record is None:
            return _error(
                "workflow_not_found",
                "The workflow is no longer available.",
                404,
            )
        return JSONResponse(
            {
                "workflow_id": record.id,
                "relative_path": record.relative_path,
                "health": record.health,
                "error": record.error,
                "status": record.status,
            }
        )

    async def get_history(request):
        identifier = request.path_params["workflow_id"]
        record = await asyncio.to_thread(catalog.status, identifier)
        if record is None or record.health == "missing":
            return _error(
                "workflow_not_found",
                "The workflow is no longer available.",
                404,
            )
        if record.status is None:
            return _error(
                "status_unavailable",
                record.error or "Workflow status is not available yet.",
                404,
            )
        try:
            after_raw = request.query_params.get("after_sequence")
            after = int(after_raw) if after_raw is not None else None
            max_points = int(
                request.query_params.get("max_points", DEFAULT_MAX_POINTS)
            )
            if max_points < 1 or max_points > MAX_HISTORY_POINTS:
                raise ValueError
        except ValueError:
            return _error(
                "invalid_history_query",
                f"max_points must be between 1 and {MAX_HISTORY_POINTS}, and "
                "after_sequence must be an integer.",
                400,
            )
        try:
            # File reading and parsing run in a worker thread.
            payload = await asyncio.to_thread(
                read_history,
                record.path,
                record.status,
                after_sequence=after,
                max_points=max_points,
            )
        except OSError as exc:
            return _error(
                "history_unreadable",
                exc.strerror or "The history file could not be read.",
                500,
            )
        except (UnicodeError, ValueError) as exc:
            return _error("history_unreadable", str(exc), 500)
        return JSONResponse({"workflow_id": identifier, **payload})

    async def get_stderr(request):
        identifier = request.path_params["workflow_id"]
        chore_id = request.path_params["chore_id"]
        if not CHORE_ID_RE.fullmatch(chore_id) or chore_id in {".", ".."}:
            return _error("invalid_chore_id", "The chore ID is invalid.", 400)
        # NOTE(review): this handler uses catalog.get() while the status and
        # history handlers use catalog.status() in a thread; confirm that
        # the difference is intended.
        record = catalog.get(identifier)
        if record is None or record.health == "missing":
            return _error(
                "workflow_not_found",
                "The workflow is no longer available.",
                404,
            )
        stderr_path = record.path / "out" / chore_id / "stderr"
        try:
            # After resolving symlinks, the file must still be inside both
            # the workflow directory and the catalog root.
            resolved = stderr_path.resolve()
            resolved.relative_to(record.path.resolve())
            resolved.relative_to(catalog.root)
        except (OSError, ValueError):
            return _error(
                "artifact_outside_workflow",
                "The requested artifact is outside the workflow directory.",
                400,
            )
        if not resolved.is_file():
            return _error("artifact_not_found", "stderr was not found.", 404)
        return FileResponse(resolved, media_type="text/plain; charset=utf-8")

    routes = [
        Route("/api/catalog", get_catalog),
        Route("/api/workflows/{workflow_id:str}/status", get_status),
        Route("/api/workflows/{workflow_id:str}/history", get_history),
        Route(
            "/api/workflows/{workflow_id:str}/artifacts/{chore_id:str}/stderr",
            get_stderr,
        ),
    ]

    if compatibility_workflow_id:
        # The un-prefixed routes reuse the handlers above by injecting the
        # fixed workflow ID into the request's path parameters.
        async def legacy_status(request):
            request.path_params["workflow_id"] = compatibility_workflow_id
            response = await get_status(request)
            if response.status_code != 200:
                return response
            payload = json.loads(response.body)
            return JSONResponse(payload["status"] or {})

        async def legacy_history(request):
            request.path_params["workflow_id"] = compatibility_workflow_id
            response = await get_history(request)
            if response.status_code != 200:
                return response
            payload = json.loads(response.body)
            return JSONResponse(payload["records"])

        async def legacy_stderr(request):
            request.path_params["workflow_id"] = compatibility_workflow_id
            return await get_stderr(request)

        routes.extend(
            [
                Route("/api/status", legacy_status),
                Route("/api/history", legacy_history),
                Route("/api/artifacts/{chore_id:str}/stderr", legacy_stderr),
            ]
        )

    # Mounted last so the API routes above take precedence over static files.
    static_dir = Path(__file__).resolve().parent / "static"
    routes.append(
        Mount("/", StaticFiles(directory=static_dir, html=True), name="static")
    )
    app = Starlette(routes=routes, lifespan=lifespan)
    app.state.catalog = catalog
    return app
|