@desplega.ai/agent-swarm 1.91.0 → 1.92.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/openapi.json +585 -5
- package/package.json +1 -1
- package/src/be/db.ts +337 -1
- package/src/be/migrations/083_script_workflows.sql +51 -0
- package/src/be/modelsdev-cache.json +42352 -38595
- package/src/be/scripts/typecheck.ts +49 -0
- package/src/be/seed-scripts/catalog/compound-insights.ts +216 -6
- package/src/be/seed-scripts/catalog/ops-catalog-audit.ts +911 -0
- package/src/be/seed-scripts/catalog/task-context-gathering.ts +92 -0
- package/src/be/seed-scripts/catalog/tool-usage.ts +6 -3
- package/src/be/seed-scripts/index.ts +20 -2
- package/src/be/seed-skills/index.ts +7 -0
- package/src/be/swarm-config-guard.ts +17 -0
- package/src/commands/runner.ts +43 -2
- package/src/http/db-query.ts +20 -5
- package/src/http/index.ts +10 -0
- package/src/http/script-runs.ts +555 -0
- package/src/prompts/session-templates.ts +24 -4
- package/src/providers/claude-adapter.ts +60 -13
- package/src/script-workflows/executor.ts +110 -0
- package/src/script-workflows/harness.ts +73 -0
- package/src/script-workflows/label-lint.ts +51 -0
- package/src/script-workflows/limits.ts +22 -0
- package/src/script-workflows/supervisor.ts +139 -0
- package/src/script-workflows/workflow-ctx.ts +205 -0
- package/src/scripts-runtime/sdk-allowlist.ts +3 -0
- package/src/scripts-runtime/types/stdlib.d.ts +60 -0
- package/src/scripts-runtime/types/swarm-sdk.d.ts +60 -0
- package/src/server.ts +2 -0
- package/src/slack/handlers.ts +11 -4
- package/src/slack/message-text.ts +98 -0
- package/src/slack/thread-buffer.ts +5 -3
- package/src/tests/claude-adapter-binary.test.ts +147 -4
- package/src/tests/db-query.test.ts +28 -0
- package/src/tests/error-tracker.test.ts +121 -0
- package/src/tests/harness-provider-resolution.test.ts +33 -0
- package/src/tests/mcp-tools.test.ts +6 -0
- package/src/tests/prompt-template-session.test.ts +34 -5
- package/src/tests/script-runs-http.test.ts +278 -0
- package/src/tests/script-workflows-label-lint.test.ts +43 -0
- package/src/tests/script-workflows-runtime-e2e.test.ts +170 -0
- package/src/tests/scripts-mcp-e2e.test.ts +49 -2
- package/src/tests/seed-scripts.test.ts +347 -2
- package/src/tests/slack-message-text.test.ts +250 -0
- package/src/tests/system-default-skills.test.ts +40 -0
- package/src/tools/db-query.ts +16 -6
- package/src/tools/script-runs.ts +123 -0
- package/src/tools/slack-read.ts +12 -3
- package/src/tools/tool-config.ts +4 -1
- package/src/types.ts +52 -0
- package/src/utils/error-tracker.ts +40 -1
- package/src/utils/internal-ai/complete-structured.ts +10 -4
- package/src/workflows/executors/raw-llm.ts +76 -59
- package/templates/skills/pages/content.md +205 -55
- package/templates/skills/script-workflows/config.json +14 -0
- package/templates/skills/script-workflows/content.md +68 -0
- package/templates/skills/swarm-scripts/content.md +2 -3
|
@@ -108,6 +108,45 @@ export function resolveClaudeBinary(
|
|
|
108
108
|
return candidate || "claude";
|
|
109
109
|
}
|
|
110
110
|
|
|
111
|
+
/** Executable name used instead of the configured Claude binary when the claude-bridge toggle is on. */
const CLAUDE_BRIDGE_BINARY = "claude-bridge";
|
|
112
|
+
|
|
113
|
+
/**
 * Interpret the raw value of a boolean environment toggle.
 *
 * Matching ignores surrounding whitespace and letter case. Only `true` and
 * `1` switch the toggle on; `false`, `0`, an unset variable and any
 * unrecognised text all leave it off.
 *
 * Exported for unit testing.
 *
 * @param raw - Raw environment value, or `undefined` when the variable is unset.
 * @returns `true` only for an accepted "on" spelling.
 */
export function parseClaudeBridgeEnabled(raw: string | undefined): boolean {
  if (raw === undefined) return false;

  switch (raw.trim().toLowerCase()) {
    case "true":
    case "1":
      return true;
    default:
      return false;
  }
}
|
|
123
|
+
|
|
124
|
+
/**
 * Decide whether the claude-bridge toggle (`SWARM_USE_CLAUDE_BRIDGE`) is on.
 *
 * The resolved-env overlay wins whenever it carries a non-blank value; only a
 * missing or blank overlay entry falls through to `fallbackEnv`. The chosen
 * value is then interpreted by `parseClaudeBridgeEnabled`.
 *
 * Exported for unit testing.
 *
 * @param resolvedEnv - Resolved environment overlay consulted first.
 * @param fallbackEnv - Environment consulted when the overlay has no usable
 *   value; defaults to `process.env`.
 * @returns Whether the bridge is enabled.
 */
export function resolveClaudeBridgeEnabled(
  resolvedEnv: Record<string, string | undefined>,
  fallbackEnv: Record<string, string | undefined> = process.env,
): boolean {
  const overlayValue = resolvedEnv.SWARM_USE_CLAUDE_BRIDGE?.trim();
  if (overlayValue) {
    return parseClaudeBridgeEnabled(overlayValue);
  }
  return parseClaudeBridgeEnabled(fallbackEnv.SWARM_USE_CLAUDE_BRIDGE?.trim());
}
|
|
138
|
+
|
|
139
|
+
/**
 * Work out which binary to launch and the argv prefix derived from it.
 *
 * When the claude-bridge toggle is on, the bridge binary is used and the
 * configured Claude binary is not consulted; otherwise the binary comes from
 * `resolveClaudeBinary`. The raw string is then handed to `parseClaudeBinary`
 * (defined elsewhere in this file) to obtain the argv prefix.
 *
 * @param resolvedEnv - Resolved environment overlay consulted first.
 * @param fallbackEnv - Fallback environment; defaults to `process.env`.
 * @returns The raw binary string, its parsed argv prefix, and whether the
 *   bridge was selected.
 */
function resolveClaudeBinaryArgv(
  resolvedEnv: Record<string, string | undefined>,
  fallbackEnv: Record<string, string | undefined> = process.env,
): { raw: string; argv: string[]; useClaudeBridge: boolean } {
  const useClaudeBridge = resolveClaudeBridgeEnabled(resolvedEnv, fallbackEnv);

  let raw: string;
  if (useClaudeBridge) {
    raw = CLAUDE_BRIDGE_BINARY;
  } else {
    raw = resolveClaudeBinary(resolvedEnv, fallbackEnv);
  }

  const argv = parseClaudeBinary(raw);
  return { raw, argv, useClaudeBridge };
}
|
|
149
|
+
|
|
111
150
|
/**
|
|
112
151
|
* Pre-seed `~/.claude.json` so the per-project trust-dialog ("Quick safety
|
|
113
152
|
* check: Is this a project you trust?") doesn't block on first run.
|
|
@@ -797,12 +836,9 @@ export class ClaudeAdapter implements ProviderAdapter {
|
|
|
797
836
|
console.log(`\x1b[2m[claude]\x1b[0m Using credential: ${credType}`);
|
|
798
837
|
|
|
799
838
|
// Resolve the argv prefix. Same flags (`-p`, `--model`, ...) work across
|
|
800
|
-
// alternates; only argv[0..n] changes.
|
|
801
|
-
//
|
|
802
|
-
//
|
|
803
|
-
// Setting it to anything containing `shannon` opts into the dexhorthy/shannon
|
|
804
|
-
// variant, which drives `claude` interactively in tmux to stay on the
|
|
805
|
-
// subscription credit pool after the 2026-06-15 programmatic-credit split.
|
|
839
|
+
// alternates; only argv[0..n] changes. Prefer SWARM_USE_CLAUDE_BRIDGE=true
|
|
840
|
+
// for the Desplega-owned bridge. CLAUDE_BINARY remains as the low-level
|
|
841
|
+
// override for custom binaries and the deprecated shannon path.
|
|
806
842
|
//
|
|
807
843
|
// `config.env` carries the swarm_config overlay (resolved repo > agent > global
|
|
808
844
|
// by `fetchResolvedEnv` in src/commands/runner.ts), so operators can flip
|
|
@@ -811,19 +847,30 @@ export class ClaudeAdapter implements ProviderAdapter {
|
|
|
811
847
|
//
|
|
812
848
|
// See `docs-site/.../shannon-experimental.mdx` for the user-facing guide
|
|
813
849
|
// and `runbooks/harness-providers.md` for engineering notes.
|
|
814
|
-
const
|
|
815
|
-
|
|
850
|
+
const {
|
|
851
|
+
raw: claudeBinaryRaw,
|
|
852
|
+
argv: claudeBinaryArgv,
|
|
853
|
+
useClaudeBridge,
|
|
854
|
+
} = resolveClaudeBinaryArgv(config.env || process.env);
|
|
816
855
|
const isShannon = claudeBinaryRaw.toLowerCase().includes("shannon");
|
|
856
|
+
const configuredClaudeBinaryRaw = resolveClaudeBinary(config.env || process.env);
|
|
857
|
+
if (configuredClaudeBinaryRaw.toLowerCase().includes("shannon")) {
|
|
858
|
+
console.warn(
|
|
859
|
+
"\x1b[33m[claude]\x1b[0m CLAUDE_BINARY=shannon is deprecated; set SWARM_USE_CLAUDE_BRIDGE=true to use @desplega.ai/claude-bridge.",
|
|
860
|
+
);
|
|
861
|
+
}
|
|
817
862
|
|
|
818
863
|
console.log(
|
|
819
|
-
`\x1b[2m[${config.role}]\x1b[0m Resolved
|
|
864
|
+
`\x1b[2m[${config.role}]\x1b[0m Resolved claude binary: ${claudeBinaryArgv.join(" ")} (useClaudeBridge: ${useClaudeBridge}, isShannon: ${isShannon})`,
|
|
820
865
|
);
|
|
821
866
|
|
|
822
|
-
// Fail fast: shannon
|
|
823
|
-
// clear error here rather than letting
|
|
824
|
-
|
|
867
|
+
// Fail fast: shannon and claude-bridge both shell out to tmux. If it's
|
|
868
|
+
// missing, surface a clear error here rather than letting startup fail
|
|
869
|
+
// opaquely.
|
|
870
|
+
if ((isShannon || useClaudeBridge) && !Bun.which("tmux")) {
|
|
871
|
+
const label = useClaudeBridge ? "SWARM_USE_CLAUDE_BRIDGE=true" : "CLAUDE_BINARY=shannon";
|
|
825
872
|
throw new Error(
|
|
826
|
-
|
|
873
|
+
`${label} requires 'tmux' on PATH (install via apt/brew). See runbooks/harness-providers.md.`,
|
|
827
874
|
);
|
|
828
875
|
}
|
|
829
876
|
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import { existsSync } from "node:fs";
import { mkdir, rm } from "node:fs/promises";
import { resolve } from "node:path";
import { fileURLToPath } from "node:url";
import type { ScriptRun } from "../types";
|
|
5
|
+
|
|
6
|
+
/** Outcome reported once a script subprocess has exited. */
export type ScriptExecutionResult = {
  /** Exit code of the subprocess; `null` when no numeric code is available. */
  exitCode: number | null;
  /** Text the subprocess wrote to stderr. */
  stderr: string;
};

/** Handle for one started script execution. */
export type ScriptExecutionHandle = {
  /** Process id of the subprocess, or `null` when there is none. */
  pid: number | null;
  /** Working directory created for this execution. */
  tmpdir: string;
  /** Epoch milliseconds recorded when the execution was started. */
  startedAtMs: number;
  /** Settles with the exit code and stderr once the subprocess has exited. */
  exited: Promise<ScriptExecutionResult>;
  /** Send a signal to the subprocess. */
  terminate(signal?: NodeJS.Signals): void;
  /** Remove the resources held for this execution. */
  cleanup(): Promise<void>;
};

/** Everything an executor needs to start a script run. */
export type StartScriptExecutionInput = {
  /** The run to execute. */
  run: ScriptRun;
  /** Base URL handed to the subprocess for its API callbacks. */
  baseUrl: string;
  /** API key handed to the subprocess for its API callbacks. */
  apiKey: string;
};

/** Strategy for starting script runs and signalling their processes by pid. */
export interface ScriptExecutor {
  /** Start the given run and return a handle to it. */
  start(input: StartScriptExecutionInput): Promise<ScriptExecutionHandle>;
  /** Report whether a process with this pid is currently alive. */
  isRunning(pid: number): boolean;
  /** Send a signal to the process with this pid. */
  terminatePid(pid: number, signal?: NodeJS.Signals): void;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Locate the harness entry point that script-workflow subprocesses run.
 *
 * Without `SCRIPT_WORKFLOW_RUNTIME_DIR` the TypeScript harness sitting next to
 * this module is used. With it, a prebuilt `harness.bundle.js` inside that
 * directory is required.
 *
 * @returns Filesystem path of the harness to execute.
 * @throws Error when `SCRIPT_WORKFLOW_RUNTIME_DIR` is set but the bundle is
 *   not present in that directory.
 */
export function getScriptWorkflowHarnessPath(): string {
  const runtimeDir = process.env.SCRIPT_WORKFLOW_RUNTIME_DIR;
  if (!runtimeDir) {
    // `URL#pathname` stays percent-encoded (a space becomes `%20`), which is
    // not a usable filesystem path; `fileURLToPath` decodes it and applies
    // the platform's path rules.
    return fileURLToPath(new URL("./harness.ts", import.meta.url));
  }

  const bundledHarness = `${resolve(runtimeDir)}/harness.bundle.js`;
  if (!existsSync(bundledHarness)) {
    throw new Error(
      `Script workflow harness bundle not found at ${bundledHarness}. ` +
        "Build/copy harness.bundle.js and set SCRIPT_WORKFLOW_RUNTIME_DIR to its directory.",
    );
  }
  return bundledHarness;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Executor that runs each script run as a local `bun run <harness>`
 * subprocess inside a per-run temporary directory.
 */
export class LocalProcessScriptExecutor implements ScriptExecutor {
  /**
   * Create the run's working directory and spawn the harness in it.
   *
   * If anything fails after the directory has been created (writing the input
   * files, resolving the harness path, spawning), the directory is removed
   * again before the error is rethrown, so failed starts do not leave the
   * script source behind on disk.
   *
   * @param input - Run to execute plus the base URL and API key passed to the
   *   subprocess.
   * @returns Handle for the spawned subprocess.
   * @throws Whatever the underlying write/spawn step threw.
   */
  async start(input: StartScriptExecutionInput): Promise<ScriptExecutionHandle> {
    const tmpdir = `${process.env.TMPDIR ?? "/tmp"}/script-workflow-${input.run.id}`;
    await mkdir(tmpdir, { recursive: true });

    try {
      return await this.launch(input, tmpdir);
    } catch (err) {
      // Best-effort cleanup: the launch failure is the error worth surfacing,
      // so a secondary failure to remove the directory is ignored.
      await rm(tmpdir, { recursive: true, force: true }).catch(() => {});
      throw err;
    }
  }

  /** Write the run's input files into `tmpdir`, spawn the harness and build its handle. */
  private async launch(
    input: StartScriptExecutionInput,
    tmpdir: string,
  ): Promise<ScriptExecutionHandle> {
    const { run, baseUrl, apiKey } = input;
    const sourceFile = `${tmpdir}/source.ts`;
    const argsFile = `${tmpdir}/args.json`;
    await Bun.write(sourceFile, run.source);
    await Bun.write(argsFile, JSON.stringify(run.args ?? null));

    const proc = Bun.spawn(["bun", "run", getScriptWorkflowHarnessPath()], {
      cwd: tmpdir,
      stdin: "ignore",
      stdout: "ignore",
      stderr: "pipe",
      // Explicit environment: a few basics copied from this process plus the
      // run-specific variables the harness reads.
      env: {
        PATH: process.env.PATH ?? "/usr/bin:/bin",
        HOME: process.env.HOME ?? "/tmp",
        LANG: process.env.LANG ?? "C.UTF-8",
        LC_ALL: process.env.LC_ALL ?? "C.UTF-8",
        TMPDIR: tmpdir,
        AGENT_SWARM_API_KEY: apiKey,
        MCP_BASE_URL: baseUrl,
        SCRIPT_RUN_ID: run.id,
        SCRIPT_RUN_AGENT_ID: run.agentId,
        SCRIPT_RUN_TMPDIR: tmpdir,
        SCRIPT_RUN_SOURCE_FILE: sourceFile,
        SCRIPT_RUN_ARGS_FILE: argsFile,
      },
    });

    // Start draining stderr immediately; a read failure degrades to "".
    const stderrPromise = new Response(proc.stderr).text().catch(() => "");

    return {
      pid: proc.pid,
      tmpdir,
      startedAtMs: Date.now(),
      exited: proc.exited.then(async (exitCode) => ({
        exitCode,
        stderr: await stderrPromise,
      })),
      terminate: (signal = "SIGTERM") => {
        proc.kill(signal);
      },
      cleanup: async () => {
        await rm(tmpdir, { recursive: true, force: true });
      },
    };
  }

  /**
   * Probe a pid with signal 0.
   *
   * @returns `true` when the probe succeeds, `false` when it throws for any
   *   reason.
   */
  isRunning(pid: number): boolean {
    try {
      process.kill(pid, 0);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Send `signal` (default `SIGTERM`) to `pid`.
   *
   * @throws Whatever `process.kill` throws, e.g. when the pid no longer exists.
   */
  terminatePid(pid: number, signal: NodeJS.Signals = "SIGTERM"): void {
    process.kill(pid, signal);
  }
}

/** Shared executor instance. */
export const localProcessScriptExecutor = new LocalProcessScriptExecutor();
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { buildWorkflowCtx } from "./workflow-ctx";
|
|
2
|
+
|
|
3
|
+
/**
 * Read an environment variable that must be present and non-empty.
 *
 * @param name - Name of the environment variable.
 * @returns The variable's value.
 * @throws Error when the variable is unset or empty.
 */
function requiredEnv(name: string): string {
  const value = process.env[name];
  if (value) return value;
  throw new Error(`Missing required env ${name}`);
}
|
|
8
|
+
|
|
9
|
+
/**
 * Report a status change for a script run to the server.
 *
 * Sends `body` as JSON in a POST to the run's internal status endpoint,
 * authenticated with the API key and tagged with the agent id.
 *
 * @param runId - Id of the run being reported on.
 * @param baseUrl - Server base URL; the endpoint path is appended directly.
 * @param agentId - Sent as the `X-Agent-ID` header.
 * @param apiKey - Sent as a bearer token.
 * @param body - JSON-serialisable payload.
 * @throws Error carrying the HTTP status and response text when the response
 *   is not OK.
 */
async function postStatus(
  runId: string,
  baseUrl: string,
  agentId: string,
  apiKey: string,
  body: Record<string, unknown>,
): Promise<void> {
  const endpoint = `${baseUrl}/api/internal/script-runs/${runId}/status`;
  const headers = {
    Authorization: `Bearer ${apiKey}`,
    "X-Agent-ID": agentId,
    "Content-Type": "application/json",
  };

  const response = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify(body),
  });
  if (response.ok) return;

  const detail = await response.text();
  throw new Error(`status callback failed with ${response.status}: ${detail}`);
}
|
|
29
|
+
|
|
30
|
+
// Run configuration read from the environment; a missing variable aborts
// start-up before anything else happens.
const runId = requiredEnv("SCRIPT_RUN_ID");
const agentId = requiredEnv("SCRIPT_RUN_AGENT_ID");
const apiKey = requiredEnv("AGENT_SWARM_API_KEY");
// Drop one trailing slash so endpoint paths can be appended directly.
const baseUrl = requiredEnv("MCP_BASE_URL").replace(/\/$/, "");
const sourceFile = requiredEnv("SCRIPT_RUN_SOURCE_FILE");
const argsFile = requiredEnv("SCRIPT_RUN_ARGS_FILE");
const userModulePath = `${requiredEnv("SCRIPT_RUN_TMPDIR")}/user-script.ts`;

/** Fire-and-forget heartbeat POST; request failures are deliberately ignored. */
function sendHeartbeat(): void {
  const pending = fetch(`${baseUrl}/api/internal/script-runs/${runId}/heartbeat`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "X-Agent-ID": agentId,
    },
  });
  pending.catch(() => {});
}

// Heartbeat every 10 seconds; unref (where supported) so the timer alone does
// not keep the process alive.
const heartbeat = setInterval(sendHeartbeat, 10_000);
heartbeat.unref?.();

try {
  // Copy the submitted source to a module path and import it as the user script.
  const source = await Bun.file(sourceFile).text();
  const args = JSON.parse(await Bun.file(argsFile).text());
  await Bun.write(userModulePath, source);

  const mod = await import(userModulePath);
  if (typeof mod.default !== "function") {
    throw new Error("Script workflow must export a default function");
  }

  const ctx = buildWorkflowCtx({ runId, agentId, apiKey, baseUrl, args });
  const output = await mod.default(args, ctx);

  // Report success (an undefined result is sent as null), then exit cleanly.
  await postStatus(runId, baseUrl, agentId, apiKey, {
    status: "completed",
    output: output ?? null,
  });
  process.exit(0);
} catch (err) {
  // Log the full detail to stderr, report the failure message, exit non-zero.
  const failure = err instanceof Error ? err : null;
  console.error(failure ? failure.stack || failure.message : String(err));
  await postStatus(runId, baseUrl, agentId, apiKey, {
    status: "failed",
    error: failure ? failure.message : String(err),
  });
  process.exit(1);
} finally {
  clearInterval(heartbeat);
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/** One label that the lint considers unsafe. */
export type LabelLintError = {
  /** The literal label text that was matched. */
  label: string;
  /** 1-based line number of the offending call, or `null`. */
  lineNumber: number | null;
  /** Human-readable explanation. */
  detail: string;
};

/** Lint outcome: either clean, or a list of offending labels. */
export type LabelLintResult = { ok: true } | { ok: false; errors: LabelLintError[] };

// Matches `ctx.step.<method>("label"` and captures the double-quoted label.
const CTX_STEP_LITERAL_LABEL_PATTERN = /ctx\.step\.\w+\(\s*"([^"]+)"/g;

// Textual markers treated as "a loop is nearby".
const LOOP_PATTERNS = [
  /\bfor\s*\(/,
  /\bwhile\s*\(/,
  /\.map\s*\(/,
  /\.forEach\s*\(/,
  /\.reduce\s*\(/,
  /\.flatMap\s*\(/,
];

/**
 * Flag `ctx.step.*` calls whose first argument is a double-quoted literal
 * label and which appear near a loop construct.
 *
 * The check is purely textual: a call counts as "inside a loop" when any loop
 * marker occurs on the same line or within the ten lines before it.
 *
 * @param source - Workflow script source text.
 * @returns `{ ok: true }` when nothing was flagged, otherwise the findings in
 *   source order.
 */
export function lintWorkflowLabels(source: string): LabelLintResult {
  const lines = source.split("\n");
  const errors: LabelLintError[] = [];

  lines.forEach((line, index) => {
    const lineNumber = index + 1;

    for (const found of line.matchAll(CTX_STEP_LITERAL_LABEL_PATTERN)) {
      const label = found[1];
      if (!label) continue;

      // Current line plus up to ten preceding lines.
      const context = lines.slice(Math.max(0, index - 10), lineNumber).join("\n");
      const nearLoop = LOOP_PATTERNS.some((pattern) => pattern.test(context));
      if (!nearLoop) continue;

      errors.push({
        label,
        lineNumber,
        detail:
          `Literal string label "${label}" at line ${lineNumber} appears inside a loop. ` +
          "Labels must be unique per run; use a template literal that includes the loop variable.",
      });
    }
  });

  if (errors.length > 0) {
    return { ok: false, errors };
  }
  return { ok: true };
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/** Default cap on steps per script run. */
const DEFAULT_MAX_STEPS = 1000;
/** Default wall-clock cap per script run: 24 hours, in milliseconds. */
const DEFAULT_MAX_WALL_MS = 24 * 60 * 60 * 1000;
/** Default cap on agent tasks per script run. */
const DEFAULT_MAX_AGENT_TASKS = 50;

/**
 * Read a positive integer limit from the environment.
 *
 * The value is parsed with `Number` and rounded down. The fallback is used
 * when the variable is unset or empty, is not a finite number, or rounds down
 * to less than 1.
 *
 * @param name - Environment variable to read.
 * @param fallback - Value used when the variable is unusable.
 * @returns An integer of at least 1 taken from the environment, or `fallback`.
 */
function positiveIntEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (!raw) return fallback;

  // Round down *before* validating so fractional values below 1 (e.g. "0.5")
  // fall back instead of collapsing to a limit of 0.
  const parsed = Math.floor(Number(raw));
  return Number.isFinite(parsed) && parsed >= 1 ? parsed : fallback;
}

/** Maximum steps per script run (`SCRIPT_RUN_MAX_STEPS`, default 1000). */
export function scriptRunMaxSteps(): number {
  return positiveIntEnv("SCRIPT_RUN_MAX_STEPS", DEFAULT_MAX_STEPS);
}

/** Maximum wall-clock time per script run in ms (`SCRIPT_RUN_MAX_WALL_MS`, default 24h). */
export function scriptRunMaxWallMs(): number {
  return positiveIntEnv("SCRIPT_RUN_MAX_WALL_MS", DEFAULT_MAX_WALL_MS);
}

/** Maximum agent tasks per script run (`SCRIPT_RUN_MAX_AGENT_TASKS`, default 50). */
export function scriptRunMaxAgentTasks(): number {
  return positiveIntEnv("SCRIPT_RUN_MAX_AGENT_TASKS", DEFAULT_MAX_AGENT_TASKS);
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import { getRunningScriptRuns, getScriptRun, updateScriptRun } from "../be/db";
|
|
2
|
+
import type { ScriptRun } from "../types";
|
|
3
|
+
import { getApiKey } from "../utils/api-key";
|
|
4
|
+
import {
|
|
5
|
+
localProcessScriptExecutor,
|
|
6
|
+
type ScriptExecutionHandle,
|
|
7
|
+
type ScriptExecutor,
|
|
8
|
+
} from "./executor";
|
|
9
|
+
import { scriptRunMaxWallMs } from "./limits";
|
|
10
|
+
|
|
11
|
+
/** Bookkeeping for one script run whose subprocess was started by this module. */
type ManagedRun = {
  /** Handle returned by the executor when the run was started. */
  execution: ScriptExecutionHandle;
};

/** Runs started by this process, keyed by run id. */
const managed = new Map<string, ManagedRun>();

/** Handle of the periodic reconcile timer; `null` while the supervisor is not running. */
let reconcileTimer: ReturnType<typeof setInterval> | null = null;

/** Executor used to start and signal run processes; replaceable via `setScriptRunExecutor`. */
let scriptExecutor: ScriptExecutor = localProcessScriptExecutor;
|
|
18
|
+
|
|
19
|
+
/**
 * Whether supervision is switched off.
 *
 * @returns `true` only when `SCRIPT_RUN_SUPERVISOR_DISABLE` is exactly `"true"`.
 */
function supervisorDisabled(): boolean {
  const flag = process.env.SCRIPT_RUN_SUPERVISOR_DISABLE;
  return flag === "true";
}
|
|
22
|
+
|
|
23
|
+
/**
 * Replace the executor used for all subsequent start, liveness and terminate
 * operations in this module.
 *
 * @param executor - Executor to use from now on.
 */
export function setScriptRunExecutor(executor: ScriptExecutor): void {
  scriptExecutor = executor;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Run ids whose subprocess is being spawned right now, i.e. between the
 * "already managed?" guard and registration in `managed`.
 */
const startingRunIds = new Set<string>();

/**
 * Spawn the subprocess for a script run and track it until it exits.
 *
 * Does nothing when the supervisor is disabled, or when the run is already
 * managed or is in the middle of being started by another call. After a
 * successful spawn the run is recorded as `running` with the subprocess pid.
 * When the subprocess later exits while the run is still `running`, the run is
 * marked `completed` (exit code 0) or `failed` (anything else, with stderr as
 * the error), and the execution's resources are cleaned up.
 *
 * @param run - The run to start.
 * @param baseUrl - Base URL handed to the subprocess.
 * @param apiKeyOverride - API key to use instead of the configured one.
 * @throws Error when no API key is available, or when the executor fails to
 *   start the run.
 */
export async function startScriptRunProcess(
  run: ScriptRun,
  baseUrl: string,
  apiKeyOverride?: string,
): Promise<void> {
  if (supervisorDisabled()) return;
  // The spawn below is awaited, so `managed` alone cannot stop two overlapping
  // calls for the same run from both spawning a subprocess.
  if (managed.has(run.id) || startingRunIds.has(run.id)) return;

  const apiKey = apiKeyOverride ?? getApiKey();
  if (!apiKey) throw new Error("AGENT_SWARM_API_KEY is required to spawn script runs");
  if (process.env.SCRIPT_WORKFLOW_DEBUG === "true") {
    console.error(
      `[script-workflows] spawning ${run.id} auth override=${apiKeyOverride ? "yes" : "no"} len=${apiKey.length}`,
    );
  }

  startingRunIds.add(run.id);
  let execution: ScriptExecutionHandle;
  try {
    execution = await scriptExecutor.start({ run, baseUrl, apiKey });
    managed.set(run.id, { execution });
  } finally {
    startingRunIds.delete(run.id);
  }

  updateScriptRun(run.id, {
    status: "running",
    pid: execution.pid,
    lastHeartbeatAt: new Date().toISOString(),
  });

  // True once a newer execution of the same run has taken over (or is being
  // started). The run id keys both the `managed` entry and the working
  // directory, so a stale exit must not touch either, nor the run's status.
  const supersededByNewerExecution = (): boolean => {
    if (startingRunIds.has(run.id)) return true;
    const owner = managed.get(run.id);
    return owner !== undefined && owner.execution !== execution;
  };

  // Fire-and-forget by design; the trailing catch keeps a failure in the exit
  // handling from surfacing as an unhandled promise rejection.
  void execution.exited
    .then(({ exitCode, stderr }) => {
      if (supersededByNewerExecution()) return;
      const current = getScriptRun(run.id);
      if (!current || current.status !== "running") return;

      const succeeded = exitCode === 0;
      if (!succeeded) {
        console.error(
          `[script-workflows] run ${run.id} subprocess exited ${exitCode}: ${stderr.trim() || "(no stderr)"}`,
        );
      }
      updateScriptRun(run.id, {
        status: succeeded ? "completed" : "failed",
        pid: null,
        finishedAt: new Date().toISOString(),
        error: succeeded ? null : stderr.trim() || `Script workflow subprocess exited ${exitCode}`,
      });
    })
    .finally(async () => {
      if (supersededByNewerExecution()) return;
      managed.delete(run.id);
      await execution.cleanup();
    })
    .catch((err: unknown) => {
      console.error(
        `[script-workflows] run ${run.id} exit handling failed: ${
          err instanceof Error ? err.message : String(err)
        }`,
      );
    });
}
|
|
75
|
+
|
|
76
|
+
/**
 * Send `SIGTERM` to the process behind a script run.
 *
 * A run managed by this process is signalled through its execution handle and
 * dropped from the managed set. Otherwise, if the stored run has a pid that
 * the executor reports as alive, that pid is signalled directly.
 *
 * @param runId - Id of the run to terminate.
 * @returns `true` when a signal was sent, `false` when there was nothing to
 *   signal.
 */
export function terminateScriptRunProcess(runId: string): boolean {
  const tracked = managed.get(runId);
  const record = getScriptRun(runId);

  if (tracked) {
    tracked.execution.terminate("SIGTERM");
    managed.delete(runId);
    return true;
  }

  // Not tracked here: fall back to the pid stored on the run, if it is alive.
  if (!record?.pid || !scriptExecutor.isRunning(record.pid)) {
    return false;
  }
  scriptExecutor.terminatePid(record.pid, "SIGTERM");
  return true;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Pause a script run: terminate its process (if any), then record the run as
 * `paused` with no pid.
 *
 * @param runId - Id of the run to pause.
 */
export function pauseScriptRunProcess(runId: string): void {
  terminateScriptRunProcess(runId);

  const pausedState = { status: "paused", pid: null } as const;
  updateScriptRun(runId, pausedState);
}
|
|
95
|
+
|
|
96
|
+
/**
 * Abort a script run because it exceeded a limit: terminate its process (if
 * any), then record the run as `aborted_limit` with the given reason and a
 * finish timestamp.
 *
 * @param runId - Id of the run to abort.
 * @param reason - Stored as the run's error.
 */
export function abortScriptRunLimit(runId: string, reason: string): void {
  terminateScriptRunProcess(runId);

  const finishedAt = new Date().toISOString();
  updateScriptRun(runId, {
    status: "aborted_limit",
    pid: null,
    finishedAt,
    error: reason,
  });
}
|
|
105
|
+
|
|
106
|
+
/**
 * One reconcile pass over the runs returned by `getRunningScriptRuns`.
 *
 * For each non-paused run:
 * - a run managed by this process is aborted once it has been running longer
 *   than the wall-clock limit, and is otherwise left alone;
 * - a run that is not managed and has no live pid is (re)started; if starting
 *   fails, the run is marked `failed` with the error message.
 *
 * Does nothing when the supervisor is disabled.
 *
 * @param baseUrl - Base URL handed to any subprocess that gets started.
 */
export function reconcileScriptRuns(baseUrl: string): void {
  if (supervisorDisabled()) return;

  const recordStartFailure = (runId: string, err: unknown): void => {
    updateScriptRun(runId, {
      status: "failed",
      pid: null,
      finishedAt: new Date().toISOString(),
      error: err instanceof Error ? err.message : String(err),
    });
  };

  for (const run of getRunningScriptRuns()) {
    if (run.status === "paused") continue;

    const tracked = managed.get(run.id);
    if (tracked) {
      // Managed here: the only thing to enforce is the wall-clock limit.
      const elapsedMs = Date.now() - tracked.execution.startedAtMs;
      if (elapsedMs > scriptRunMaxWallMs()) {
        abortScriptRunLimit(run.id, `SCRIPT_RUN_MAX_WALL_MS exceeded (${scriptRunMaxWallMs()})`);
      }
      continue;
    }

    // Not managed here: leave it alone while its recorded pid is still alive.
    const hasLiveProcess = Boolean(run.pid) && scriptExecutor.isRunning(run.pid as number);
    if (hasLiveProcess) continue;

    startScriptRunProcess(run, baseUrl).catch((err) => recordStartFailure(run.id, err));
  }
}
|
|
127
|
+
|
|
128
|
+
/**
 * Start periodic reconciliation: run one pass immediately, then another every
 * 15 seconds.
 *
 * Does nothing when the supervisor is disabled or the timer already exists.
 * The timer is unref'd where supported.
 *
 * @param baseUrl - Base URL passed to every reconcile pass.
 */
export function startScriptRunSupervisor(baseUrl: string): void {
  if (supervisorDisabled()) return;
  if (reconcileTimer) return;

  const runPass = (): void => reconcileScriptRuns(baseUrl);
  runPass();
  reconcileTimer = setInterval(runPass, 15_000);
  reconcileTimer.unref?.();
}
|
|
134
|
+
|
|
135
|
+
/**
 * Stop periodic reconciliation and terminate every run managed by this
 * process.
 */
export function stopScriptRunSupervisor(): void {
  if (reconcileTimer) {
    clearInterval(reconcileTimer);
  }
  reconcileTimer = null;

  // Snapshot the ids first: terminating a run removes it from `managed`.
  const managedRunIds = Array.from(managed.keys());
  managedRunIds.forEach((runId) => {
    terminateScriptRunProcess(runId);
  });
}
|