@desplega.ai/agent-swarm 1.76.3 → 1.77.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openapi.json +1 -1
- package/package.json +1 -1
- package/src/cli.tsx +3 -24
- package/src/commands/credential-wait.ts +31 -6
- package/src/commands/runner.ts +1045 -1059
package/src/commands/runner.ts
CHANGED
|
@@ -272,6 +272,66 @@ async function fetchResolvedEnv(
|
|
|
272
272
|
return { env, credentialSelections, resolvedProvider };
|
|
273
273
|
}
|
|
274
274
|
|
|
275
|
+
/**
 * Allowlist of environment variable names that
 * `applyResolvedEnvToProcessEnv` may overwrite on the live process.
 *
 * Every key that is NOT listed here is treated as unsafe to change after boot.
 * The excluded categories, and why they are excluded:
 *
 * - **Boot-time identity / connectivity** (AGENT_ID, API_KEY, MCP_BASE_URL,
 *   AGENT_ROLE, MANAGED_*): changing these mid-flight would effectively turn
 *   the worker into a different agent talking to a different API. Those
 *   require a reboot, not a reload.
 * - **Credential pool members** (CLAUDE_CODE_OAUTH_TOKEN, ANTHROPIC_API_KEY,
 *   OPENAI_API_KEY, etc.): `resolveCredentialPools` picks one value at random
 *   *per task* from a comma-separated pool. Writing the picked value into
 *   process.env would freeze that rotation. They are re-resolved on every
 *   spawn anyway, so they are intentionally left untouched.
 * - **Coordinated values with paired state** (HARNESS_PROVIDER): replacing the
 *   env value without also replacing the adapter and rebuilding the system
 *   prompt yields an inconsistent worker. A dedicated reconcile path updates
 *   state.harnessProvider and the adapter together.
 * - **Process-runtime / OS-level** (PATH, HOME, NODE_OPTIONS, HOSTNAME, …):
 *   never overwritten; some are read once by libraries at boot.
 * - **Values memoized at boot** (TEMPLATE_ID, AGENT_NAME): the cached
 *   in-process value wins regardless, so overwriting only creates confusion.
 *
 * Settings that drive runner-loop behavior (such as MAX_CONCURRENT_TASKS)
 * should be written to `RunnerState` directly instead of being round-tripped
 * through process.env.
 */
const RELOADABLE_ENV_KEYS: ReadonlySet<string> = new Set<string>([
  "MODEL_OVERRIDE",
  "AGENT_FS_SHARED_ORG_ID",
]);

/**
 * Copy allowlisted keys from a freshly resolved env into `process.env`.
 *
 * Only keys in `RELOADABLE_ENV_KEYS` are considered. A key is written when the
 * fresh value is defined and differs from the current `process.env` value;
 * keys that are `undefined` in `freshEnv` are left as they are.
 *
 * @param freshEnv Newly resolved environment snapshot.
 * @returns Names of the keys whose `process.env` value was replaced, in
 *   allowlist order (handy for logging).
 */
function applyResolvedEnvToProcessEnv(freshEnv: Record<string, string | undefined>): string[] {
  const updatedKeys: string[] = [];
  RELOADABLE_ENV_KEYS.forEach((name) => {
    const incoming = freshEnv[name];
    // Skip absent values and values that already match the live environment.
    if (incoming === undefined || incoming === process.env[name]) {
      return;
    }
    process.env[name] = incoming;
    updatedKeys.push(name);
  });
  return updatedKeys;
}
|
|
320
|
+
|
|
321
|
+
/**
 * Compute the effective max concurrent tasks.
 *
 * Precedence: `env.MAX_CONCURRENT_TASKS` > template default > role default.
 *
 * The env value is honoured only when it is a whole, positive, decimal integer
 * (surrounding whitespace and a leading `+` are tolerated). Anything else —
 * empty, zero, negative, fractional, or partially numeric such as `"10abc"` —
 * is ignored and the template/role default is used instead. This avoids
 * `parseInt`'s prefix parsing, which would silently turn `"10abc"` into 10 or
 * `"2.5"` into 2.
 *
 * @param env Environment snapshot to read `MAX_CONCURRENT_TASKS` from.
 * @param templateMax Template-provided default, if any.
 * @param defaultMaxTasks Fallback used when neither env nor template applies.
 * @returns The resolved concurrency cap.
 */
function resolveMaxConcurrent(
  env: Record<string, string | undefined>,
  templateMax: number | undefined,
  defaultMaxTasks: number,
): number {
  const raw = env.MAX_CONCURRENT_TASKS?.trim();
  // Require the whole string to be decimal digits so trailing garbage and
  // fractions fall through to the defaults instead of being truncated.
  if (raw && /^\+?\d+$/.test(raw)) {
    const parsed = Number.parseInt(raw, 10);
    if (Number.isSafeInteger(parsed) && parsed > 0) return parsed;
  }
  return templateMax ?? defaultMaxTasks;
}
|
|
334
|
+
|
|
275
335
|
/** Tools that produce noise — skip auto-progress for these */
|
|
276
336
|
const SKIP_PROGRESS_TOOLS = new Set(["ToolSearch", "TodoRead", "TodoWrite"]);
|
|
277
337
|
|
|
@@ -962,7 +1022,6 @@ export interface RunnerOptions {
|
|
|
962
1022
|
systemPromptFile?: string;
|
|
963
1023
|
logsDir?: string;
|
|
964
1024
|
additionalArgs?: string[];
|
|
965
|
-
aiLoop?: boolean; // Use AI-based loop (old behavior)
|
|
966
1025
|
}
|
|
967
1026
|
|
|
968
1027
|
/** Running task state for parallel execution */
|
|
@@ -2099,68 +2158,6 @@ async function spawnProviderProcess(
|
|
|
2099
2158
|
return runningTask;
|
|
2100
2159
|
}
|
|
2101
2160
|
|
|
2102
|
-
/** Run a single provider iteration (blocking) - used for AI-loop mode */
|
|
2103
|
-
async function runProviderIteration(
|
|
2104
|
-
adapter: ReturnType<typeof createProviderAdapter>,
|
|
2105
|
-
opts: {
|
|
2106
|
-
prompt: string;
|
|
2107
|
-
logFile: string;
|
|
2108
|
-
systemPrompt?: string;
|
|
2109
|
-
additionalArgs?: string[];
|
|
2110
|
-
role: string;
|
|
2111
|
-
apiUrl: string;
|
|
2112
|
-
apiKey: string;
|
|
2113
|
-
agentId: string;
|
|
2114
|
-
taskId?: string;
|
|
2115
|
-
cwd?: string;
|
|
2116
|
-
},
|
|
2117
|
-
): Promise<ProviderResult> {
|
|
2118
|
-
const { env: freshEnv } = await fetchResolvedEnv(opts.apiUrl, opts.apiKey, opts.agentId);
|
|
2119
|
-
const model = (freshEnv.MODEL_OVERRIDE as string) || "";
|
|
2120
|
-
|
|
2121
|
-
const config: ProviderSessionConfig = {
|
|
2122
|
-
prompt: opts.prompt,
|
|
2123
|
-
systemPrompt: opts.systemPrompt || "",
|
|
2124
|
-
model,
|
|
2125
|
-
role: opts.role,
|
|
2126
|
-
agentId: opts.agentId,
|
|
2127
|
-
taskId: opts.taskId || crypto.randomUUID(),
|
|
2128
|
-
apiUrl: opts.apiUrl,
|
|
2129
|
-
apiKey: opts.apiKey,
|
|
2130
|
-
cwd: opts.cwd || process.cwd(),
|
|
2131
|
-
logFile: opts.logFile,
|
|
2132
|
-
additionalArgs: opts.additionalArgs,
|
|
2133
|
-
env: freshEnv as Record<string, string>,
|
|
2134
|
-
};
|
|
2135
|
-
|
|
2136
|
-
const session = await adapter.createSession(config);
|
|
2137
|
-
|
|
2138
|
-
let lastAiLoopProgressTime = 0;
|
|
2139
|
-
session.onEvent((event) => {
|
|
2140
|
-
if (event.type === "raw_log") prettyPrintLine(event.content, opts.role);
|
|
2141
|
-
if (event.type === "raw_stderr") prettyPrintStderr(event.content, opts.role);
|
|
2142
|
-
if (event.type === "session_init" && opts.taskId) {
|
|
2143
|
-
saveProviderSessionId(
|
|
2144
|
-
opts.apiUrl,
|
|
2145
|
-
opts.apiKey,
|
|
2146
|
-
opts.taskId,
|
|
2147
|
-
event.sessionId,
|
|
2148
|
-
event.provider,
|
|
2149
|
-
event.providerMeta,
|
|
2150
|
-
).catch((err) => console.warn(`[runner] Failed to save session ID: ${err}`));
|
|
2151
|
-
}
|
|
2152
|
-
if (event.type === "progress" && opts.taskId) {
|
|
2153
|
-
const now = Date.now();
|
|
2154
|
-
if (now - lastAiLoopProgressTime >= PROGRESS_THROTTLE_MS) {
|
|
2155
|
-
lastAiLoopProgressTime = now;
|
|
2156
|
-
updateProgressViaAPI(opts.apiUrl, opts.apiKey, opts.taskId, event.message).catch(() => {});
|
|
2157
|
-
}
|
|
2158
|
-
}
|
|
2159
|
-
});
|
|
2160
|
-
|
|
2161
|
-
return session.waitForCompletion();
|
|
2162
|
-
}
|
|
2163
|
-
|
|
2164
2161
|
/** Check for completed processes and remove them from active tasks */
|
|
2165
2162
|
async function checkCompletedProcesses(
|
|
2166
2163
|
state: RunnerState,
|
|
@@ -2541,86 +2538,142 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
2541
2538
|
);
|
|
2542
2539
|
console.log(`[${role}] Total system prompt length: ${resolvedSystemPrompt.length} chars`);
|
|
2543
2540
|
|
|
2544
|
-
const isAiLoop = opts.aiLoop || process.env.AI_LOOP === "true";
|
|
2545
|
-
|
|
2546
2541
|
// Constants for polling
|
|
2547
2542
|
const PollIntervalMs = 2000; // 2 seconds between polls
|
|
2548
2543
|
const PollTimeoutMs = 60000; // 1 minute timeout before retrying
|
|
2549
2544
|
|
|
2550
2545
|
let iteration = 0;
|
|
2551
2546
|
|
|
2552
|
-
|
|
2553
|
-
|
|
2554
|
-
|
|
2555
|
-
|
|
2556
|
-
let cachedTemplate: TemplateResponse | null = null;
|
|
2557
|
-
|
|
2558
|
-
if (templateId) {
|
|
2559
|
-
try {
|
|
2560
|
-
cachedTemplate = await fetchTemplate(templateId, registryUrl, "/workspace/.template-cache");
|
|
2561
|
-
if (cachedTemplate) {
|
|
2562
|
-
console.log(`[${role}] Fetched template: ${templateId}`);
|
|
2547
|
+
// Fetch template early (before registration) so defaults can be applied
|
|
2548
|
+
const templateId = process.env.TEMPLATE_ID;
|
|
2549
|
+
const registryUrl = process.env.TEMPLATE_REGISTRY_URL || "https://templates.agent-swarm.dev";
|
|
2550
|
+
let cachedTemplate: TemplateResponse | null = null;
|
|
2563
2551
|
|
|
2564
|
-
|
|
2565
|
-
|
|
2566
|
-
|
|
2567
|
-
|
|
2568
|
-
|
|
2569
|
-
|
|
2570
|
-
|
|
2571
|
-
|
|
2552
|
+
if (templateId) {
|
|
2553
|
+
try {
|
|
2554
|
+
cachedTemplate = await fetchTemplate(templateId, registryUrl, "/workspace/.template-cache");
|
|
2555
|
+
if (cachedTemplate) {
|
|
2556
|
+
console.log(`[${role}] Fetched template: ${templateId}`);
|
|
2557
|
+
|
|
2558
|
+
// Apply agentDefaults as fallbacks (env/config takes precedence)
|
|
2559
|
+
const defaults = cachedTemplate.config.agentDefaults;
|
|
2560
|
+
if (config.role === "worker" && defaults.role) {
|
|
2561
|
+
role = defaults.role;
|
|
2562
|
+
}
|
|
2563
|
+
if (!capabilities?.length && defaults.capabilities?.length) {
|
|
2564
|
+
capabilities = defaults.capabilities;
|
|
2572
2565
|
}
|
|
2566
|
+
}
|
|
2567
|
+
} catch (err) {
|
|
2568
|
+
console.warn(`[${role}] Failed to fetch template ${templateId}: ${err}`);
|
|
2569
|
+
}
|
|
2570
|
+
}
|
|
2571
|
+
|
|
2572
|
+
// Runner-level polling mode with parallel execution support
|
|
2573
|
+
const isLeadFromConfig = config.role === "lead";
|
|
2574
|
+
const isLead = isLeadFromConfig || (cachedTemplate?.config.agentDefaults?.isLead ?? false);
|
|
2575
|
+
const defaultMaxTasks = isLead ? 2 : 1;
|
|
2576
|
+
const templateMaxTasks = cachedTemplate?.config.agentDefaults?.maxTasks;
|
|
2577
|
+
const maxConcurrent = resolveMaxConcurrent(process.env, templateMaxTasks, defaultMaxTasks);
|
|
2578
|
+
console.log(`[${role}] Mode: runner-level polling`);
|
|
2579
|
+
console.log(`[${role}] Max concurrent tasks: ${maxConcurrent}`);
|
|
2580
|
+
|
|
2581
|
+
// Initialize runner state for parallel execution
|
|
2582
|
+
const state: RunnerState = {
|
|
2583
|
+
activeTasks: new Map(),
|
|
2584
|
+
maxConcurrent,
|
|
2585
|
+
harnessProvider: bootProvider,
|
|
2586
|
+
};
|
|
2587
|
+
|
|
2588
|
+
// Track tasks already signaled for cancellation to avoid repeated SIGTERM
|
|
2589
|
+
const cancelledSignaled = new Set<string>();
|
|
2590
|
+
|
|
2591
|
+
// Migration 055 — cache the harness_provider value used when we last
|
|
2592
|
+
// built a `cred_status` snapshot. Re-runs the post-task check only when
|
|
2593
|
+
// the resolved provider changes. Section 4 of the swarm_config-overrides-
|
|
2594
|
+
// HARNESS_PROVIDER work makes this dynamic: state.harnessProvider is
|
|
2595
|
+
// reconciled below from `swarm_config`, so an operator's change reaches
|
|
2596
|
+
// here without a worker restart.
|
|
2597
|
+
let cachedCredHarnessProvider: string | null = null;
|
|
2598
|
+
|
|
2599
|
+
// Throttle for live HARNESS_PROVIDER reconciliation. Each reconciliation
|
|
2600
|
+
// calls `fetchResolvedEnv` which also re-resolves credential pools — we
|
|
2601
|
+
// don't want that on every 2s poll. 10s gives operator changes a near-
|
|
2602
|
+
// immediate effect from a UX perspective without hammering the API.
|
|
2603
|
+
let lastHarnessReconcileAt = 0;
|
|
2604
|
+
const HARNESS_RECONCILE_INTERVAL_MS = 10_000;
|
|
2605
|
+
|
|
2606
|
+
// Create API config for ping/close
|
|
2607
|
+
const apiConfig: ApiConfig = { apiUrl, apiKey, agentId };
|
|
2608
|
+
|
|
2609
|
+
// Setup graceful shutdown handlers with API config and runner state access
|
|
2610
|
+
setupShutdownHandlers(role, apiConfig, () => state);
|
|
2611
|
+
|
|
2612
|
+
// Register agent before starting
|
|
2613
|
+
const agentName =
|
|
2614
|
+
process.env.AGENT_NAME ||
|
|
2615
|
+
cachedTemplate?.config.displayName ||
|
|
2616
|
+
`${role}-${agentId.slice(0, 8)}`;
|
|
2617
|
+
|
|
2618
|
+
/**
 * Reconcile RunnerState + process.env against a freshly resolved swarm
 * config snapshot. Single source of truth for live config drift; used
 * both during the credential-wait (so operator flips reach the predicate
 * mid-loop) and from the post-boot periodic reconciler.
 *
 * Three things are reconciled, in order:
 *  1. the harness provider (adapter + system prompt + `state.harnessProvider`),
 *  2. `state.maxConcurrent`,
 *  3. the allowlisted `process.env` keys.
 *
 * A failed provider swap is rolled back, so the worker keeps running on the
 * previous adapter/provider pair and the swap is attempted again on the next
 * reconcile. The failure is logged, never thrown.
 *
 * @param freshEnv Newly resolved environment snapshot.
 * @param resolvedProvider Harness provider resolved from that snapshot.
 * @returns Whether anything agent-visible (provider, maxConcurrent)
 *   changed — callers use this to decide whether to re-register.
 */
const applySwarmConfigDrift = async (
  freshEnv: Record<string, string | undefined>,
  resolvedProvider: ProviderName,
): Promise<{ agentVisibleChanged: boolean }> => {
  let agentVisibleChanged = false;

  // (1) Harness provider — swap adapter + rebuild prompt atomically.
  if (resolvedProvider !== state.harnessProvider) {
    const previous = state.harnessProvider;
    // Remember the current adapter so a failed swap can be undone.
    const previousAdapter = adapter;
    console.log(`[${role}] [harness] Reconciling adapter: ${previous} → ${resolvedProvider}`);
    try {
      // NOTE(review): buildSystemPrompt() takes no arguments, so it presumably
      // reads the adapter/provider from the enclosing scope — hence both are
      // updated before the prompt is rebuilt. Confirm against its definition.
      adapter = createProviderAdapter(resolvedProvider);
      state.harnessProvider = resolvedProvider;
      basePrompt = await buildSystemPrompt();
      resolvedSystemPrompt = additionalSystemPrompt
        ? `${basePrompt}\n\n${additionalSystemPrompt}`
        : basePrompt;
      // Invalidate the cached provider so the cred_status check re-runs.
      cachedCredHarnessProvider = null;
      agentVisibleChanged = true;
      console.log(
        `[${role}] [harness] Swapped to ${resolvedProvider} (basePrompt rebuilt: ${basePrompt.length} chars)`,
      );
    } catch (err) {
      // Roll back the half-applied swap. Without this the new adapter and
      // provider would stay in place with the old prompt, and later
      // reconciles would see no drift and never retry.
      adapter = previousAdapter;
      state.harnessProvider = previous;
      console.warn(
        `[${role}] [harness] Failed to swap to ${resolvedProvider} (staying on ${previous}): ${err}`,
      );
    }
  }

  // (2) Max concurrency — operator can tune from the dashboard live.
  // Note: shrinking below activeTasks.size won't kill in-flight tasks; new
  // spawns are simply gated until in-flight drain back under the new cap.
  const nextMax = resolveMaxConcurrent(freshEnv, templateMaxTasks, defaultMaxTasks);
  if (nextMax !== state.maxConcurrent) {
    console.log(`[${role}] [config] maxConcurrent: ${state.maxConcurrent} → ${nextMax}`);
    state.maxConcurrent = nextMax;
    agentVisibleChanged = true;
  }

  // (3) Apply the small allowlist of safe-to-mutate env keys to process.env.
  const changedKeys = applyResolvedEnvToProcessEnv(freshEnv);
  if (changedKeys.length > 0) {
    console.log(`[${role}] [env-reload] Updated process.env: ${changedKeys.join(", ")}`);
  }

  return { agentVisibleChanged };
};
|
|
2594
2674
|
|
|
2595
|
-
|
|
2596
|
-
|
|
2597
|
-
|
|
2598
|
-
// Migration 055 — cache the harness_provider value used when we last
|
|
2599
|
-
// built a `cred_status` snapshot. Re-runs the post-task check only when
|
|
2600
|
-
// the resolved provider changes. Section 4 of the swarm_config-overrides-
|
|
2601
|
-
// HARNESS_PROVIDER work makes this dynamic: state.harnessProvider is
|
|
2602
|
-
// reconciled below from `swarm_config`, so an operator's change reaches
|
|
2603
|
-
// here without a worker restart.
|
|
2604
|
-
let cachedCredHarnessProvider: string | null = null;
|
|
2605
|
-
|
|
2606
|
-
// Throttle for live HARNESS_PROVIDER reconciliation. Each reconciliation
|
|
2607
|
-
// calls `fetchResolvedEnv` which also re-resolves credential pools — we
|
|
2608
|
-
// don't want that on every 2s poll. 10s gives operator changes a near-
|
|
2609
|
-
// immediate effect from a UX perspective without hammering the API.
|
|
2610
|
-
let lastHarnessReconcileAt = 0;
|
|
2611
|
-
const HARNESS_RECONCILE_INTERVAL_MS = 10_000;
|
|
2612
|
-
|
|
2613
|
-
// Create API config for ping/close
|
|
2614
|
-
const apiConfig: ApiConfig = { apiUrl, apiKey, agentId };
|
|
2615
|
-
|
|
2616
|
-
// Setup graceful shutdown handlers with API config and runner state access
|
|
2617
|
-
setupShutdownHandlers(role, apiConfig, () => state);
|
|
2618
|
-
|
|
2619
|
-
// Register agent before starting
|
|
2620
|
-
const agentName =
|
|
2621
|
-
process.env.AGENT_NAME ||
|
|
2622
|
-
cachedTemplate?.config.displayName ||
|
|
2623
|
-
`${role}-${agentId.slice(0, 8)}`;
|
|
2675
|
+
/** Push the current live state back to the API so the dashboard reflects it. */
|
|
2676
|
+
const reregisterAgent = async () => {
|
|
2624
2677
|
try {
|
|
2625
2678
|
await registerAgent({
|
|
2626
2679
|
apiUrl,
|
|
@@ -2630,1055 +2683,988 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
2630
2683
|
role,
|
|
2631
2684
|
isLead,
|
|
2632
2685
|
capabilities,
|
|
2633
|
-
maxTasks: maxConcurrent,
|
|
2634
|
-
harnessProvider:
|
|
2686
|
+
maxTasks: state.maxConcurrent,
|
|
2687
|
+
harnessProvider: state.harnessProvider,
|
|
2635
2688
|
});
|
|
2636
|
-
|
|
2637
|
-
|
|
2638
|
-
console.error(`[${role}] Failed to register: ${error}`);
|
|
2639
|
-
process.exit(1);
|
|
2640
|
-
}
|
|
2641
|
-
|
|
2642
|
-
// Block until harness credentials are present in env. This loop replaces
|
|
2643
|
-
// the old bash-level fail-fast in `docker-entrypoint.sh` — the worker is
|
|
2644
|
-
// already registered (visible to the dashboard) and self-heals once
|
|
2645
|
-
// creds appear in `swarm_config`. See plans/2026-05-06-worker-credential-safe-loop.md.
|
|
2646
|
-
//
|
|
2647
|
-
// CRED_CHECK_DISABLE=1 opts out entirely: the worker trusts the operator
|
|
2648
|
-
// and starts polling immediately, with a NULL `cred_status` row that the
|
|
2649
|
-
// dashboard surfaces as "unreported."
|
|
2650
|
-
const harnessProvider = bootProvider;
|
|
2651
|
-
cachedCredHarnessProvider = harnessProvider;
|
|
2652
|
-
if (isCredCheckDisabled(process.env)) {
|
|
2653
|
-
console.log(`[${role}] CRED_CHECK_DISABLE=1, skipping credential checks`);
|
|
2654
|
-
} else {
|
|
2655
|
-
try {
|
|
2656
|
-
await awaitCredentials({
|
|
2657
|
-
provider: harnessProvider,
|
|
2658
|
-
refreshEnv: async () => {
|
|
2659
|
-
const { env } = await fetchResolvedEnv(apiUrl, apiKey, agentId);
|
|
2660
|
-
return env;
|
|
2661
|
-
},
|
|
2662
|
-
onTick: (status) => {
|
|
2663
|
-
// Best-effort status report — the dispatcher uses it to route
|
|
2664
|
-
// around blocked agents. Failures are non-fatal (the wait loop
|
|
2665
|
-
// already swallows onTick exceptions). We do NOT include
|
|
2666
|
-
// `cred_status` here — the live test runs once the worker is
|
|
2667
|
-
// ready (below), and intermediate ticks are presence-only.
|
|
2668
|
-
fetch(`${apiUrl}/api/agents/${encodeURIComponent(agentId)}/credential-status`, {
|
|
2669
|
-
method: "PUT",
|
|
2670
|
-
headers: {
|
|
2671
|
-
Authorization: `Bearer ${apiKey}`,
|
|
2672
|
-
"X-Agent-ID": agentId,
|
|
2673
|
-
"Content-Type": "application/json",
|
|
2674
|
-
},
|
|
2675
|
-
body: JSON.stringify({ ready: status.ready, missing: status.missing }),
|
|
2676
|
-
}).catch(() => {
|
|
2677
|
-
// Swallowed — Phase 2 wait loop logs every tick anyway.
|
|
2678
|
-
});
|
|
2679
|
-
},
|
|
2680
|
-
});
|
|
2681
|
-
} catch (err) {
|
|
2682
|
-
if (err instanceof BootMaxWaitExceededError) {
|
|
2683
|
-
console.error(`[${role}] ${err.message}`);
|
|
2684
|
-
process.exit(EX_CONFIG);
|
|
2685
|
-
}
|
|
2686
|
-
throw err;
|
|
2687
|
-
}
|
|
2688
|
-
|
|
2689
|
-
// Migration 055: build the full snapshot (presence + live test) once
|
|
2690
|
-
// creds are ready and POST it to the agent row. Status endpoint reads
|
|
2691
|
-
// this instead of running predicates server-side.
|
|
2692
|
-
try {
|
|
2693
|
-
const snapshot = await buildCredStatusReport(harnessProvider, process.env, {}, "boot");
|
|
2694
|
-
await reportCredStatus(apiUrl, apiKey, agentId, snapshot);
|
|
2695
|
-
} catch (err) {
|
|
2696
|
-
// Non-fatal — worker proceeds even if reporting fails.
|
|
2697
|
-
console.warn(`[${role}] cred_status boot report failed (non-fatal): ${err}`);
|
|
2698
|
-
}
|
|
2689
|
+
} catch (err) {
|
|
2690
|
+
console.warn(`[${role}] [config] Re-register failed (non-fatal): ${err}`);
|
|
2699
2691
|
}
|
|
2692
|
+
};
|
|
2693
|
+
try {
|
|
2694
|
+
await registerAgent({
|
|
2695
|
+
apiUrl,
|
|
2696
|
+
apiKey,
|
|
2697
|
+
agentId,
|
|
2698
|
+
name: agentName,
|
|
2699
|
+
role,
|
|
2700
|
+
isLead,
|
|
2701
|
+
capabilities,
|
|
2702
|
+
maxTasks: maxConcurrent,
|
|
2703
|
+
harnessProvider: bootProvider,
|
|
2704
|
+
});
|
|
2705
|
+
console.log(`[${role}] Registered as "${agentName}" (ID: ${agentId})`);
|
|
2706
|
+
} catch (error) {
|
|
2707
|
+
console.error(`[${role}] Failed to register: ${error}`);
|
|
2708
|
+
process.exit(1);
|
|
2709
|
+
}
|
|
2700
2710
|
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
|
|
2704
|
-
|
|
2705
|
-
|
|
2711
|
+
// Block until harness credentials are present in env. This loop replaces
|
|
2712
|
+
// the old bash-level fail-fast in `docker-entrypoint.sh` — the worker is
|
|
2713
|
+
// already registered (visible to the dashboard) and self-heals once
|
|
2714
|
+
// creds appear in `swarm_config`. See plans/2026-05-06-worker-credential-safe-loop.md.
|
|
2715
|
+
//
|
|
2716
|
+
// CRED_CHECK_DISABLE=1 opts out entirely: the worker trusts the operator
|
|
2717
|
+
// and starts polling immediately, with a NULL `cred_status` row that the
|
|
2718
|
+
// dashboard surfaces as "unreported."
|
|
2719
|
+
cachedCredHarnessProvider = state.harnessProvider;
|
|
2720
|
+
if (isCredCheckDisabled(process.env)) {
|
|
2721
|
+
console.log(`[${role}] CRED_CHECK_DISABLE=1, skipping credential checks`);
|
|
2722
|
+
} else {
|
|
2706
2723
|
try {
|
|
2707
|
-
|
|
2708
|
-
|
|
2709
|
-
|
|
2710
|
-
|
|
2711
|
-
|
|
2712
|
-
|
|
2713
|
-
|
|
2714
|
-
|
|
2715
|
-
|
|
2716
|
-
|
|
2717
|
-
|
|
2718
|
-
|
|
2719
|
-
|
|
2720
|
-
|
|
2721
|
-
|
|
2722
|
-
description?: string;
|
|
2723
|
-
};
|
|
2724
|
-
agentSoulMd = profile.soulMd;
|
|
2725
|
-
agentIdentityMd = profile.identityMd;
|
|
2726
|
-
agentSetupScript = profile.setupScript;
|
|
2727
|
-
agentToolsMd = profile.toolsMd;
|
|
2728
|
-
agentClaudeMd = profile.claudeMd;
|
|
2729
|
-
agentHeartbeatMd = profile.heartbeatMd;
|
|
2730
|
-
agentProfileName = profile.name;
|
|
2731
|
-
agentDescription = profile.description;
|
|
2732
|
-
|
|
2733
|
-
// Generate default templates if missing (runner registers via POST /api/agents
|
|
2734
|
-
// which doesn't generate templates like join-swarm does)
|
|
2735
|
-
if (
|
|
2736
|
-
!agentSoulMd ||
|
|
2737
|
-
!agentIdentityMd ||
|
|
2738
|
-
!agentToolsMd ||
|
|
2739
|
-
!agentClaudeMd ||
|
|
2740
|
-
!agentHeartbeatMd
|
|
2741
|
-
) {
|
|
2742
|
-
// Use already-fetched template (from pre-registration step)
|
|
2743
|
-
if (cachedTemplate) {
|
|
2744
|
-
const ctx = {
|
|
2745
|
-
agent: {
|
|
2746
|
-
name: agentProfileName || agentName,
|
|
2747
|
-
role: role,
|
|
2748
|
-
description: agentDescription || "",
|
|
2749
|
-
capabilities: (capabilities || []).join(", "),
|
|
2750
|
-
},
|
|
2751
|
-
};
|
|
2752
|
-
if (!agentSoulMd) agentSoulMd = interpolate(cachedTemplate.files.soulMd, ctx).result;
|
|
2753
|
-
if (!agentIdentityMd)
|
|
2754
|
-
agentIdentityMd = interpolate(cachedTemplate.files.identityMd, ctx).result;
|
|
2755
|
-
if (!agentToolsMd) agentToolsMd = interpolate(cachedTemplate.files.toolsMd, ctx).result;
|
|
2756
|
-
if (!agentClaudeMd)
|
|
2757
|
-
agentClaudeMd = interpolate(cachedTemplate.files.claudeMd, ctx).result;
|
|
2758
|
-
if (!agentSetupScript)
|
|
2759
|
-
agentSetupScript = interpolate(cachedTemplate.files.setupScript, ctx).result;
|
|
2760
|
-
if (!agentHeartbeatMd)
|
|
2761
|
-
agentHeartbeatMd = interpolate(cachedTemplate.files.heartbeatMd, ctx).result;
|
|
2762
|
-
console.log(`[${role}] Applied template: ${templateId}`);
|
|
2763
|
-
}
|
|
2764
|
-
|
|
2765
|
-
// Fallback to generic defaults for any still-missing fields
|
|
2766
|
-
const agentInfo = {
|
|
2767
|
-
name: agentProfileName || agentName,
|
|
2768
|
-
role: role,
|
|
2769
|
-
description: agentDescription,
|
|
2770
|
-
capabilities: config.capabilities,
|
|
2771
|
-
};
|
|
2772
|
-
if (!agentSoulMd) agentSoulMd = generateDefaultSoulMd(agentInfo);
|
|
2773
|
-
if (!agentIdentityMd) agentIdentityMd = generateDefaultIdentityMd(agentInfo);
|
|
2774
|
-
if (!agentToolsMd) agentToolsMd = generateDefaultToolsMd(agentInfo);
|
|
2775
|
-
if (!agentClaudeMd) agentClaudeMd = generateDefaultClaudeMd(agentInfo);
|
|
2776
|
-
|
|
2777
|
-
// Push generated templates to server
|
|
2778
|
-
try {
|
|
2779
|
-
const profileUpdate: Record<string, string> = {};
|
|
2780
|
-
if (!profile.soulMd) profileUpdate.soulMd = agentSoulMd;
|
|
2781
|
-
if (!profile.identityMd) profileUpdate.identityMd = agentIdentityMd;
|
|
2782
|
-
if (!profile.toolsMd) profileUpdate.toolsMd = agentToolsMd;
|
|
2783
|
-
if (!profile.claudeMd && agentClaudeMd) profileUpdate.claudeMd = agentClaudeMd;
|
|
2784
|
-
if (!profile.setupScript && agentSetupScript)
|
|
2785
|
-
profileUpdate.setupScript = agentSetupScript;
|
|
2786
|
-
if (!profile.heartbeatMd && agentHeartbeatMd)
|
|
2787
|
-
profileUpdate.heartbeatMd = agentHeartbeatMd;
|
|
2788
|
-
|
|
2789
|
-
await fetch(`${apiUrl}/api/agents/${agentId}/profile`, {
|
|
2790
|
-
method: "PUT",
|
|
2791
|
-
headers: {
|
|
2792
|
-
Authorization: `Bearer ${apiKey}`,
|
|
2793
|
-
"X-Agent-ID": agentId,
|
|
2794
|
-
"Content-Type": "application/json",
|
|
2795
|
-
},
|
|
2796
|
-
body: JSON.stringify(profileUpdate),
|
|
2797
|
-
});
|
|
2798
|
-
console.log(`[${role}] Generated and saved default identity templates`);
|
|
2799
|
-
} catch {
|
|
2800
|
-
console.warn(`[${role}] Could not save generated templates to server`);
|
|
2724
|
+
await awaitCredentials({
|
|
2725
|
+
provider: state.harnessProvider,
|
|
2726
|
+
// Re-read each tick so an operator's HARNESS_PROVIDER flip during
|
|
2727
|
+
// the wait pivots the credential predicate (and onwards).
|
|
2728
|
+
getProvider: () => state.harnessProvider,
|
|
2729
|
+
refreshEnv: async () => {
|
|
2730
|
+
const { env, resolvedProvider } = await fetchResolvedEnv(apiUrl, apiKey, agentId);
|
|
2731
|
+
// Apply drift inside the wait so adapter/prompt/state stay in
|
|
2732
|
+
// sync if the operator flips HARNESS_PROVIDER mid-loop. The
|
|
2733
|
+
// helper is idempotent when nothing changed.
|
|
2734
|
+
const { agentVisibleChanged } = await applySwarmConfigDrift(env, resolvedProvider);
|
|
2735
|
+
if (agentVisibleChanged) {
|
|
2736
|
+
// Fire-and-forget — dashboard reflects the live values, the
|
|
2737
|
+
// wait loop doesn't block on it.
|
|
2738
|
+
reregisterAgent().catch(() => {});
|
|
2801
2739
|
}
|
|
2802
|
-
|
|
2803
|
-
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
|
|
2740
|
+
return env;
|
|
2741
|
+
},
|
|
2742
|
+
onTick: (status) => {
|
|
2743
|
+
// Best-effort status report — the dispatcher uses it to route
|
|
2744
|
+
// around blocked agents. Failures are non-fatal (the wait loop
|
|
2745
|
+
// already swallows onTick exceptions). We do NOT include
|
|
2746
|
+
// `cred_status` here — the live test runs once the worker is
|
|
2747
|
+
// ready (below), and intermediate ticks are presence-only.
|
|
2748
|
+
fetch(`${apiUrl}/api/agents/${encodeURIComponent(agentId)}/credential-status`, {
|
|
2749
|
+
method: "PUT",
|
|
2807
2750
|
headers: {
|
|
2808
2751
|
Authorization: `Bearer ${apiKey}`,
|
|
2809
2752
|
"X-Agent-ID": agentId,
|
|
2753
|
+
"Content-Type": "application/json",
|
|
2810
2754
|
},
|
|
2755
|
+
body: JSON.stringify({ ready: status.ready, missing: status.missing }),
|
|
2756
|
+
}).catch(() => {
|
|
2757
|
+
// Swallowed — Phase 2 wait loop logs every tick anyway.
|
|
2811
2758
|
});
|
|
2812
|
-
|
|
2813
|
-
|
|
2814
|
-
|
|
2815
|
-
|
|
2816
|
-
|
|
2817
|
-
|
|
2818
|
-
|
|
2819
|
-
|
|
2820
|
-
|
|
2821
|
-
|
|
2822
|
-
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
2829
|
-
|
|
2759
|
+
},
|
|
2760
|
+
});
|
|
2761
|
+
} catch (err) {
|
|
2762
|
+
if (err instanceof BootMaxWaitExceededError) {
|
|
2763
|
+
console.error(`[${role}] ${err.message}`);
|
|
2764
|
+
process.exit(EX_CONFIG);
|
|
2765
|
+
}
|
|
2766
|
+
throw err;
|
|
2767
|
+
}
|
|
2768
|
+
|
|
2769
|
+
// Migration 055: build the full snapshot (presence + live test) once
|
|
2770
|
+
// creds are ready and POST it to the agent row. Status endpoint reads
|
|
2771
|
+
// this instead of running predicates server-side. Always uses the
|
|
2772
|
+
// *current* state.harnessProvider in case it flipped during the wait.
|
|
2773
|
+
try {
|
|
2774
|
+
const snapshot = await buildCredStatusReport(state.harnessProvider, process.env, {}, "boot");
|
|
2775
|
+
await reportCredStatus(apiUrl, apiKey, agentId, snapshot);
|
|
2776
|
+
} catch (err) {
|
|
2777
|
+
// Non-fatal — worker proceeds even if reporting fails.
|
|
2778
|
+
console.warn(`[${role}] cred_status boot report failed (non-fatal): ${err}`);
|
|
2779
|
+
}
|
|
2780
|
+
}
|
|
2781
|
+
|
|
2782
|
+
// Clean up any stale active sessions from previous runs (crash recovery)
|
|
2783
|
+
await cleanupActiveSessions(apiConfig);
|
|
2784
|
+
console.log(`[${role}] Cleaned up stale active sessions`);
|
|
2785
|
+
|
|
2786
|
+
// Fetch full agent profile to get soul/identity content
|
|
2787
|
+
try {
|
|
2788
|
+
const resp = await fetch(`${apiUrl}/me`, {
|
|
2789
|
+
headers: {
|
|
2790
|
+
Authorization: `Bearer ${apiKey}`,
|
|
2791
|
+
"X-Agent-ID": agentId,
|
|
2792
|
+
},
|
|
2793
|
+
});
|
|
2794
|
+
if (resp.ok) {
|
|
2795
|
+
const profile = (await resp.json()) as {
|
|
2796
|
+
soulMd?: string;
|
|
2797
|
+
identityMd?: string;
|
|
2798
|
+
claudeMd?: string;
|
|
2799
|
+
setupScript?: string;
|
|
2800
|
+
toolsMd?: string;
|
|
2801
|
+
heartbeatMd?: string;
|
|
2802
|
+
name?: string;
|
|
2803
|
+
description?: string;
|
|
2804
|
+
};
|
|
2805
|
+
agentSoulMd = profile.soulMd;
|
|
2806
|
+
agentIdentityMd = profile.identityMd;
|
|
2807
|
+
agentSetupScript = profile.setupScript;
|
|
2808
|
+
agentToolsMd = profile.toolsMd;
|
|
2809
|
+
agentClaudeMd = profile.claudeMd;
|
|
2810
|
+
agentHeartbeatMd = profile.heartbeatMd;
|
|
2811
|
+
agentProfileName = profile.name;
|
|
2812
|
+
agentDescription = profile.description;
|
|
2813
|
+
|
|
2814
|
+
// Generate default templates if missing (runner registers via POST /api/agents
|
|
2815
|
+
// which doesn't generate templates like join-swarm does)
|
|
2816
|
+
if (
|
|
2817
|
+
!agentSoulMd ||
|
|
2818
|
+
!agentIdentityMd ||
|
|
2819
|
+
!agentToolsMd ||
|
|
2820
|
+
!agentClaudeMd ||
|
|
2821
|
+
!agentHeartbeatMd
|
|
2822
|
+
) {
|
|
2823
|
+
// Use already-fetched template (from pre-registration step)
|
|
2824
|
+
if (cachedTemplate) {
|
|
2825
|
+
const ctx = {
|
|
2826
|
+
agent: {
|
|
2827
|
+
name: agentProfileName || agentName,
|
|
2828
|
+
role: role,
|
|
2829
|
+
description: agentDescription || "",
|
|
2830
|
+
capabilities: (capabilities || []).join(", "),
|
|
2831
|
+
},
|
|
2832
|
+
};
|
|
2833
|
+
if (!agentSoulMd) agentSoulMd = interpolate(cachedTemplate.files.soulMd, ctx).result;
|
|
2834
|
+
if (!agentIdentityMd)
|
|
2835
|
+
agentIdentityMd = interpolate(cachedTemplate.files.identityMd, ctx).result;
|
|
2836
|
+
if (!agentToolsMd) agentToolsMd = interpolate(cachedTemplate.files.toolsMd, ctx).result;
|
|
2837
|
+
if (!agentClaudeMd)
|
|
2838
|
+
agentClaudeMd = interpolate(cachedTemplate.files.claudeMd, ctx).result;
|
|
2839
|
+
if (!agentSetupScript)
|
|
2840
|
+
agentSetupScript = interpolate(cachedTemplate.files.setupScript, ctx).result;
|
|
2841
|
+
if (!agentHeartbeatMd)
|
|
2842
|
+
agentHeartbeatMd = interpolate(cachedTemplate.files.heartbeatMd, ctx).result;
|
|
2843
|
+
console.log(`[${role}] Applied template: ${templateId}`);
|
|
2830
2844
|
}
|
|
2831
2845
|
|
|
2832
|
-
//
|
|
2846
|
+
// Fallback to generic defaults for any still-missing fields
|
|
2847
|
+
const agentInfo = {
|
|
2848
|
+
name: agentProfileName || agentName,
|
|
2849
|
+
role: role,
|
|
2850
|
+
description: agentDescription,
|
|
2851
|
+
capabilities: config.capabilities,
|
|
2852
|
+
};
|
|
2853
|
+
if (!agentSoulMd) agentSoulMd = generateDefaultSoulMd(agentInfo);
|
|
2854
|
+
if (!agentIdentityMd) agentIdentityMd = generateDefaultIdentityMd(agentInfo);
|
|
2855
|
+
if (!agentToolsMd) agentToolsMd = generateDefaultToolsMd(agentInfo);
|
|
2856
|
+
if (!agentClaudeMd) agentClaudeMd = generateDefaultClaudeMd(agentInfo);
|
|
2857
|
+
|
|
2858
|
+
// Push generated templates to server
|
|
2833
2859
|
try {
|
|
2834
|
-
const
|
|
2860
|
+
const profileUpdate: Record<string, string> = {};
|
|
2861
|
+
if (!profile.soulMd) profileUpdate.soulMd = agentSoulMd;
|
|
2862
|
+
if (!profile.identityMd) profileUpdate.identityMd = agentIdentityMd;
|
|
2863
|
+
if (!profile.toolsMd) profileUpdate.toolsMd = agentToolsMd;
|
|
2864
|
+
if (!profile.claudeMd && agentClaudeMd) profileUpdate.claudeMd = agentClaudeMd;
|
|
2865
|
+
if (!profile.setupScript && agentSetupScript)
|
|
2866
|
+
profileUpdate.setupScript = agentSetupScript;
|
|
2867
|
+
if (!profile.heartbeatMd && agentHeartbeatMd)
|
|
2868
|
+
profileUpdate.heartbeatMd = agentHeartbeatMd;
|
|
2869
|
+
|
|
2870
|
+
await fetch(`${apiUrl}/api/agents/${agentId}/profile`, {
|
|
2871
|
+
method: "PUT",
|
|
2835
2872
|
headers: {
|
|
2836
2873
|
Authorization: `Bearer ${apiKey}`,
|
|
2837
2874
|
"X-Agent-ID": agentId,
|
|
2875
|
+
"Content-Type": "application/json",
|
|
2838
2876
|
},
|
|
2877
|
+
body: JSON.stringify(profileUpdate),
|
|
2839
2878
|
});
|
|
2840
|
-
|
|
2841
|
-
const mcpServersData = (await mcpServersResp.json()) as {
|
|
2842
|
-
servers: {
|
|
2843
|
-
name: string;
|
|
2844
|
-
transport: string;
|
|
2845
|
-
description: string | null;
|
|
2846
|
-
isActive: boolean;
|
|
2847
|
-
isEnabled: boolean;
|
|
2848
|
-
}[];
|
|
2849
|
-
};
|
|
2850
|
-
const activeMcpServers = mcpServersData.servers.filter(
|
|
2851
|
-
(s) => s.isActive && s.isEnabled,
|
|
2852
|
-
);
|
|
2853
|
-
if (activeMcpServers.length > 0) {
|
|
2854
|
-
agentMcpServersSummary = activeMcpServers
|
|
2855
|
-
.map(
|
|
2856
|
-
(s) => `- **${s.name}** (${s.transport}): ${s.description || "No description"}`,
|
|
2857
|
-
)
|
|
2858
|
-
.join("\n");
|
|
2859
|
-
console.log(
|
|
2860
|
-
`[${role}] Loaded ${activeMcpServers.length} MCP servers for system prompt`,
|
|
2861
|
-
);
|
|
2862
|
-
}
|
|
2863
|
-
}
|
|
2879
|
+
console.log(`[${role}] Generated and saved default identity templates`);
|
|
2864
2880
|
} catch {
|
|
2865
|
-
|
|
2881
|
+
console.warn(`[${role}] Could not save generated templates to server`);
|
|
2866
2882
|
}
|
|
2867
|
-
|
|
2868
|
-
// Rebuild system prompt with identity
|
|
2869
|
-
basePrompt = await buildSystemPrompt();
|
|
2870
|
-
resolvedSystemPrompt = additionalSystemPrompt
|
|
2871
|
-
? `${basePrompt}\n\n${additionalSystemPrompt}`
|
|
2872
|
-
: basePrompt;
|
|
2873
|
-
console.log(
|
|
2874
|
-
`[${role}] Loaded agent identity (soul: ${agentSoulMd ? "yes" : "no"}, identity: ${agentIdentityMd ? "yes" : "no"}, tools: ${agentToolsMd ? "yes" : "no"}, claude: ${agentClaudeMd ? "yes" : "no"})`,
|
|
2875
|
-
);
|
|
2876
|
-
console.log(`[${role}] Updated system prompt length: ${resolvedSystemPrompt.length} chars`);
|
|
2877
2883
|
}
|
|
2878
|
-
} catch {
|
|
2879
|
-
console.warn(`[${role}] Could not fetch agent profile for identity — proceeding without`);
|
|
2880
|
-
}
|
|
2881
2884
|
|
|
2882
|
-
|
|
2883
|
-
const SOUL_MD_PATH = "/workspace/SOUL.md";
|
|
2884
|
-
const IDENTITY_MD_PATH = "/workspace/IDENTITY.md";
|
|
2885
|
-
|
|
2886
|
-
if (agentSoulMd) {
|
|
2887
|
-
try {
|
|
2888
|
-
await Bun.write(SOUL_MD_PATH, agentSoulMd);
|
|
2889
|
-
console.log(`[${role}] Wrote SOUL.md to workspace`);
|
|
2890
|
-
} catch (err) {
|
|
2891
|
-
console.warn(`[${role}] Could not write SOUL.md: ${(err as Error).message}`);
|
|
2892
|
-
}
|
|
2893
|
-
}
|
|
2894
|
-
if (agentIdentityMd) {
|
|
2885
|
+
// Fetch installed skills for system prompt
|
|
2895
2886
|
try {
|
|
2896
|
-
await
|
|
2897
|
-
|
|
2898
|
-
|
|
2899
|
-
|
|
2887
|
+
const skillsResp = await fetch(`${apiUrl}/api/agents/${agentId}/skills`, {
|
|
2888
|
+
headers: {
|
|
2889
|
+
Authorization: `Bearer ${apiKey}`,
|
|
2890
|
+
"X-Agent-ID": agentId,
|
|
2891
|
+
},
|
|
2892
|
+
});
|
|
2893
|
+
if (skillsResp.ok) {
|
|
2894
|
+
const skillsData = (await skillsResp.json()) as {
|
|
2895
|
+
skills: {
|
|
2896
|
+
name: string;
|
|
2897
|
+
description: string;
|
|
2898
|
+
isActive: boolean;
|
|
2899
|
+
isEnabled: boolean;
|
|
2900
|
+
}[];
|
|
2901
|
+
};
|
|
2902
|
+
agentSkillsSummary = skillsData.skills
|
|
2903
|
+
.filter((s) => s.isActive && s.isEnabled)
|
|
2904
|
+
.map((s) => ({ name: s.name, description: s.description }));
|
|
2905
|
+
if (agentSkillsSummary.length > 0) {
|
|
2906
|
+
console.log(`[${role}] Loaded ${agentSkillsSummary.length} skills for system prompt`);
|
|
2907
|
+
}
|
|
2908
|
+
}
|
|
2909
|
+
} catch {
|
|
2910
|
+
// Non-fatal — skills are optional
|
|
2900
2911
|
}
|
|
2901
|
-
}
|
|
2902
2912
|
|
|
2903
|
-
|
|
2904
|
-
// Only create if it doesn't exist — the entrypoint already composed/prepended it at container start
|
|
2905
|
-
if (agentSetupScript) {
|
|
2913
|
+
// Fetch installed MCP servers for system prompt
|
|
2906
2914
|
try {
|
|
2907
|
-
|
|
2908
|
-
|
|
2909
|
-
|
|
2915
|
+
const mcpServersResp = await fetch(`${apiUrl}/api/agents/${agentId}/mcp-servers`, {
|
|
2916
|
+
headers: {
|
|
2917
|
+
Authorization: `Bearer ${apiKey}`,
|
|
2918
|
+
"X-Agent-ID": agentId,
|
|
2919
|
+
},
|
|
2920
|
+
});
|
|
2921
|
+
if (mcpServersResp.ok) {
|
|
2922
|
+
const mcpServersData = (await mcpServersResp.json()) as {
|
|
2923
|
+
servers: {
|
|
2924
|
+
name: string;
|
|
2925
|
+
transport: string;
|
|
2926
|
+
description: string | null;
|
|
2927
|
+
isActive: boolean;
|
|
2928
|
+
isEnabled: boolean;
|
|
2929
|
+
}[];
|
|
2930
|
+
};
|
|
2931
|
+
const activeMcpServers = mcpServersData.servers.filter((s) => s.isActive && s.isEnabled);
|
|
2932
|
+
if (activeMcpServers.length > 0) {
|
|
2933
|
+
agentMcpServersSummary = activeMcpServers
|
|
2934
|
+
.map((s) => `- **${s.name}** (${s.transport}): ${s.description || "No description"}`)
|
|
2935
|
+
.join("\n");
|
|
2936
|
+
console.log(
|
|
2937
|
+
`[${role}] Loaded ${activeMcpServers.length} MCP servers for system prompt`,
|
|
2938
|
+
);
|
|
2939
|
+
}
|
|
2910
2940
|
}
|
|
2911
|
-
} catch
|
|
2912
|
-
|
|
2941
|
+
} catch {
|
|
2942
|
+
// Non-fatal — MCP servers are optional
|
|
2913
2943
|
}
|
|
2914
|
-
}
|
|
2915
2944
|
|
|
2916
|
-
|
|
2917
|
-
|
|
2918
|
-
|
|
2919
|
-
|
|
2920
|
-
|
|
2921
|
-
|
|
2922
|
-
|
|
2923
|
-
|
|
2945
|
+
// Rebuild system prompt with identity
|
|
2946
|
+
basePrompt = await buildSystemPrompt();
|
|
2947
|
+
resolvedSystemPrompt = additionalSystemPrompt
|
|
2948
|
+
? `${basePrompt}\n\n${additionalSystemPrompt}`
|
|
2949
|
+
: basePrompt;
|
|
2950
|
+
console.log(
|
|
2951
|
+
`[${role}] Loaded agent identity (soul: ${agentSoulMd ? "yes" : "no"}, identity: ${agentIdentityMd ? "yes" : "no"}, tools: ${agentToolsMd ? "yes" : "no"}, claude: ${agentClaudeMd ? "yes" : "no"})`,
|
|
2952
|
+
);
|
|
2953
|
+
console.log(`[${role}] Updated system prompt length: ${resolvedSystemPrompt.length} chars`);
|
|
2924
2954
|
}
|
|
2955
|
+
} catch {
|
|
2956
|
+
console.warn(`[${role}] Could not fetch agent profile for identity — proceeding without`);
|
|
2957
|
+
}
|
|
2925
2958
|
|
|
2926
|
-
|
|
2927
|
-
|
|
2928
|
-
|
|
2929
|
-
await Bun.write("/workspace/HEARTBEAT.md", agentHeartbeatMd);
|
|
2930
|
-
console.log(`[${role}] Wrote HEARTBEAT.md to workspace`);
|
|
2931
|
-
} catch (err) {
|
|
2932
|
-
console.warn(`[${role}] Could not write HEARTBEAT.md: ${(err as Error).message}`);
|
|
2933
|
-
}
|
|
2934
|
-
}
|
|
2959
|
+
// Write SOUL.md and IDENTITY.md to workspace before spawning Claude
|
|
2960
|
+
const SOUL_MD_PATH = "/workspace/SOUL.md";
|
|
2961
|
+
const IDENTITY_MD_PATH = "/workspace/IDENTITY.md";
|
|
2935
2962
|
|
|
2936
|
-
|
|
2937
|
-
|
|
2938
|
-
|
|
2939
|
-
|
|
2940
|
-
|
|
2941
|
-
}
|
|
2942
|
-
|
|
2943
|
-
|
|
2963
|
+
if (agentSoulMd) {
|
|
2964
|
+
try {
|
|
2965
|
+
await Bun.write(SOUL_MD_PATH, agentSoulMd);
|
|
2966
|
+
console.log(`[${role}] Wrote SOUL.md to workspace`);
|
|
2967
|
+
} catch (err) {
|
|
2968
|
+
console.warn(`[${role}] Could not write SOUL.md: ${(err as Error).message}`);
|
|
2969
|
+
}
|
|
2970
|
+
}
|
|
2971
|
+
if (agentIdentityMd) {
|
|
2972
|
+
try {
|
|
2973
|
+
await Bun.write(IDENTITY_MD_PATH, agentIdentityMd);
|
|
2974
|
+
console.log(`[${role}] Wrote IDENTITY.md to workspace`);
|
|
2975
|
+
} catch (err) {
|
|
2976
|
+
console.warn(`[${role}] Could not write IDENTITY.md: ${(err as Error).message}`);
|
|
2944
2977
|
}
|
|
2978
|
+
}
|
|
2945
2979
|
|
|
2946
|
-
|
|
2980
|
+
// Write setup script to workspace (agent can edit during session)
|
|
2981
|
+
// Only create if it doesn't exist — the entrypoint already composed/prepended it at container start
|
|
2982
|
+
if (agentSetupScript) {
|
|
2947
2983
|
try {
|
|
2948
|
-
|
|
2949
|
-
|
|
2950
|
-
|
|
2951
|
-
"X-Agent-ID": agentId,
|
|
2952
|
-
};
|
|
2953
|
-
if (apiKey) syncHeaders.Authorization = `Bearer ${apiKey}`;
|
|
2954
|
-
const syncRes = await fetch(`${swarmUrl}/api/skills/sync-filesystem`, {
|
|
2955
|
-
method: "POST",
|
|
2956
|
-
headers: syncHeaders,
|
|
2957
|
-
});
|
|
2958
|
-
if (syncRes.ok) {
|
|
2959
|
-
const syncResult = (await syncRes.json()) as {
|
|
2960
|
-
synced: number;
|
|
2961
|
-
removed: number;
|
|
2962
|
-
errors: string[];
|
|
2963
|
-
};
|
|
2964
|
-
console.log(
|
|
2965
|
-
`[${role}] Skills synced: ${syncResult.synced} written, ${syncResult.removed} removed`,
|
|
2966
|
-
);
|
|
2967
|
-
if (syncResult.errors.length > 0) {
|
|
2968
|
-
console.warn(`[${role}] Skill sync errors: ${syncResult.errors.join(", ")}`);
|
|
2969
|
-
}
|
|
2970
|
-
} else {
|
|
2971
|
-
console.warn(`[${role}] Skill sync failed: HTTP ${syncRes.status}`);
|
|
2984
|
+
if (!(await Bun.file("/workspace/start-up.sh").exists())) {
|
|
2985
|
+
await Bun.write("/workspace/start-up.sh", `#!/bin/bash\n${agentSetupScript}\n`);
|
|
2986
|
+
console.log(`[${role}] Wrote start-up.sh to workspace`);
|
|
2972
2987
|
}
|
|
2973
2988
|
} catch (err) {
|
|
2974
|
-
console.warn(`[${role}]
|
|
2989
|
+
console.warn(`[${role}] Could not write start-up.sh: ${(err as Error).message}`);
|
|
2975
2990
|
}
|
|
2991
|
+
}
|
|
2976
2992
|
|
|
2977
|
-
|
|
2978
|
-
|
|
2993
|
+
// Write TOOLS.md to workspace (agent can edit during session)
|
|
2994
|
+
if (agentToolsMd) {
|
|
2979
2995
|
try {
|
|
2980
|
-
|
|
2981
|
-
|
|
2982
|
-
|
|
2983
|
-
|
|
2984
|
-
|
|
2996
|
+
await Bun.write("/workspace/TOOLS.md", agentToolsMd);
|
|
2997
|
+
console.log(`[${role}] Wrote TOOLS.md to workspace`);
|
|
2998
|
+
} catch (err) {
|
|
2999
|
+
console.warn(`[${role}] Could not write TOOLS.md: ${(err as Error).message}`);
|
|
3000
|
+
}
|
|
3001
|
+
}
|
|
2985
3002
|
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
|
|
2992
|
-
|
|
2993
|
-
|
|
3003
|
+
// Write HEARTBEAT.md to workspace (lead's periodic checklist)
|
|
3004
|
+
if (agentHeartbeatMd) {
|
|
3005
|
+
try {
|
|
3006
|
+
await Bun.write("/workspace/HEARTBEAT.md", agentHeartbeatMd);
|
|
3007
|
+
console.log(`[${role}] Wrote HEARTBEAT.md to workspace`);
|
|
3008
|
+
} catch (err) {
|
|
3009
|
+
console.warn(`[${role}] Could not write HEARTBEAT.md: ${(err as Error).message}`);
|
|
3010
|
+
}
|
|
3011
|
+
}
|
|
2994
3012
|
|
|
2995
|
-
|
|
2996
|
-
|
|
2997
|
-
|
|
2998
|
-
|
|
2999
|
-
|
|
3013
|
+
// Write CLAUDE.md to workspace (agent-level instructions)
|
|
3014
|
+
if (agentClaudeMd) {
|
|
3015
|
+
try {
|
|
3016
|
+
await Bun.write("/workspace/CLAUDE.md", agentClaudeMd);
|
|
3017
|
+
console.log(`[${role}] Wrote CLAUDE.md to workspace`);
|
|
3018
|
+
} catch (err) {
|
|
3019
|
+
console.warn(`[${role}] Could not write CLAUDE.md: ${(err as Error).message}`);
|
|
3020
|
+
}
|
|
3021
|
+
}
|
|
3000
3022
|
|
|
3001
|
-
|
|
3002
|
-
|
|
3003
|
-
|
|
3023
|
+
// ========== Sync skills to filesystem ==========
|
|
3024
|
+
try {
|
|
3025
|
+
console.log(`[${role}] Syncing skills to filesystem...`);
|
|
3026
|
+
const syncHeaders: Record<string, string> = {
|
|
3027
|
+
"Content-Type": "application/json",
|
|
3028
|
+
"X-Agent-ID": agentId,
|
|
3029
|
+
};
|
|
3030
|
+
if (apiKey) syncHeaders.Authorization = `Bearer ${apiKey}`;
|
|
3031
|
+
const syncRes = await fetch(`${swarmUrl}/api/skills/sync-filesystem`, {
|
|
3032
|
+
method: "POST",
|
|
3033
|
+
headers: syncHeaders,
|
|
3034
|
+
});
|
|
3035
|
+
if (syncRes.ok) {
|
|
3036
|
+
const syncResult = (await syncRes.json()) as {
|
|
3037
|
+
synced: number;
|
|
3038
|
+
removed: number;
|
|
3039
|
+
errors: string[];
|
|
3040
|
+
};
|
|
3041
|
+
console.log(
|
|
3042
|
+
`[${role}] Skills synced: ${syncResult.synced} written, ${syncResult.removed} removed`,
|
|
3043
|
+
);
|
|
3044
|
+
if (syncResult.errors.length > 0) {
|
|
3045
|
+
console.warn(`[${role}] Skill sync errors: ${syncResult.errors.join(", ")}`);
|
|
3046
|
+
}
|
|
3047
|
+
} else {
|
|
3048
|
+
console.warn(`[${role}] Skill sync failed: HTTP ${syncRes.status}`);
|
|
3049
|
+
}
|
|
3050
|
+
} catch (err) {
|
|
3051
|
+
console.warn(`[${role}] Skill sync failed: ${(err as Error).message}`);
|
|
3052
|
+
}
|
|
3004
3053
|
|
|
3005
|
-
|
|
3006
|
-
|
|
3007
|
-
|
|
3008
|
-
|
|
3009
|
-
|
|
3010
|
-
);
|
|
3011
|
-
continue;
|
|
3012
|
-
}
|
|
3054
|
+
// ========== Resume paused tasks with PRIORITY ==========
|
|
3055
|
+
// Check for paused tasks from previous shutdown and resume them before normal polling
|
|
3056
|
+
try {
|
|
3057
|
+
console.log(`[${role}] Checking for paused tasks to resume...`);
|
|
3058
|
+
const pausedTasks = await getPausedTasksFromAPI(apiConfig);
|
|
3013
3059
|
|
|
3014
|
-
|
|
3015
|
-
|
|
3016
|
-
hasMcp: adapter.traits.hasMcp,
|
|
3017
|
-
});
|
|
3060
|
+
if (pausedTasks.length > 0) {
|
|
3061
|
+
console.log(`[${role}] Found ${pausedTasks.length} paused task(s) to resume`);
|
|
3018
3062
|
|
|
3019
|
-
|
|
3020
|
-
|
|
3021
|
-
|
|
3022
|
-
|
|
3023
|
-
|
|
3024
|
-
task.task,
|
|
3025
|
-
task.id,
|
|
3063
|
+
for (const task of pausedTasks) {
|
|
3064
|
+
// Defensive: skip tasks that already have completion data (zombie prevention)
|
|
3065
|
+
if (task.finishedAt || task.output) {
|
|
3066
|
+
console.warn(
|
|
3067
|
+
`[${role}] Skipping zombie task ${task.id.slice(0, 8)} — already has completion data (finishedAt: ${!!task.finishedAt}, output: ${!!task.output})`,
|
|
3026
3068
|
);
|
|
3027
|
-
|
|
3028
|
-
|
|
3029
|
-
console.log(`[${role}] Injected relevant memories into resumed task prompt`);
|
|
3030
|
-
}
|
|
3069
|
+
continue;
|
|
3070
|
+
}
|
|
3031
3071
|
|
|
3032
|
-
|
|
3033
|
-
|
|
3034
|
-
|
|
3035
|
-
|
|
3036
|
-
|
|
3037
|
-
`[${role}] Resuming task's own session ${task.claudeSessionId.slice(0, 8)}`,
|
|
3038
|
-
);
|
|
3039
|
-
} else if (task.parentTaskId) {
|
|
3040
|
-
const parentSessionId = await fetchProviderSessionId(apiUrl, apiKey, task.parentTaskId);
|
|
3041
|
-
if (parentSessionId) {
|
|
3042
|
-
resumeAdditionalArgs = [...resumeAdditionalArgs, "--resume", parentSessionId];
|
|
3043
|
-
console.log(`[${role}] Resuming parent session ${parentSessionId.slice(0, 8)}`);
|
|
3044
|
-
}
|
|
3045
|
-
}
|
|
3072
|
+
// Wait if at capacity (though unlikely on fresh startup)
|
|
3073
|
+
while (state.activeTasks.size >= state.maxConcurrent) {
|
|
3074
|
+
await checkCompletedProcesses(state, role, apiConfig);
|
|
3075
|
+
await Bun.sleep(1000);
|
|
3076
|
+
}
|
|
3046
3077
|
|
|
3047
|
-
|
|
3048
|
-
|
|
3049
|
-
|
|
3050
|
-
const logFile = `${logDir}/${timestamp}-resume-${task.id.slice(0, 8)}.jsonl`;
|
|
3051
|
-
|
|
3052
|
-
console.log(`\n[${role}] === Resuming paused task (iteration ${iteration}) ===`);
|
|
3053
|
-
console.log(`[${role}] Logging to: ${logFile}`);
|
|
3054
|
-
console.log(`[${role}] Prompt: ${resumePrompt.slice(0, 100)}...`);
|
|
3055
|
-
|
|
3056
|
-
const metadata = {
|
|
3057
|
-
type: metadataType,
|
|
3058
|
-
sessionId,
|
|
3059
|
-
iteration,
|
|
3060
|
-
timestamp: new Date().toISOString(),
|
|
3061
|
-
prompt: resumePrompt,
|
|
3062
|
-
trigger: "task_resumed",
|
|
3063
|
-
resumedTaskId: task.id,
|
|
3064
|
-
yolo: isYolo,
|
|
3065
|
-
};
|
|
3066
|
-
await Bun.write(logFile, `${JSON.stringify(metadata)}\n`);
|
|
3067
|
-
|
|
3068
|
-
// Resolve cwd for resumed task (mirrors normal task path: task.dir > vcsRepo clonePath)
|
|
3069
|
-
let resumeCwd: string | undefined;
|
|
3070
|
-
if (task.dir) {
|
|
3071
|
-
try {
|
|
3072
|
-
if (existsSync(task.dir) && statSync(task.dir).isDirectory()) {
|
|
3073
|
-
resumeCwd = task.dir;
|
|
3074
|
-
} else {
|
|
3075
|
-
console.warn(
|
|
3076
|
-
`[${role}] Resume task dir "${task.dir}" does not exist or is not a directory, falling back to default cwd`,
|
|
3077
|
-
);
|
|
3078
|
-
}
|
|
3079
|
-
} catch {
|
|
3080
|
-
console.warn(
|
|
3081
|
-
`[${role}] Failed to check resume task dir "${task.dir}", falling back to default cwd`,
|
|
3082
|
-
);
|
|
3083
|
-
}
|
|
3084
|
-
}
|
|
3078
|
+
console.log(
|
|
3079
|
+
`[${role}] Resuming paused task ${task.id.slice(0, 8)}: "${task.task.slice(0, 50)}..."`,
|
|
3080
|
+
);
|
|
3085
3081
|
|
|
3086
|
-
|
|
3087
|
-
|
|
3088
|
-
|
|
3089
|
-
|
|
3090
|
-
|
|
3091
|
-
|
|
3092
|
-
defaultBranch: "main",
|
|
3093
|
-
};
|
|
3094
|
-
const repoContext = await ensureRepoForTask(effectiveConfig, role);
|
|
3095
|
-
if (repoContext?.clonePath) {
|
|
3096
|
-
resumeCwd = repoContext.clonePath;
|
|
3097
|
-
}
|
|
3098
|
-
}
|
|
3082
|
+
// Resume the task via API (marks as in_progress)
|
|
3083
|
+
const resumed = await resumeTaskViaAPI(apiConfig, task.id);
|
|
3084
|
+
if (!resumed) {
|
|
3085
|
+
console.warn(`[${role}] Failed to resume task ${task.id.slice(0, 8)} via API, skipping`);
|
|
3086
|
+
continue;
|
|
3087
|
+
}
|
|
3099
3088
|
|
|
3100
|
-
|
|
3101
|
-
|
|
3089
|
+
// Build prompt with resume context + memory injection
|
|
3090
|
+
let resumePrompt = await buildResumePrompt(task, adapter.formatCommand.bind(adapter), {
|
|
3091
|
+
hasMcp: adapter.traits.hasMcp,
|
|
3092
|
+
});
|
|
3102
3093
|
|
|
3103
|
-
|
|
3104
|
-
|
|
3105
|
-
|
|
3106
|
-
|
|
3107
|
-
|
|
3108
|
-
|
|
3109
|
-
|
|
3110
|
-
|
|
3111
|
-
|
|
3112
|
-
|
|
3113
|
-
|
|
3114
|
-
|
|
3115
|
-
agentId,
|
|
3116
|
-
runnerSessionId: resumeRunnerSessionId,
|
|
3117
|
-
iteration,
|
|
3118
|
-
taskId: task.id,
|
|
3119
|
-
model: (task as { model?: string }).model,
|
|
3120
|
-
cwd: resumeCwd,
|
|
3121
|
-
vcsRepo: task.vcsRepo,
|
|
3122
|
-
},
|
|
3123
|
-
logDir,
|
|
3124
|
-
isYolo,
|
|
3125
|
-
);
|
|
3126
|
-
} catch (spawnErr) {
|
|
3127
|
-
const errMsg = spawnErr instanceof Error ? spawnErr.message : String(spawnErr);
|
|
3128
|
-
console.error(
|
|
3129
|
-
`[${role}] Failed to spawn process for resumed task ${task.id.slice(0, 8)}: ${errMsg}`,
|
|
3130
|
-
);
|
|
3131
|
-
await ensureTaskFinished(
|
|
3132
|
-
apiConfig,
|
|
3133
|
-
role,
|
|
3134
|
-
task.id,
|
|
3135
|
-
1,
|
|
3136
|
-
`Spawn failed: ${errMsg}`,
|
|
3137
|
-
undefined,
|
|
3138
|
-
state.harnessProvider,
|
|
3139
|
-
);
|
|
3140
|
-
continue;
|
|
3141
|
-
}
|
|
3094
|
+
// Inject relevant memories for resumed tasks
|
|
3095
|
+
const resumeMemoryContext = await fetchRelevantMemories(
|
|
3096
|
+
apiUrl,
|
|
3097
|
+
apiKey,
|
|
3098
|
+
agentId,
|
|
3099
|
+
task.task,
|
|
3100
|
+
task.id,
|
|
3101
|
+
);
|
|
3102
|
+
if (resumeMemoryContext) {
|
|
3103
|
+
resumePrompt += resumeMemoryContext;
|
|
3104
|
+
console.log(`[${role}] Injected relevant memories into resumed task prompt`);
|
|
3105
|
+
}
|
|
3142
3106
|
|
|
3143
|
-
|
|
3144
|
-
|
|
3145
|
-
|
|
3146
|
-
|
|
3147
|
-
|
|
3148
|
-
|
|
3149
|
-
|
|
3150
|
-
|
|
3151
|
-
|
|
3152
|
-
|
|
3107
|
+
// Resolve --resume: prefer own session ID, then parent's
|
|
3108
|
+
let resumeAdditionalArgs = opts.additionalArgs || [];
|
|
3109
|
+
if (task.claudeSessionId) {
|
|
3110
|
+
resumeAdditionalArgs = [...resumeAdditionalArgs, "--resume", task.claudeSessionId];
|
|
3111
|
+
console.log(`[${role}] Resuming task's own session ${task.claudeSessionId.slice(0, 8)}`);
|
|
3112
|
+
} else if (task.parentTaskId) {
|
|
3113
|
+
const parentSessionId = await fetchProviderSessionId(apiUrl, apiKey, task.parentTaskId);
|
|
3114
|
+
if (parentSessionId) {
|
|
3115
|
+
resumeAdditionalArgs = [...resumeAdditionalArgs, "--resume", parentSessionId];
|
|
3116
|
+
console.log(`[${role}] Resuming parent session ${parentSessionId.slice(0, 8)}`);
|
|
3117
|
+
}
|
|
3153
3118
|
}
|
|
3154
3119
|
|
|
3155
|
-
|
|
3156
|
-
|
|
3157
|
-
|
|
3158
|
-
|
|
3159
|
-
} catch (error) {
|
|
3160
|
-
console.error(`[${role}] Error checking/resuming paused tasks: ${error}`);
|
|
3161
|
-
// Continue to normal polling even if resume fails
|
|
3162
|
-
}
|
|
3163
|
-
// ========== END: Resume paused tasks ==========
|
|
3120
|
+
// Spawn Claude process for resumed task
|
|
3121
|
+
iteration++;
|
|
3122
|
+
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
|
3123
|
+
const logFile = `${logDir}/${timestamp}-resume-${task.id.slice(0, 8)}.jsonl`;
|
|
3164
3124
|
|
|
3165
|
-
|
|
3166
|
-
|
|
3167
|
-
|
|
3168
|
-
const swept = await triggerHeartbeatSweep(apiConfig);
|
|
3169
|
-
if (swept) {
|
|
3170
|
-
console.log(`[${role}] Startup heartbeat sweep completed`);
|
|
3171
|
-
} else {
|
|
3172
|
-
console.warn(`[${role}] Startup heartbeat sweep failed (non-fatal)`);
|
|
3173
|
-
}
|
|
3174
|
-
}
|
|
3125
|
+
console.log(`\n[${role}] === Resuming paused task (iteration ${iteration}) ===`);
|
|
3126
|
+
console.log(`[${role}] Logging to: ${logFile}`);
|
|
3127
|
+
console.log(`[${role}] Prompt: ${resumePrompt.slice(0, 100)}...`);
|
|
3175
3128
|
|
|
3176
|
-
|
|
3177
|
-
|
|
3178
|
-
|
|
3179
|
-
|
|
3180
|
-
|
|
3181
|
-
|
|
3182
|
-
|
|
3183
|
-
|
|
3184
|
-
|
|
3185
|
-
|
|
3186
|
-
|
|
3187
|
-
|
|
3188
|
-
|
|
3189
|
-
|
|
3190
|
-
|
|
3191
|
-
|
|
3192
|
-
|
|
3193
|
-
|
|
3194
|
-
|
|
3195
|
-
// and continue on the old adapter unaffected. New spawns (below) read
|
|
3196
|
-
// the current `adapter` binding and pick up the swap. `basePrompt` is
|
|
3197
|
-
// rebuilt because traits (and therefore prompt content) may differ across
|
|
3198
|
-
// providers.
|
|
3199
|
-
if (Date.now() - lastHarnessReconcileAt > HARNESS_RECONCILE_INTERVAL_MS) {
|
|
3200
|
-
lastHarnessReconcileAt = Date.now();
|
|
3201
|
-
try {
|
|
3202
|
-
const { resolvedProvider } = await fetchResolvedEnv(apiUrl, apiKey, agentId);
|
|
3203
|
-
if (resolvedProvider !== state.harnessProvider) {
|
|
3204
|
-
const previous = state.harnessProvider;
|
|
3205
|
-
console.log(
|
|
3206
|
-
`[${role}] [harness] Reconciling adapter: ${previous} → ${resolvedProvider}`,
|
|
3207
|
-
);
|
|
3208
|
-
try {
|
|
3209
|
-
adapter = createProviderAdapter(resolvedProvider);
|
|
3210
|
-
state.harnessProvider = resolvedProvider;
|
|
3211
|
-
basePrompt = await buildSystemPrompt();
|
|
3212
|
-
resolvedSystemPrompt = additionalSystemPrompt
|
|
3213
|
-
? `${basePrompt}\n\n${additionalSystemPrompt}`
|
|
3214
|
-
: basePrompt;
|
|
3215
|
-
// Force a fresh cred_status report below for the new provider.
|
|
3216
|
-
cachedCredHarnessProvider = null;
|
|
3217
|
-
console.log(
|
|
3218
|
-
`[${role}] [harness] Swapped to ${resolvedProvider} (basePrompt rebuilt: ${basePrompt.length} chars)`,
|
|
3219
|
-
);
|
|
3220
|
-
} catch (err) {
|
|
3129
|
+
const metadata = {
|
|
3130
|
+
type: metadataType,
|
|
3131
|
+
sessionId,
|
|
3132
|
+
iteration,
|
|
3133
|
+
timestamp: new Date().toISOString(),
|
|
3134
|
+
prompt: resumePrompt,
|
|
3135
|
+
trigger: "task_resumed",
|
|
3136
|
+
resumedTaskId: task.id,
|
|
3137
|
+
yolo: isYolo,
|
|
3138
|
+
};
|
|
3139
|
+
await Bun.write(logFile, `${JSON.stringify(metadata)}\n`);
|
|
3140
|
+
|
|
3141
|
+
// Resolve cwd for resumed task (mirrors normal task path: task.dir > vcsRepo clonePath)
|
|
3142
|
+
let resumeCwd: string | undefined;
|
|
3143
|
+
if (task.dir) {
|
|
3144
|
+
try {
|
|
3145
|
+
if (existsSync(task.dir) && statSync(task.dir).isDirectory()) {
|
|
3146
|
+
resumeCwd = task.dir;
|
|
3147
|
+
} else {
|
|
3221
3148
|
console.warn(
|
|
3222
|
-
`[${role}]
|
|
3149
|
+
`[${role}] Resume task dir "${task.dir}" does not exist or is not a directory, falling back to default cwd`,
|
|
3223
3150
|
);
|
|
3224
3151
|
}
|
|
3152
|
+
} catch {
|
|
3153
|
+
console.warn(
|
|
3154
|
+
`[${role}] Failed to check resume task dir "${task.dir}", falling back to default cwd`,
|
|
3155
|
+
);
|
|
3225
3156
|
}
|
|
3226
|
-
} catch (err) {
|
|
3227
|
-
console.warn(`[${role}] [harness] Reconcile fetch failed (non-fatal): ${err}`);
|
|
3228
3157
|
}
|
|
3229
|
-
}
|
|
3230
3158
|
|
|
3231
|
-
|
|
3232
|
-
|
|
3233
|
-
|
|
3234
|
-
|
|
3235
|
-
|
|
3236
|
-
|
|
3237
|
-
|
|
3238
|
-
|
|
3239
|
-
|
|
3240
|
-
|
|
3241
|
-
.
|
|
3242
|
-
|
|
3243
|
-
);
|
|
3159
|
+
if (!resumeCwd && task.vcsRepo && apiUrl) {
|
|
3160
|
+
const repoConfig = await fetchRepoConfig(apiUrl, apiKey, task.vcsRepo);
|
|
3161
|
+
const effectiveConfig = repoConfig ?? {
|
|
3162
|
+
url: task.vcsRepo,
|
|
3163
|
+
name: task.vcsRepo.split("/").pop() || task.vcsRepo,
|
|
3164
|
+
clonePath: `/workspace/repos/${task.vcsRepo.split("/").pop() || task.vcsRepo}`,
|
|
3165
|
+
defaultBranch: "main",
|
|
3166
|
+
};
|
|
3167
|
+
const repoContext = await ensureRepoForTask(effectiveConfig, role);
|
|
3168
|
+
if (repoContext?.clonePath) {
|
|
3169
|
+
resumeCwd = repoContext.clonePath;
|
|
3170
|
+
}
|
|
3244
3171
|
}
|
|
3245
|
-
}
|
|
3246
3172
|
|
|
3247
|
-
|
|
3248
|
-
|
|
3249
|
-
for (const [taskId, task] of state.activeTasks) {
|
|
3250
|
-
if (vcsDetectedTasks.has(taskId)) continue;
|
|
3251
|
-
const lastCheck = vcsCheckTimestamps.get(taskId) ?? 0;
|
|
3252
|
-
if (now - lastCheck < VCS_CHECK_INTERVAL) continue;
|
|
3253
|
-
if (!task.workingDir) continue;
|
|
3173
|
+
// Per-task runner session ID so session logs are scoped to this task
|
|
3174
|
+
const resumeRunnerSessionId = crypto.randomUUID();
|
|
3254
3175
|
|
|
3255
|
-
|
|
3256
|
-
|
|
3257
|
-
|
|
3258
|
-
|
|
3259
|
-
|
|
3260
|
-
|
|
3261
|
-
|
|
3262
|
-
|
|
3263
|
-
|
|
3264
|
-
|
|
3265
|
-
|
|
3266
|
-
|
|
3267
|
-
|
|
3268
|
-
|
|
3269
|
-
|
|
3270
|
-
|
|
3271
|
-
},
|
|
3272
|
-
|
|
3273
|
-
|
|
3274
|
-
|
|
3275
|
-
|
|
3276
|
-
|
|
3277
|
-
|
|
3278
|
-
|
|
3279
|
-
|
|
3280
|
-
|
|
3281
|
-
|
|
3282
|
-
|
|
3283
|
-
|
|
3284
|
-
|
|
3285
|
-
|
|
3286
|
-
|
|
3287
|
-
|
|
3176
|
+
let runningTask: RunningTask;
|
|
3177
|
+
try {
|
|
3178
|
+
runningTask = await spawnProviderProcess(
|
|
3179
|
+
adapter,
|
|
3180
|
+
{
|
|
3181
|
+
prompt: resumePrompt,
|
|
3182
|
+
logFile,
|
|
3183
|
+
systemPrompt: resolvedSystemPrompt,
|
|
3184
|
+
additionalArgs: resumeAdditionalArgs,
|
|
3185
|
+
role,
|
|
3186
|
+
apiUrl,
|
|
3187
|
+
apiKey,
|
|
3188
|
+
agentId,
|
|
3189
|
+
runnerSessionId: resumeRunnerSessionId,
|
|
3190
|
+
iteration,
|
|
3191
|
+
taskId: task.id,
|
|
3192
|
+
model: (task as { model?: string }).model,
|
|
3193
|
+
cwd: resumeCwd,
|
|
3194
|
+
vcsRepo: task.vcsRepo,
|
|
3195
|
+
},
|
|
3196
|
+
logDir,
|
|
3197
|
+
isYolo,
|
|
3198
|
+
);
|
|
3199
|
+
} catch (spawnErr) {
|
|
3200
|
+
const errMsg = spawnErr instanceof Error ? spawnErr.message : String(spawnErr);
|
|
3201
|
+
console.error(
|
|
3202
|
+
`[${role}] Failed to spawn process for resumed task ${task.id.slice(0, 8)}: ${errMsg}`,
|
|
3203
|
+
);
|
|
3204
|
+
await ensureTaskFinished(
|
|
3205
|
+
apiConfig,
|
|
3206
|
+
role,
|
|
3207
|
+
task.id,
|
|
3208
|
+
1,
|
|
3209
|
+
`Spawn failed: ${errMsg}`,
|
|
3210
|
+
undefined,
|
|
3211
|
+
state.harnessProvider,
|
|
3212
|
+
);
|
|
3213
|
+
continue;
|
|
3288
3214
|
}
|
|
3289
|
-
}
|
|
3290
3215
|
|
|
3291
|
-
|
|
3292
|
-
|
|
3216
|
+
state.activeTasks.set(task.id, runningTask);
|
|
3217
|
+
registerActiveSession(apiConfig, {
|
|
3218
|
+
taskId: task.id,
|
|
3219
|
+
triggerType: "task_resumed",
|
|
3220
|
+
taskDescription: task.task?.slice(0, 200),
|
|
3221
|
+
runnerSessionId: resumeRunnerSessionId,
|
|
3222
|
+
});
|
|
3293
3223
|
console.log(
|
|
3294
|
-
`[${role}]
|
|
3224
|
+
`[${role}] Resumed task ${task.id.slice(0, 8)} (${state.activeTasks.size}/${state.maxConcurrent} active)`,
|
|
3295
3225
|
);
|
|
3226
|
+
}
|
|
3296
3227
|
|
|
3297
|
-
|
|
3298
|
-
|
|
3299
|
-
|
|
3300
|
-
|
|
3301
|
-
|
|
3302
|
-
|
|
3303
|
-
|
|
3304
|
-
|
|
3305
|
-
|
|
3306
|
-
|
|
3228
|
+
console.log(`[${role}] All paused tasks resumed. Entering normal polling...`);
|
|
3229
|
+
} else {
|
|
3230
|
+
console.log(`[${role}] No paused tasks found. Entering normal polling...`);
|
|
3231
|
+
}
|
|
3232
|
+
} catch (error) {
|
|
3233
|
+
console.error(`[${role}] Error checking/resuming paused tasks: ${error}`);
|
|
3234
|
+
// Continue to normal polling even if resume fails
|
|
3235
|
+
}
|
|
3236
|
+
// ========== END: Resume paused tasks ==========
|
|
3237
|
+
|
|
3238
|
+
// ========== Lead startup self-check ==========
|
|
3239
|
+
if (isLead) {
|
|
3240
|
+
console.log(`[${role}] Running startup heartbeat sweep...`);
|
|
3241
|
+
const swept = await triggerHeartbeatSweep(apiConfig);
|
|
3242
|
+
if (swept) {
|
|
3243
|
+
console.log(`[${role}] Startup heartbeat sweep completed`);
|
|
3244
|
+
} else {
|
|
3245
|
+
console.warn(`[${role}] Startup heartbeat sweep failed (non-fatal)`);
|
|
3246
|
+
}
|
|
3247
|
+
}
|
|
3307
3248
|
|
|
3308
|
-
|
|
3309
|
-
|
|
3310
|
-
|
|
3311
|
-
|
|
3312
|
-
// We deliberately `continue` BEFORE the empty-poll counter logic
|
|
3313
|
-
// below — refusals are not empty polls.
|
|
3314
|
-
if (trigger.type === "budget_refused") {
|
|
3315
|
-
consecutiveBudgetRefusals++;
|
|
3316
|
-
const backoffMs = computeBudgetBackoffMs(consecutiveBudgetRefusals, PollIntervalMs);
|
|
3317
|
-
const refusalPayload = JSON.stringify({
|
|
3318
|
-
event: "budget_refused",
|
|
3319
|
-
cause: trigger.cause,
|
|
3320
|
-
agentSpend: trigger.agentSpend,
|
|
3321
|
-
agentBudget: trigger.agentBudget,
|
|
3322
|
-
globalSpend: trigger.globalSpend,
|
|
3323
|
-
globalBudget: trigger.globalBudget,
|
|
3324
|
-
resetAt: trigger.resetAt,
|
|
3325
|
-
consecutiveRefusals: consecutiveBudgetRefusals,
|
|
3326
|
-
backoffMs,
|
|
3327
|
-
});
|
|
3328
|
-
console.log(
|
|
3329
|
-
`[${role}] budget_refused — backing off ${backoffMs}ms: ${scrubSecrets(refusalPayload)}`,
|
|
3330
|
-
);
|
|
3331
|
-
await Bun.sleep(backoffMs);
|
|
3332
|
-
continue;
|
|
3333
|
-
}
|
|
3249
|
+
// Phase 4 — exponential back-off state for `budget_refused` triggers.
|
|
3250
|
+
// Resets to 0 on any non-refused outcome. Lives outside the loop so
|
|
3251
|
+
// state persists across iterations.
|
|
3252
|
+
let consecutiveBudgetRefusals = 0;
|
|
3334
3253
|
|
|
3335
|
-
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3339
|
-
console.log(`[${role}] Trigger received: ${trigger.type}`);
|
|
3340
|
-
|
|
3341
|
-
if (
|
|
3342
|
-
trigger.taskId &&
|
|
3343
|
-
(trigger.type === "task_assigned" || trigger.type === "task_offered")
|
|
3344
|
-
) {
|
|
3345
|
-
ensure({
|
|
3346
|
-
id: "worker_received",
|
|
3347
|
-
flow: "task",
|
|
3348
|
-
runId: trigger.taskId,
|
|
3349
|
-
depIds: ["started"],
|
|
3350
|
-
data: {
|
|
3351
|
-
taskId: trigger.taskId,
|
|
3352
|
-
agentId,
|
|
3353
|
-
triggerType: trigger.type,
|
|
3354
|
-
role,
|
|
3355
|
-
},
|
|
3356
|
-
// biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
|
|
3357
|
-
filter: ({}, ctx) => ctx.deps.length > 0,
|
|
3358
|
-
conditions: [{ timeout_ms: 60_000 }], // 1 min: immediate after poll
|
|
3359
|
-
});
|
|
3360
|
-
}
|
|
3254
|
+
// Track last finished task check for leads (to avoid re-processing)
|
|
3255
|
+
while (true) {
|
|
3256
|
+
// Ping server on each iteration to keep status updated
|
|
3257
|
+
await pingServer(apiConfig, role);
|
|
3361
3258
|
|
|
3362
|
-
|
|
3363
|
-
|
|
3364
|
-
trigger,
|
|
3365
|
-
prompt,
|
|
3366
|
-
adapter.formatCommand.bind(adapter),
|
|
3367
|
-
{ hasMcp: adapter.traits.hasMcp },
|
|
3368
|
-
);
|
|
3259
|
+
// Check for completed processes first and ensure tasks are marked as finished
|
|
3260
|
+
await checkCompletedProcesses(state, role, apiConfig);
|
|
3369
3261
|
|
|
3370
|
-
|
|
3371
|
-
|
|
3372
|
-
|
|
3373
|
-
|
|
3374
|
-
|
|
3375
|
-
|
|
3376
|
-
|
|
3377
|
-
|
|
3378
|
-
|
|
3379
|
-
|
|
3380
|
-
|
|
3381
|
-
|
|
3382
|
-
|
|
3383
|
-
|
|
3384
|
-
|
|
3385
|
-
|
|
3386
|
-
|
|
3387
|
-
|
|
3388
|
-
|
|
3389
|
-
|
|
3262
|
+
// Live HARNESS_PROVIDER reconciliation. Re-fetches `swarm_config` (overlaid
|
|
3263
|
+
// on env) and swaps the adapter if the resolved provider changed —
|
|
3264
|
+
// typically because an operator PATCH'd /api/agents/:id/harness-provider
|
|
3265
|
+
// (which writes a swarm_config row) or upserted a config row directly.
|
|
3266
|
+
//
|
|
3267
|
+
// Safety: in-flight sessions hold their own `ProviderSession` references
|
|
3268
|
+
// and continue on the old adapter unaffected. New spawns (below) read
|
|
3269
|
+
// the current `adapter` binding and pick up the swap. `basePrompt` is
|
|
3270
|
+
// rebuilt because traits (and therefore prompt content) may differ across
|
|
3271
|
+
// providers.
|
|
3272
|
+
if (Date.now() - lastHarnessReconcileAt > HARNESS_RECONCILE_INTERVAL_MS) {
|
|
3273
|
+
lastHarnessReconcileAt = Date.now();
|
|
3274
|
+
try {
|
|
3275
|
+
const { env: freshEnv, resolvedProvider } = await fetchResolvedEnv(apiUrl, apiKey, agentId);
|
|
3276
|
+
const { agentVisibleChanged } = await applySwarmConfigDrift(freshEnv, resolvedProvider);
|
|
3277
|
+
if (agentVisibleChanged) {
|
|
3278
|
+
// Re-register so the agents row + dashboard reflect the live
|
|
3279
|
+
// harness_provider / maxTasks. Idempotent: only writes columns
|
|
3280
|
+
// that actually changed (see src/http/agents.ts).
|
|
3281
|
+
await reregisterAgent();
|
|
3282
|
+
}
|
|
3283
|
+
} catch (err) {
|
|
3284
|
+
console.warn(`[${role}] [harness] Reconcile fetch failed (non-fatal): ${err}`);
|
|
3285
|
+
}
|
|
3286
|
+
}
|
|
3390
3287
|
|
|
3391
|
-
|
|
3392
|
-
|
|
3393
|
-
|
|
3394
|
-
|
|
3395
|
-
|
|
3396
|
-
|
|
3397
|
-
|
|
3398
|
-
|
|
3399
|
-
|
|
3400
|
-
|
|
3401
|
-
|
|
3402
|
-
|
|
3403
|
-
|
|
3404
|
-
|
|
3405
|
-
|
|
3406
|
-
console.log(`[${role}] Child task — parent session ID not found, starting fresh`);
|
|
3407
|
-
}
|
|
3408
|
-
}
|
|
3288
|
+
// Migration 055 — post-task credential refresh, cache-keyed on the
|
|
3289
|
+
// *resolved* harness_provider. Re-runs the snapshot when the provider
|
|
3290
|
+
// changes (boot, or after a live swap above) so the dashboard shows
|
|
3291
|
+
// up-to-date credential status for the active adapter.
|
|
3292
|
+
if (!isCredCheckDisabled(process.env)) {
|
|
3293
|
+
const currentHarness = state.harnessProvider;
|
|
3294
|
+
if (currentHarness !== cachedCredHarnessProvider) {
|
|
3295
|
+
cachedCredHarnessProvider = currentHarness;
|
|
3296
|
+
buildCredStatusReport(currentHarness, process.env, {}, "post_task")
|
|
3297
|
+
.then((snap) => reportCredStatus(apiUrl, apiKey, agentId, snap))
|
|
3298
|
+
.catch((err) =>
|
|
3299
|
+
console.warn(`[${role}] cred_status post_task report failed (non-fatal): ${err}`),
|
|
3300
|
+
);
|
|
3301
|
+
}
|
|
3302
|
+
}
|
|
3409
3303
|
|
|
3410
|
-
|
|
3411
|
-
|
|
3304
|
+
// Periodic VCS detection for running tasks (fire-and-forget, throttled per task)
|
|
3305
|
+
const now = Date.now();
|
|
3306
|
+
for (const [taskId, task] of state.activeTasks) {
|
|
3307
|
+
if (vcsDetectedTasks.has(taskId)) continue;
|
|
3308
|
+
const lastCheck = vcsCheckTimestamps.get(taskId) ?? 0;
|
|
3309
|
+
if (now - lastCheck < VCS_CHECK_INTERVAL) continue;
|
|
3310
|
+
if (!task.workingDir) continue;
|
|
3412
3311
|
|
|
3413
|
-
|
|
3414
|
-
|
|
3415
|
-
|
|
3416
|
-
const taskSlackThreadTs = (trigger.task as { slackThreadTs?: string } | undefined)
|
|
3417
|
-
?.slackThreadTs;
|
|
3418
|
-
currentTaskSlackContext = taskSlackChannelId
|
|
3419
|
-
? { channelId: taskSlackChannelId, threadTs: taskSlackThreadTs }
|
|
3420
|
-
: undefined;
|
|
3312
|
+
vcsCheckTimestamps.set(taskId, now);
|
|
3313
|
+
detectVcsForTask(apiUrl, apiKey, taskId, task.workingDir);
|
|
3314
|
+
}
|
|
3421
3315
|
|
|
3422
|
-
|
|
3423
|
-
|
|
3424
|
-
|
|
3425
|
-
|
|
3426
|
-
|
|
3427
|
-
|
|
3428
|
-
|
|
3429
|
-
|
|
3430
|
-
|
|
3431
|
-
|
|
3432
|
-
|
|
3433
|
-
|
|
3434
|
-
|
|
3435
|
-
|
|
3436
|
-
|
|
3316
|
+
// Check for cancelled tasks and signal their subprocesses
|
|
3317
|
+
if (state.activeTasks.size > 0) {
|
|
3318
|
+
for (const [taskId, task] of state.activeTasks) {
|
|
3319
|
+
if (cancelledSignaled.has(taskId)) continue; // Already sent SIGTERM
|
|
3320
|
+
try {
|
|
3321
|
+
const cancelResp = await fetch(
|
|
3322
|
+
`${apiUrl}/cancelled-tasks?taskId=${encodeURIComponent(taskId)}`,
|
|
3323
|
+
{
|
|
3324
|
+
headers: {
|
|
3325
|
+
Authorization: `Bearer ${apiKey}`,
|
|
3326
|
+
"X-Agent-ID": agentId,
|
|
3327
|
+
},
|
|
3328
|
+
},
|
|
3329
|
+
);
|
|
3330
|
+
if (cancelResp.ok) {
|
|
3331
|
+
const cancelData = (await cancelResp.json()) as {
|
|
3332
|
+
cancelled: Array<{ id: string }>;
|
|
3437
3333
|
};
|
|
3438
|
-
|
|
3439
|
-
|
|
3440
|
-
|
|
3441
|
-
|
|
3442
|
-
// Resolve effective working directory (priority: task.dir > repoContext.clonePath > process.cwd())
|
|
3443
|
-
const taskDir = (trigger.task as { dir?: string } | undefined)?.dir;
|
|
3444
|
-
let effectiveCwd: string | undefined;
|
|
3445
|
-
|
|
3446
|
-
if (taskDir) {
|
|
3447
|
-
try {
|
|
3448
|
-
if (existsSync(taskDir) && statSync(taskDir).isDirectory()) {
|
|
3449
|
-
effectiveCwd = taskDir;
|
|
3450
|
-
} else {
|
|
3451
|
-
console.warn(
|
|
3452
|
-
`[${role}] Task dir "${taskDir}" does not exist or is not a directory, falling back to default cwd`,
|
|
3453
|
-
);
|
|
3454
|
-
}
|
|
3455
|
-
} catch {
|
|
3456
|
-
console.warn(
|
|
3457
|
-
`[${role}] Failed to check task dir "${taskDir}", falling back to default cwd`,
|
|
3334
|
+
if (cancelData.cancelled?.some((t) => t.id === taskId)) {
|
|
3335
|
+
console.log(
|
|
3336
|
+
`[${role}] Task ${taskId.slice(0, 8)} was cancelled — sending SIGTERM to subprocess`,
|
|
3458
3337
|
);
|
|
3338
|
+
task.session.abort().catch(() => {});
|
|
3339
|
+
cancelledSignaled.add(taskId);
|
|
3459
3340
|
}
|
|
3460
3341
|
}
|
|
3342
|
+
} catch {
|
|
3343
|
+
// Non-blocking — cancellation check is best-effort
|
|
3344
|
+
}
|
|
3345
|
+
}
|
|
3346
|
+
}
|
|
3461
3347
|
|
|
3462
|
-
|
|
3463
|
-
|
|
3464
|
-
|
|
3465
|
-
|
|
3466
|
-
|
|
3467
|
-
if (effectiveCwd && effectiveCwd !== process.cwd()) {
|
|
3468
|
-
triggerPrompt += `\n\n---\n**Working Directory**: You are starting in \`${effectiveCwd}\`. `;
|
|
3469
|
-
if (taskDir) {
|
|
3470
|
-
triggerPrompt += "This was explicitly set on the task.";
|
|
3471
|
-
} else if (currentRepoContext?.clonePath) {
|
|
3472
|
-
triggerPrompt += "This is the repository clone path for this task's VCS repo.";
|
|
3473
|
-
}
|
|
3474
|
-
triggerPrompt +=
|
|
3475
|
-
" You can still access any path on the filesystem — this is just your starting directory.";
|
|
3476
|
-
}
|
|
3348
|
+
// Only poll if we have capacity
|
|
3349
|
+
if (state.activeTasks.size < state.maxConcurrent) {
|
|
3350
|
+
console.log(
|
|
3351
|
+
`[${role}] Polling for triggers (${state.activeTasks.size}/${state.maxConcurrent} active)...`,
|
|
3352
|
+
);
|
|
3477
3353
|
|
|
3478
|
-
|
|
3479
|
-
|
|
3480
|
-
if (taskDir && !effectiveCwd) {
|
|
3481
|
-
cwdWarning = `\n\nNote: The task requested working directory "${taskDir}" but it does not exist. Falling back to default directory.`;
|
|
3482
|
-
}
|
|
3354
|
+
// Use shorter timeout if tasks are running (to check completion more often)
|
|
3355
|
+
const effectiveTimeout = state.activeTasks.size > 0 ? 5000 : PollTimeoutMs;
|
|
3483
3356
|
|
|
3484
|
-
|
|
3485
|
-
|
|
3486
|
-
|
|
3487
|
-
|
|
3488
|
-
|
|
3489
|
-
|
|
3490
|
-
|
|
3491
|
-
iteration++;
|
|
3492
|
-
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
|
3493
|
-
const taskIdSlice = trigger.taskId?.slice(0, 8) || "notask";
|
|
3494
|
-
const logFile = `${logDir}/${timestamp}-${taskIdSlice}.jsonl`;
|
|
3495
|
-
|
|
3496
|
-
console.log(`\n[${role}] === Iteration ${iteration} ===`);
|
|
3497
|
-
console.log(`[${role}] Logging to: ${logFile}`);
|
|
3498
|
-
console.log(`[${role}] Prompt: ${triggerPrompt.slice(0, 100)}...`);
|
|
3499
|
-
if (effectiveCwd) {
|
|
3500
|
-
console.log(`[${role}] Working directory: ${effectiveCwd}`);
|
|
3501
|
-
}
|
|
3357
|
+
const trigger = await pollForTrigger({
|
|
3358
|
+
apiUrl,
|
|
3359
|
+
apiKey,
|
|
3360
|
+
agentId,
|
|
3361
|
+
pollInterval: PollIntervalMs,
|
|
3362
|
+
pollTimeout: effectiveTimeout,
|
|
3363
|
+
});
|
|
3502
3364
|
|
|
3503
|
-
|
|
3504
|
-
|
|
3505
|
-
|
|
3506
|
-
|
|
3507
|
-
|
|
3508
|
-
|
|
3509
|
-
|
|
3510
|
-
|
|
3511
|
-
|
|
3512
|
-
|
|
3365
|
+
if (trigger) {
|
|
3366
|
+
// Phase 4 — server refused to admit a claim because the agent or
|
|
3367
|
+
// global budget is exhausted. Log a structured payload (scrubbed
|
|
3368
|
+
// at egress per project convention) and back off exponentially.
|
|
3369
|
+
// We deliberately `continue` BEFORE the empty-poll counter logic
|
|
3370
|
+
// below — refusals are not empty polls.
|
|
3371
|
+
if (trigger.type === "budget_refused") {
|
|
3372
|
+
consecutiveBudgetRefusals++;
|
|
3373
|
+
const backoffMs = computeBudgetBackoffMs(consecutiveBudgetRefusals, PollIntervalMs);
|
|
3374
|
+
const refusalPayload = JSON.stringify({
|
|
3375
|
+
event: "budget_refused",
|
|
3376
|
+
cause: trigger.cause,
|
|
3377
|
+
agentSpend: trigger.agentSpend,
|
|
3378
|
+
agentBudget: trigger.agentBudget,
|
|
3379
|
+
globalSpend: trigger.globalSpend,
|
|
3380
|
+
globalBudget: trigger.globalBudget,
|
|
3381
|
+
resetAt: trigger.resetAt,
|
|
3382
|
+
consecutiveRefusals: consecutiveBudgetRefusals,
|
|
3383
|
+
backoffMs,
|
|
3384
|
+
});
|
|
3385
|
+
console.log(
|
|
3386
|
+
`[${role}] budget_refused — backing off ${backoffMs}ms: ${scrubSecrets(refusalPayload)}`,
|
|
3387
|
+
);
|
|
3388
|
+
await Bun.sleep(backoffMs);
|
|
3389
|
+
continue;
|
|
3390
|
+
}
|
|
3513
3391
|
|
|
3514
|
-
|
|
3515
|
-
|
|
3392
|
+
// Any other non-null trigger means we're being admitted normally —
|
|
3393
|
+
// reset the back-off so the next refusal starts at base interval.
|
|
3394
|
+
consecutiveBudgetRefusals = 0;
|
|
3516
3395
|
|
|
3517
|
-
|
|
3518
|
-
let runningTask: RunningTask;
|
|
3519
|
-
try {
|
|
3520
|
-
runningTask = await spawnProviderProcess(
|
|
3521
|
-
adapter,
|
|
3522
|
-
{
|
|
3523
|
-
prompt: triggerPrompt,
|
|
3524
|
-
logFile,
|
|
3525
|
-
systemPrompt: taskSystemPrompt,
|
|
3526
|
-
additionalArgs: effectiveAdditionalArgs,
|
|
3527
|
-
role,
|
|
3528
|
-
apiUrl,
|
|
3529
|
-
apiKey,
|
|
3530
|
-
agentId,
|
|
3531
|
-
runnerSessionId: taskRunnerSessionId,
|
|
3532
|
-
iteration,
|
|
3533
|
-
taskId: trigger.taskId,
|
|
3534
|
-
model: taskModel,
|
|
3535
|
-
cwd: effectiveCwd,
|
|
3536
|
-
vcsRepo: taskVcsRepo,
|
|
3537
|
-
},
|
|
3538
|
-
logDir,
|
|
3539
|
-
isYolo,
|
|
3540
|
-
);
|
|
3541
|
-
} catch (spawnErr) {
|
|
3542
|
-
const errMsg = spawnErr instanceof Error ? spawnErr.message : String(spawnErr);
|
|
3543
|
-
console.error(
|
|
3544
|
-
`[${role}] Failed to spawn process for task ${trigger.taskId?.slice(0, 8) || "unknown"}: ${errMsg}`,
|
|
3545
|
-
);
|
|
3546
|
-
if (trigger.taskId) {
|
|
3547
|
-
await ensureTaskFinished(
|
|
3548
|
-
apiConfig,
|
|
3549
|
-
role,
|
|
3550
|
-
trigger.taskId,
|
|
3551
|
-
1,
|
|
3552
|
-
`Spawn failed: ${errMsg}`,
|
|
3553
|
-
undefined,
|
|
3554
|
-
state.harnessProvider,
|
|
3555
|
-
);
|
|
3556
|
-
}
|
|
3557
|
-
continue;
|
|
3558
|
-
}
|
|
3396
|
+
console.log(`[${role}] Trigger received: ${trigger.type}`);
|
|
3559
3397
|
|
|
3398
|
+
if (
|
|
3399
|
+
trigger.taskId &&
|
|
3400
|
+
(trigger.type === "task_assigned" || trigger.type === "task_offered")
|
|
3401
|
+
) {
|
|
3560
3402
|
ensure({
|
|
3561
|
-
id: "
|
|
3403
|
+
id: "worker_received",
|
|
3562
3404
|
flow: "task",
|
|
3563
|
-
runId:
|
|
3564
|
-
depIds: ["
|
|
3405
|
+
runId: trigger.taskId,
|
|
3406
|
+
depIds: ["started"],
|
|
3565
3407
|
data: {
|
|
3566
|
-
taskId:
|
|
3408
|
+
taskId: trigger.taskId,
|
|
3567
3409
|
agentId,
|
|
3410
|
+
triggerType: trigger.type,
|
|
3568
3411
|
role,
|
|
3569
|
-
model: taskModel,
|
|
3570
3412
|
},
|
|
3571
3413
|
// biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
|
|
3572
3414
|
filter: ({}, ctx) => ctx.deps.length > 0,
|
|
3573
|
-
conditions: [{ timeout_ms: 60_000 }], // 1 min:
|
|
3415
|
+
conditions: [{ timeout_ms: 60_000 }], // 1 min: immediate after poll
|
|
3574
3416
|
});
|
|
3417
|
+
}
|
|
3575
3418
|
|
|
3576
|
-
|
|
3577
|
-
|
|
3578
|
-
|
|
3579
|
-
|
|
3580
|
-
|
|
3581
|
-
|
|
3582
|
-
|
|
3583
|
-
channelId: string;
|
|
3584
|
-
ts: string;
|
|
3585
|
-
}>;
|
|
3586
|
-
}
|
|
3587
|
-
|
|
3588
|
-
state.activeTasks.set(runningTask.taskId, runningTask);
|
|
3419
|
+
// Build prompt based on trigger
|
|
3420
|
+
let triggerPrompt = await buildPromptForTrigger(
|
|
3421
|
+
trigger,
|
|
3422
|
+
prompt,
|
|
3423
|
+
adapter.formatCommand.bind(adapter),
|
|
3424
|
+
{ hasMcp: adapter.traits.hasMcp },
|
|
3425
|
+
);
|
|
3589
3426
|
|
|
3590
|
-
|
|
3591
|
-
|
|
3427
|
+
// Enrich prompt with relevant memories from past sessions
|
|
3428
|
+
if (trigger.type === "task_assigned" || trigger.type === "task_offered") {
|
|
3429
|
+
const task =
|
|
3592
3430
|
trigger.task && typeof trigger.task === "object" && "task" in trigger.task
|
|
3593
|
-
?
|
|
3594
|
-
:
|
|
3595
|
-
|
|
3596
|
-
|
|
3597
|
-
|
|
3598
|
-
|
|
3599
|
-
|
|
3600
|
-
|
|
3431
|
+
? (trigger.task as { task: string; id?: string })
|
|
3432
|
+
: null;
|
|
3433
|
+
if (task?.task) {
|
|
3434
|
+
const memoryContext = await fetchRelevantMemories(
|
|
3435
|
+
apiUrl,
|
|
3436
|
+
apiKey,
|
|
3437
|
+
agentId,
|
|
3438
|
+
task.task,
|
|
3439
|
+
task.id,
|
|
3440
|
+
);
|
|
3441
|
+
if (memoryContext) {
|
|
3442
|
+
triggerPrompt += memoryContext;
|
|
3443
|
+
console.log(`[${role}] Injected relevant memories into task prompt`);
|
|
3444
|
+
}
|
|
3445
|
+
}
|
|
3446
|
+
}
|
|
3601
3447
|
|
|
3602
|
-
|
|
3603
|
-
|
|
3448
|
+
// Resolve --resume for child tasks with parentTaskId
|
|
3449
|
+
let effectiveAdditionalArgs = opts.additionalArgs || [];
|
|
3450
|
+
const taskObj = trigger.task as { parentTaskId?: string } | undefined;
|
|
3451
|
+
if (taskObj?.parentTaskId) {
|
|
3452
|
+
const parentSessionId = await fetchProviderSessionId(
|
|
3453
|
+
apiUrl,
|
|
3454
|
+
apiKey,
|
|
3455
|
+
taskObj.parentTaskId,
|
|
3604
3456
|
);
|
|
3457
|
+
if (parentSessionId) {
|
|
3458
|
+
effectiveAdditionalArgs = [...effectiveAdditionalArgs, "--resume", parentSessionId];
|
|
3459
|
+
console.log(
|
|
3460
|
+
`[${role}] Child task — resuming parent session ${parentSessionId.slice(0, 8)}`,
|
|
3461
|
+
);
|
|
3462
|
+
} else {
|
|
3463
|
+
console.log(`[${role}] Child task — parent session ID not found, starting fresh`);
|
|
3464
|
+
}
|
|
3605
3465
|
}
|
|
3606
|
-
} else {
|
|
3607
|
-
console.log(
|
|
3608
|
-
`[${role}] At capacity (${state.activeTasks.size}/${state.maxConcurrent}), waiting for completion...`,
|
|
3609
|
-
);
|
|
3610
|
-
await Bun.sleep(1000);
|
|
3611
|
-
}
|
|
3612
|
-
}
|
|
3613
|
-
} else {
|
|
3614
|
-
// Original AI-loop mode (existing behavior)
|
|
3615
|
-
console.log(`[${role}] Mode: AI-based polling (legacy)`);
|
|
3616
|
-
|
|
3617
|
-
// Create API config for ping/close
|
|
3618
|
-
const apiConfig: ApiConfig = { apiUrl, apiKey, agentId };
|
|
3619
3466
|
|
|
3620
|
-
|
|
3621
|
-
|
|
3467
|
+
// Extract model from task data for per-task model selection
|
|
3468
|
+
const taskModel = (trigger.task as { model?: string } | undefined)?.model;
|
|
3469
|
+
|
|
3470
|
+
// Detect Slack context for conditional prompt sections
|
|
3471
|
+
const taskSlackChannelId = (trigger.task as { slackChannelId?: string } | undefined)
|
|
3472
|
+
?.slackChannelId;
|
|
3473
|
+
const taskSlackThreadTs = (trigger.task as { slackThreadTs?: string } | undefined)
|
|
3474
|
+
?.slackThreadTs;
|
|
3475
|
+
currentTaskSlackContext = taskSlackChannelId
|
|
3476
|
+
? { channelId: taskSlackChannelId, threadTs: taskSlackThreadTs }
|
|
3477
|
+
: undefined;
|
|
3478
|
+
|
|
3479
|
+
// Handle repo context for tasks with vcsRepo (GitHub/GitLab)
|
|
3480
|
+
const taskVcsRepo = (trigger.task as { vcsRepo?: string } | undefined)?.vcsRepo;
|
|
3481
|
+
if (taskVcsRepo && apiUrl) {
|
|
3482
|
+
const repoConfig = await fetchRepoConfig(apiUrl, apiKey, taskVcsRepo);
|
|
3483
|
+
// Fall back to convention-based config if repo is not registered
|
|
3484
|
+
const effectiveConfig = repoConfig ?? {
|
|
3485
|
+
url: taskVcsRepo,
|
|
3486
|
+
name: taskVcsRepo.split("/").pop() || taskVcsRepo,
|
|
3487
|
+
clonePath: `/workspace/repos/${taskVcsRepo.split("/").pop() || taskVcsRepo}`,
|
|
3488
|
+
defaultBranch: "main",
|
|
3489
|
+
};
|
|
3490
|
+
const repoResult = await ensureRepoForTask(effectiveConfig, role);
|
|
3491
|
+
currentRepoContext = {
|
|
3492
|
+
...repoResult,
|
|
3493
|
+
guidelines: repoConfig?.guidelines ?? null,
|
|
3494
|
+
};
|
|
3495
|
+
} else {
|
|
3496
|
+
currentRepoContext = undefined;
|
|
3497
|
+
}
|
|
3622
3498
|
|
|
3623
|
-
|
|
3624
|
-
|
|
3625
|
-
|
|
3499
|
+
// Resolve effective working directory (priority: task.dir > repoContext.clonePath > process.cwd())
|
|
3500
|
+
const taskDir = (trigger.task as { dir?: string } | undefined)?.dir;
|
|
3501
|
+
let effectiveCwd: string | undefined;
|
|
3626
3502
|
|
|
3627
|
-
|
|
3628
|
-
|
|
3629
|
-
|
|
3503
|
+
if (taskDir) {
|
|
3504
|
+
try {
|
|
3505
|
+
if (existsSync(taskDir) && statSync(taskDir).isDirectory()) {
|
|
3506
|
+
effectiveCwd = taskDir;
|
|
3507
|
+
} else {
|
|
3508
|
+
console.warn(
|
|
3509
|
+
`[${role}] Task dir "${taskDir}" does not exist or is not a directory, falling back to default cwd`,
|
|
3510
|
+
);
|
|
3511
|
+
}
|
|
3512
|
+
} catch {
|
|
3513
|
+
console.warn(
|
|
3514
|
+
`[${role}] Failed to check task dir "${taskDir}", falling back to default cwd`,
|
|
3515
|
+
);
|
|
3516
|
+
}
|
|
3517
|
+
}
|
|
3630
3518
|
|
|
3631
|
-
|
|
3632
|
-
|
|
3519
|
+
if (!effectiveCwd && currentRepoContext?.clonePath) {
|
|
3520
|
+
effectiveCwd = currentRepoContext.clonePath;
|
|
3521
|
+
}
|
|
3633
3522
|
|
|
3634
|
-
|
|
3635
|
-
|
|
3636
|
-
|
|
3637
|
-
|
|
3638
|
-
|
|
3639
|
-
|
|
3640
|
-
|
|
3641
|
-
|
|
3642
|
-
|
|
3523
|
+
// Annotate prompt with working directory context
|
|
3524
|
+
if (effectiveCwd && effectiveCwd !== process.cwd()) {
|
|
3525
|
+
triggerPrompt += `\n\n---\n**Working Directory**: You are starting in \`${effectiveCwd}\`. `;
|
|
3526
|
+
if (taskDir) {
|
|
3527
|
+
triggerPrompt += "This was explicitly set on the task.";
|
|
3528
|
+
} else if (currentRepoContext?.clonePath) {
|
|
3529
|
+
triggerPrompt += "This is the repository clone path for this task's VCS repo.";
|
|
3530
|
+
}
|
|
3531
|
+
triggerPrompt +=
|
|
3532
|
+
" You can still access any path on the filesystem — this is just your starting directory.";
|
|
3533
|
+
}
|
|
3643
3534
|
|
|
3644
|
-
|
|
3645
|
-
|
|
3646
|
-
|
|
3647
|
-
|
|
3648
|
-
|
|
3649
|
-
role,
|
|
3650
|
-
apiUrl,
|
|
3651
|
-
apiKey,
|
|
3652
|
-
agentId,
|
|
3653
|
-
});
|
|
3535
|
+
// Warn in system prompt when task dir was specified but doesn't exist
|
|
3536
|
+
let cwdWarning = "";
|
|
3537
|
+
if (taskDir && !effectiveCwd) {
|
|
3538
|
+
cwdWarning = `\n\nNote: The task requested working directory "${taskDir}" but it does not exist. Falling back to default directory.`;
|
|
3539
|
+
}
|
|
3654
3540
|
|
|
3655
|
-
|
|
3656
|
-
const
|
|
3657
|
-
|
|
3541
|
+
// Rebuild system prompt with per-task repo context
|
|
3542
|
+
const taskBasePrompt = await buildSystemPrompt();
|
|
3543
|
+
const taskSystemPrompt =
|
|
3544
|
+
(additionalSystemPrompt
|
|
3545
|
+
? `${taskBasePrompt}\n\n${additionalSystemPrompt}`
|
|
3546
|
+
: taskBasePrompt) + cwdWarning;
|
|
3547
|
+
|
|
3548
|
+
iteration++;
|
|
3549
|
+
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
|
3550
|
+
const taskIdSlice = trigger.taskId?.slice(0, 8) || "notask";
|
|
3551
|
+
const logFile = `${logDir}/${timestamp}-${taskIdSlice}.jsonl`;
|
|
3552
|
+
|
|
3553
|
+
console.log(`\n[${role}] === Iteration ${iteration} ===`);
|
|
3554
|
+
console.log(`[${role}] Logging to: ${logFile}`);
|
|
3555
|
+
console.log(`[${role}] Prompt: ${triggerPrompt.slice(0, 100)}...`);
|
|
3556
|
+
if (effectiveCwd) {
|
|
3557
|
+
console.log(`[${role}] Working directory: ${effectiveCwd}`);
|
|
3558
|
+
}
|
|
3658
3559
|
|
|
3659
|
-
const
|
|
3660
|
-
|
|
3560
|
+
const metadata = {
|
|
3561
|
+
type: metadataType,
|
|
3562
|
+
sessionId,
|
|
3661
3563
|
iteration,
|
|
3662
|
-
|
|
3663
|
-
|
|
3664
|
-
|
|
3564
|
+
timestamp: new Date().toISOString(),
|
|
3565
|
+
prompt: triggerPrompt,
|
|
3566
|
+
trigger: trigger.type,
|
|
3567
|
+
yolo: isYolo,
|
|
3665
3568
|
};
|
|
3569
|
+
await Bun.write(logFile, `${JSON.stringify(metadata)}\n`);
|
|
3570
|
+
|
|
3571
|
+
// Per-task runner session ID so session logs are scoped to this task
|
|
3572
|
+
const taskRunnerSessionId = crypto.randomUUID();
|
|
3573
|
+
|
|
3574
|
+
// Spawn without blocking (await to set up session, but process runs async)
|
|
3575
|
+
let runningTask: RunningTask;
|
|
3576
|
+
try {
|
|
3577
|
+
runningTask = await spawnProviderProcess(
|
|
3578
|
+
adapter,
|
|
3579
|
+
{
|
|
3580
|
+
prompt: triggerPrompt,
|
|
3581
|
+
logFile,
|
|
3582
|
+
systemPrompt: taskSystemPrompt,
|
|
3583
|
+
additionalArgs: effectiveAdditionalArgs,
|
|
3584
|
+
role,
|
|
3585
|
+
apiUrl,
|
|
3586
|
+
apiKey,
|
|
3587
|
+
agentId,
|
|
3588
|
+
runnerSessionId: taskRunnerSessionId,
|
|
3589
|
+
iteration,
|
|
3590
|
+
taskId: trigger.taskId,
|
|
3591
|
+
model: taskModel,
|
|
3592
|
+
cwd: effectiveCwd,
|
|
3593
|
+
vcsRepo: taskVcsRepo,
|
|
3594
|
+
},
|
|
3595
|
+
logDir,
|
|
3596
|
+
isYolo,
|
|
3597
|
+
);
|
|
3598
|
+
} catch (spawnErr) {
|
|
3599
|
+
const errMsg = spawnErr instanceof Error ? spawnErr.message : String(spawnErr);
|
|
3600
|
+
console.error(
|
|
3601
|
+
`[${role}] Failed to spawn process for task ${trigger.taskId?.slice(0, 8) || "unknown"}: ${errMsg}`,
|
|
3602
|
+
);
|
|
3603
|
+
if (trigger.taskId) {
|
|
3604
|
+
await ensureTaskFinished(
|
|
3605
|
+
apiConfig,
|
|
3606
|
+
role,
|
|
3607
|
+
trigger.taskId,
|
|
3608
|
+
1,
|
|
3609
|
+
`Spawn failed: ${errMsg}`,
|
|
3610
|
+
undefined,
|
|
3611
|
+
state.harnessProvider,
|
|
3612
|
+
);
|
|
3613
|
+
}
|
|
3614
|
+
continue;
|
|
3615
|
+
}
|
|
3616
|
+
|
|
3617
|
+
ensure({
|
|
3618
|
+
id: "worker_process_spawned",
|
|
3619
|
+
flow: "task",
|
|
3620
|
+
runId: runningTask.taskId,
|
|
3621
|
+
depIds: ["worker_received"],
|
|
3622
|
+
data: {
|
|
3623
|
+
taskId: runningTask.taskId,
|
|
3624
|
+
agentId,
|
|
3625
|
+
role,
|
|
3626
|
+
model: taskModel,
|
|
3627
|
+
},
|
|
3628
|
+
// biome-ignore lint/correctness/noEmptyPattern: data unused, ctx needed
|
|
3629
|
+
filter: ({}, ctx) => ctx.deps.length > 0,
|
|
3630
|
+
conditions: [{ timeout_ms: 60_000 }], // 1 min: process startup
|
|
3631
|
+
});
|
|
3666
3632
|
|
|
3667
|
-
|
|
3668
|
-
|
|
3669
|
-
|
|
3670
|
-
await Bun.write(errorsFile, `${existingErrors}${JSON.stringify(errorLog)}\n`);
|
|
3633
|
+
// Attach trigger metadata for logging
|
|
3634
|
+
runningTask.triggerType = trigger.type;
|
|
3635
|
+
runningTask.workingDir = effectiveCwd;
|
|
3671
3636
|
|
|
3672
|
-
|
|
3673
|
-
|
|
3674
|
-
|
|
3675
|
-
|
|
3637
|
+
// Attach deferred cursor updates for channel_activity triggers
|
|
3638
|
+
if (trigger.type === "channel_activity" && trigger.cursorUpdates) {
|
|
3639
|
+
runningTask.cursorUpdates = trigger.cursorUpdates as Array<{
|
|
3640
|
+
channelId: string;
|
|
3641
|
+
ts: string;
|
|
3642
|
+
}>;
|
|
3676
3643
|
}
|
|
3677
3644
|
|
|
3678
|
-
|
|
3679
|
-
}
|
|
3645
|
+
state.activeTasks.set(runningTask.taskId, runningTask);
|
|
3680
3646
|
|
|
3681
|
-
|
|
3647
|
+
// Register active session for concurrency awareness
|
|
3648
|
+
const taskDesc =
|
|
3649
|
+
trigger.task && typeof trigger.task === "object" && "task" in trigger.task
|
|
3650
|
+
? String((trigger.task as { task: string }).task).slice(0, 200)
|
|
3651
|
+
: undefined;
|
|
3652
|
+
registerActiveSession(apiConfig, {
|
|
3653
|
+
taskId: runningTask.taskId,
|
|
3654
|
+
triggerType: trigger.type,
|
|
3655
|
+
taskDescription: taskDesc,
|
|
3656
|
+
runnerSessionId: taskRunnerSessionId,
|
|
3657
|
+
});
|
|
3658
|
+
|
|
3659
|
+
console.log(
|
|
3660
|
+
`[${role}] Started task ${runningTask.taskId.slice(0, 8)} (${state.activeTasks.size}/${state.maxConcurrent} active, trigger: ${trigger.type})`,
|
|
3661
|
+
);
|
|
3662
|
+
}
|
|
3663
|
+
} else {
|
|
3664
|
+
console.log(
|
|
3665
|
+
`[${role}] At capacity (${state.activeTasks.size}/${state.maxConcurrent}), waiting for completion...`,
|
|
3666
|
+
);
|
|
3667
|
+
await Bun.sleep(1000);
|
|
3682
3668
|
}
|
|
3683
3669
|
}
|
|
3684
3670
|
}
|