@desplega.ai/agent-swarm 1.75.0 → 1.76.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/openapi.json +973 -36
- package/package.json +2 -2
- package/src/be/db.ts +527 -9
- package/src/be/memory/raters/llm-summarizer.ts +218 -0
- package/src/be/memory/raters/llm.ts +56 -75
- package/src/be/memory/retrieval-store.ts +21 -0
- package/src/be/migrations/054_agent_harness_provider.sql +21 -0
- package/src/be/migrations/055_agent_cred_status.sql +15 -0
- package/src/be/migrations/056_drop_agent_tasks_source_check.sql +139 -0
- package/src/be/migrations/057_inbox_item_state.sql +27 -0
- package/src/be/migrations/058_task_templates.sql +31 -0
- package/src/be/swarm-config-guard.ts +24 -0
- package/src/commands/credential-wait.ts +1 -1
- package/src/commands/provider-credentials.ts +434 -0
- package/src/commands/runner.ts +229 -42
- package/src/hooks/hook.ts +115 -95
- package/src/http/agents.ts +82 -2
- package/src/http/config.ts +11 -1
- package/src/http/inbox-state.ts +89 -0
- package/src/http/index.ts +10 -0
- package/src/http/sessions.ts +86 -0
- package/src/http/status.ts +665 -0
- package/src/http/task-templates.ts +51 -0
- package/src/http/tasks.ts +85 -5
- package/src/http/users.ts +134 -0
- package/src/providers/claude-adapter.ts +5 -0
- package/src/providers/codex-adapter.ts +1 -1
- package/src/providers/index.ts +1 -1
- package/src/slack/handlers.ts +0 -1
- package/src/tests/agents-harness-provider.test.ts +333 -0
- package/src/tests/credential-check.test.ts +32 -1
- package/src/tests/credential-status-api.test.ts +42 -0
- package/src/tests/harness-provider-resolution.test.ts +242 -0
- package/src/tests/jira-sync.test.ts +1 -1
- package/src/tests/memory-rater-llm-summarizer.test.ts +317 -0
- package/src/tests/memory-rater-llm.test.ts +265 -107
- package/src/tests/migration-runner-regressions.test.ts +17 -2
- package/src/tests/sessions.test.ts +141 -0
- package/src/tests/status.test.ts +843 -0
- package/src/tests/stop-hook-task-resolution.test.ts +98 -0
- package/src/tests/template-recommendations.test.ts +148 -0
- package/src/tests/use-dismissible-card.test.ts +140 -0
- package/src/tools/swarm-config/set-config.ts +17 -1
- package/src/types.ts +117 -0
- package/src/utils/harness-provider.ts +32 -0
- package/tsconfig.json +0 -2
- package/src/providers/credentials.ts +0 -74
package/src/commands/runner.ts
CHANGED
|
@@ -25,11 +25,17 @@ import { computeBudgetBackoffMs } from "../utils/budget-backoff.ts";
|
|
|
25
25
|
import { getContextWindowSize } from "../utils/context-window.ts";
|
|
26
26
|
import { type CredentialSelection, resolveCredentialPools } from "../utils/credentials.ts";
|
|
27
27
|
import { parseRateLimitResetTime } from "../utils/error-tracker.ts";
|
|
28
|
+
import { resolveHarnessProvider } from "../utils/harness-provider.ts";
|
|
28
29
|
import { prettyPrintLine, prettyPrintStderr } from "../utils/pretty-print.ts";
|
|
29
30
|
import { scrubSecrets } from "../utils/secret-scrubber.ts";
|
|
30
31
|
import { detectVcsProvider } from "../vcs/index.ts";
|
|
31
32
|
import { interpolate } from "../workflows/template.ts";
|
|
32
33
|
import { awaitCredentials, BootMaxWaitExceededError, EX_CONFIG } from "./credential-wait.ts";
|
|
34
|
+
import {
|
|
35
|
+
buildCredStatusReport,
|
|
36
|
+
isCredCheckDisabled,
|
|
37
|
+
reportCredStatus,
|
|
38
|
+
} from "./provider-credentials.ts";
|
|
33
39
|
// Side-effect import: registers runner trigger/resumption templates
|
|
34
40
|
import "./templates.ts";
|
|
35
41
|
|
|
@@ -204,6 +210,13 @@ async function closeAgent(config: ApiConfig, role: string): Promise<void> {
|
|
|
204
210
|
interface ResolvedEnvResult {
|
|
205
211
|
env: Record<string, string | undefined>;
|
|
206
212
|
credentialSelections: CredentialSelection[];
|
|
213
|
+
/**
|
|
214
|
+
* Effective `HARNESS_PROVIDER` after layering swarm_config over the base
|
|
215
|
+
* env. Callers should prefer this over `process.env.HARNESS_PROVIDER` so
|
|
216
|
+
* that an operator's swarm_config row (repo > agent > global) actually
|
|
217
|
+
* takes effect on the worker.
|
|
218
|
+
*/
|
|
219
|
+
resolvedProvider: ProviderName;
|
|
207
220
|
}
|
|
208
221
|
|
|
209
222
|
async function fetchResolvedEnv(
|
|
@@ -241,6 +254,8 @@ async function fetchResolvedEnv(
|
|
|
241
254
|
}
|
|
242
255
|
}
|
|
243
256
|
|
|
257
|
+
const resolvedProvider = resolveHarnessProvider(env, baseEnv);
|
|
258
|
+
|
|
244
259
|
const credentialSelections = await resolveCredentialPools(env, {
|
|
245
260
|
apiUrl,
|
|
246
261
|
apiKey,
|
|
@@ -248,10 +263,13 @@ async function fetchResolvedEnv(
|
|
|
248
263
|
// CLAUDE_CODE_OAUTH_TOKEN stamped on their task record (and vice
|
|
249
264
|
// versa) just because both env vars happen to be set in the worker
|
|
250
265
|
// container. See `PROVIDER_CREDENTIAL_VARS` in src/utils/credentials.ts.
|
|
251
|
-
|
|
266
|
+
//
|
|
267
|
+
// Use the resolved provider (swarm_config > env) so an operator can flip
|
|
268
|
+
// the worker's harness from the dashboard without restarting the container.
|
|
269
|
+
provider: resolvedProvider,
|
|
252
270
|
});
|
|
253
271
|
|
|
254
|
-
return { env, credentialSelections };
|
|
272
|
+
return { env, credentialSelections, resolvedProvider };
|
|
255
273
|
}
|
|
256
274
|
|
|
257
275
|
/** Tools that produce noise — skip auto-progress for these */
|
|
@@ -535,6 +553,12 @@ export async function ensureTaskFinished(
|
|
|
535
553
|
exitCode: number,
|
|
536
554
|
failureReason?: string,
|
|
537
555
|
providerOutput?: string,
|
|
556
|
+
/**
|
|
557
|
+
* Active provider for this task. When provided, gates the structured-output
|
|
558
|
+
* fallback path correctly even if `process.env.HARNESS_PROVIDER` differs
|
|
559
|
+
* from the resolved swarm_config value. When omitted, falls back to env and then to "claude".
|
|
560
|
+
*/
|
|
561
|
+
provider?: ProviderName,
|
|
538
562
|
): Promise<void> {
|
|
539
563
|
const headers: Record<string, string> = {
|
|
540
564
|
"X-Agent-ID": config.agentId,
|
|
@@ -560,7 +584,7 @@ export async function ensureTaskFinished(
|
|
|
560
584
|
body.output = providerOutput;
|
|
561
585
|
} else {
|
|
562
586
|
// Try structured output fallback if the task has an outputSchema
|
|
563
|
-
const adapterType = process.env.HARNESS_PROVIDER
|
|
587
|
+
const adapterType = provider ?? process.env.HARNESS_PROVIDER ?? "claude";
|
|
564
588
|
const fallback = await handleStructuredOutputFallback(config, taskId, adapterType);
|
|
565
589
|
|
|
566
590
|
console.log(`[${role}] Task ${taskId.slice(0, 8)} fallback result: ${fallback.kind}`);
|
|
@@ -892,7 +916,15 @@ function setupShutdownHandlers(
|
|
|
892
916
|
console.warn(
|
|
893
917
|
`[${role}] Failed to pause task ${taskId.slice(0, 8)}, marking as failed instead`,
|
|
894
918
|
);
|
|
895
|
-
await ensureTaskFinished(
|
|
919
|
+
await ensureTaskFinished(
|
|
920
|
+
apiConfig,
|
|
921
|
+
role,
|
|
922
|
+
taskId,
|
|
923
|
+
1,
|
|
924
|
+
undefined,
|
|
925
|
+
undefined,
|
|
926
|
+
state.harnessProvider,
|
|
927
|
+
);
|
|
896
928
|
}
|
|
897
929
|
}
|
|
898
930
|
}
|
|
@@ -960,6 +992,14 @@ interface RunningTask {
|
|
|
960
992
|
interface RunnerState {
|
|
961
993
|
activeTasks: Map<string, RunningTask>;
|
|
962
994
|
maxConcurrent: number;
|
|
995
|
+
/**
|
|
996
|
+
* Effective harness provider for this worker boot session — resolved
|
|
997
|
+
* from `swarm_config` (overlay) > `process.env.HARNESS_PROVIDER` > "claude".
|
|
998
|
+
* Used by error / cleanup paths so the structured-output fallback runs the
|
|
999
|
+
* right adapter even when env disagrees with swarm_config. Section 4
|
|
1000
|
+
* (live poll-loop reconciliation) reassigns this whenever the resolved provider changes.
|
|
1001
|
+
*/
|
|
1002
|
+
harnessProvider: ProviderName;
|
|
963
1003
|
}
|
|
964
1004
|
|
|
965
1005
|
/** Buffer for session logs */
|
|
@@ -1337,6 +1377,13 @@ async function registerAgent(opts: {
|
|
|
1337
1377
|
role?: string;
|
|
1338
1378
|
capabilities?: string[];
|
|
1339
1379
|
maxTasks?: number;
|
|
1380
|
+
/**
|
|
1381
|
+
* Resolved harness provider (swarm_config > env > "claude"). Sent as both
|
|
1382
|
+
* the legacy `provider` field and the canonical `harness_provider` column.
|
|
1383
|
+
* Defaults to `process.env.HARNESS_PROVIDER || "claude"` for callers that
|
|
1384
|
+
* haven't migrated to passing it explicitly.
|
|
1385
|
+
*/
|
|
1386
|
+
harnessProvider?: ProviderName;
|
|
1340
1387
|
}): Promise<void> {
|
|
1341
1388
|
const headers: Record<string, string> = {
|
|
1342
1389
|
"Content-Type": "application/json",
|
|
@@ -1346,7 +1393,16 @@ async function registerAgent(opts: {
|
|
|
1346
1393
|
headers.Authorization = `Bearer ${opts.apiKey}`;
|
|
1347
1394
|
}
|
|
1348
1395
|
|
|
1349
|
-
const provider =
|
|
1396
|
+
const provider: ProviderName =
|
|
1397
|
+
opts.harnessProvider ?? ((process.env.HARNESS_PROVIDER || "claude") as ProviderName);
|
|
1398
|
+
|
|
1399
|
+
// Phase 1.5 (cloud-personalization): also push the canonical
|
|
1400
|
+
// `harness_provider` field so the API can persist it in its own column
|
|
1401
|
+
// (`agents.harness_provider`). Always send the resolved provider value
|
|
1402
|
+
// (defaulting to "claude" when HARNESS_PROVIDER is unset) so agents that
|
|
1403
|
+
// don't explicitly set the env var still self-report instead of leaving
|
|
1404
|
+
// the column NULL — matches how `provider` already defaults above.
|
|
1405
|
+
const harnessProvider: ProviderName = provider;
|
|
1350
1406
|
|
|
1351
1407
|
const response = await fetch(`${opts.apiUrl}/api/agents`, {
|
|
1352
1408
|
method: "POST",
|
|
@@ -1358,6 +1414,7 @@ async function registerAgent(opts: {
|
|
|
1358
1414
|
capabilities: opts.capabilities,
|
|
1359
1415
|
maxTasks: opts.maxTasks,
|
|
1360
1416
|
provider,
|
|
1417
|
+
harness_provider: harnessProvider,
|
|
1361
1418
|
}),
|
|
1362
1419
|
});
|
|
1363
1420
|
|
|
@@ -2186,6 +2243,7 @@ async function checkCompletedProcesses(
|
|
|
2186
2243
|
result.exitCode,
|
|
2187
2244
|
failureReason,
|
|
2188
2245
|
result.output,
|
|
2246
|
+
state.harnessProvider,
|
|
2189
2247
|
);
|
|
2190
2248
|
|
|
2191
2249
|
ensure({
|
|
@@ -2296,9 +2354,6 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
2296
2354
|
// Initialize Business-Use SDK for worker-side instrumentation
|
|
2297
2355
|
initialize();
|
|
2298
2356
|
|
|
2299
|
-
// Create provider adapter based on HARNESS_PROVIDER env var (default: claude)
|
|
2300
|
-
const adapter = createProviderAdapter(process.env.HARNESS_PROVIDER || "claude");
|
|
2301
|
-
|
|
2302
2357
|
const sessionId = process.env.SESSION_ID || crypto.randomUUID().slice(0, 8);
|
|
2303
2358
|
const baseLogDir = opts.logsDir || process.env.LOG_DIR || "/logs";
|
|
2304
2359
|
const logDir = `${baseLogDir}/${sessionId}`;
|
|
@@ -2313,6 +2368,30 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
2313
2368
|
|
|
2314
2369
|
const apiUrl = process.env.MCP_BASE_URL || `http://localhost:${process.env.PORT || "3013"}`;
|
|
2315
2370
|
const swarmUrl = process.env.SWARM_URL || "localhost";
|
|
2371
|
+
const apiKey = process.env.API_KEY || "";
|
|
2372
|
+
|
|
2373
|
+
// Resolve the boot harness provider from swarm_config (repo > agent > global,
|
|
2374
|
+
// overlaid on top of `process.env`). This is what selects the adapter for
|
|
2375
|
+
// this worker's lifetime. On a fresh worker (agentId="unknown") only global
|
|
2376
|
+
// swarm_config applies; once registered, an operator writing an agent-scoped
|
|
2377
|
+
// HARNESS_PROVIDER row takes effect on the next reconciliation cycle (Section 4)
|
|
2378
|
+
// or worker restart.
|
|
2379
|
+
//
|
|
2380
|
+
// Failures (network, API down, malformed value) fall back to env then "claude"
|
|
2381
|
+
// so a swarm_config outage cannot wedge boot.
|
|
2382
|
+
let bootProvider: ProviderName;
|
|
2383
|
+
try {
|
|
2384
|
+
bootProvider = (await fetchResolvedEnv(apiUrl, apiKey, agentId)).resolvedProvider;
|
|
2385
|
+
} catch (err) {
|
|
2386
|
+
console.warn(`[runner] fetchResolvedEnv failed at boot, falling back to env: ${err}`);
|
|
2387
|
+
bootProvider = resolveHarnessProvider({}, process.env);
|
|
2388
|
+
}
|
|
2389
|
+
console.log(`[runner] Resolved HARNESS_PROVIDER: ${bootProvider}`);
|
|
2390
|
+
|
|
2391
|
+
// Create provider adapter using the resolved value. `let` so the poll-loop
|
|
2392
|
+
// reconciliation block (Section 4) can swap it live when an operator changes
|
|
2393
|
+
// HARNESS_PROVIDER in swarm_config — call sites read the current binding.
|
|
2394
|
+
let adapter = createProviderAdapter(bootProvider);
|
|
2316
2395
|
|
|
2317
2396
|
// Configure HTTP-based template resolution (workers resolve via API, not local DB)
|
|
2318
2397
|
if (process.env.API_KEY) {
|
|
@@ -2383,9 +2462,11 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
2383
2462
|
// Slack context for current task (gates Slack instructions in prompt)
|
|
2384
2463
|
let currentTaskSlackContext: BasePromptArgs["slackContext"] | undefined;
|
|
2385
2464
|
|
|
2386
|
-
// Generate base prompt (identity fields injected after profile fetch below)
|
|
2387
|
-
|
|
2465
|
+
// Generate base prompt (identity fields injected after profile fetch below).
|
|
2466
|
+
// Traits are read fresh on each call so a live adapter swap (Section 4)
|
|
2467
|
+
// produces a prompt matching the new provider's capabilities.
|
|
2388
2468
|
const buildSystemPrompt = async () => {
|
|
2469
|
+
const { traits } = adapter;
|
|
2389
2470
|
return getBasePrompt({
|
|
2390
2471
|
role,
|
|
2391
2472
|
agentId,
|
|
@@ -2461,7 +2542,6 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
2461
2542
|
console.log(`[${role}] Total system prompt length: ${resolvedSystemPrompt.length} chars`);
|
|
2462
2543
|
|
|
2463
2544
|
const isAiLoop = opts.aiLoop || process.env.AI_LOOP === "true";
|
|
2464
|
-
const apiKey = process.env.API_KEY || "";
|
|
2465
2545
|
|
|
2466
2546
|
// Constants for polling
|
|
2467
2547
|
const PollIntervalMs = 2000; // 2 seconds between polls
|
|
@@ -2509,11 +2589,27 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
2509
2589
|
const state: RunnerState = {
|
|
2510
2590
|
activeTasks: new Map(),
|
|
2511
2591
|
maxConcurrent,
|
|
2592
|
+
harnessProvider: bootProvider,
|
|
2512
2593
|
};
|
|
2513
2594
|
|
|
2514
2595
|
// Track tasks already signaled for cancellation to avoid repeated SIGTERM
|
|
2515
2596
|
const cancelledSignaled = new Set<string>();
|
|
2516
2597
|
|
|
2598
|
+
// Migration 055 — cache the harness_provider value used when we last
|
|
2599
|
+
// built a `cred_status` snapshot. Re-runs the post-task check only when
|
|
2600
|
+
// the resolved provider changes. Section 4 of the swarm_config-overrides-
|
|
2601
|
+
// HARNESS_PROVIDER work makes this dynamic: state.harnessProvider is
|
|
2602
|
+
// reconciled below from `swarm_config`, so an operator's change reaches
|
|
2603
|
+
// here without a worker restart.
|
|
2604
|
+
let cachedCredHarnessProvider: string | null = null;
|
|
2605
|
+
|
|
2606
|
+
// Throttle for live HARNESS_PROVIDER reconciliation. Each reconciliation
|
|
2607
|
+
// calls `fetchResolvedEnv` which also re-resolves credential pools — we
|
|
2608
|
+
// don't want that on every 2s poll. 10s gives operator changes a near-
|
|
2609
|
+
// immediate effect from a UX perspective without hammering the API.
|
|
2610
|
+
let lastHarnessReconcileAt = 0;
|
|
2611
|
+
const HARNESS_RECONCILE_INTERVAL_MS = 10_000;
|
|
2612
|
+
|
|
2517
2613
|
// Create API config for ping/close
|
|
2518
2614
|
const apiConfig: ApiConfig = { apiUrl, apiKey, agentId };
|
|
2519
2615
|
|
|
@@ -2535,6 +2631,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
2535
2631
|
isLead,
|
|
2536
2632
|
capabilities,
|
|
2537
2633
|
maxTasks: maxConcurrent,
|
|
2634
|
+
harnessProvider: bootProvider,
|
|
2538
2635
|
});
|
|
2539
2636
|
console.log(`[${role}] Registered as "${agentName}" (ID: ${agentId})`);
|
|
2540
2637
|
} catch (error) {
|
|
@@ -2546,37 +2643,59 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
2546
2643
|
// the old bash-level fail-fast in `docker-entrypoint.sh` — the worker is
|
|
2547
2644
|
// already registered (visible to the dashboard) and self-heals once
|
|
2548
2645
|
// creds appear in `swarm_config`. See plans/2026-05-06-worker-credential-safe-loop.md.
|
|
2549
|
-
|
|
2550
|
-
|
|
2551
|
-
|
|
2552
|
-
|
|
2553
|
-
|
|
2554
|
-
|
|
2555
|
-
|
|
2556
|
-
|
|
2557
|
-
|
|
2558
|
-
|
|
2559
|
-
|
|
2560
|
-
|
|
2561
|
-
|
|
2562
|
-
|
|
2563
|
-
|
|
2564
|
-
|
|
2565
|
-
|
|
2566
|
-
|
|
2567
|
-
|
|
2568
|
-
|
|
2569
|
-
|
|
2570
|
-
//
|
|
2571
|
-
|
|
2572
|
-
|
|
2573
|
-
|
|
2574
|
-
|
|
2575
|
-
|
|
2576
|
-
|
|
2577
|
-
|
|
2646
|
+
//
|
|
2647
|
+
// CRED_CHECK_DISABLE=1 opts out entirely: the worker trusts the operator
|
|
2648
|
+
// and starts polling immediately, with a NULL `cred_status` row that the
|
|
2649
|
+
// dashboard surfaces as "unreported."
|
|
2650
|
+
const harnessProvider = bootProvider;
|
|
2651
|
+
cachedCredHarnessProvider = harnessProvider;
|
|
2652
|
+
if (isCredCheckDisabled(process.env)) {
|
|
2653
|
+
console.log(`[${role}] CRED_CHECK_DISABLE=1, skipping credential checks`);
|
|
2654
|
+
} else {
|
|
2655
|
+
try {
|
|
2656
|
+
await awaitCredentials({
|
|
2657
|
+
provider: harnessProvider,
|
|
2658
|
+
refreshEnv: async () => {
|
|
2659
|
+
const { env } = await fetchResolvedEnv(apiUrl, apiKey, agentId);
|
|
2660
|
+
return env;
|
|
2661
|
+
},
|
|
2662
|
+
onTick: (status) => {
|
|
2663
|
+
// Best-effort status report — the dispatcher uses it to route
|
|
2664
|
+
// around blocked agents. Failures are non-fatal (the wait loop
|
|
2665
|
+
// already swallows onTick exceptions). We do NOT include
|
|
2666
|
+
// `cred_status` here — the live test runs once the worker is
|
|
2667
|
+
// ready (below), and intermediate ticks are presence-only.
|
|
2668
|
+
fetch(`${apiUrl}/api/agents/${encodeURIComponent(agentId)}/credential-status`, {
|
|
2669
|
+
method: "PUT",
|
|
2670
|
+
headers: {
|
|
2671
|
+
Authorization: `Bearer ${apiKey}`,
|
|
2672
|
+
"X-Agent-ID": agentId,
|
|
2673
|
+
"Content-Type": "application/json",
|
|
2674
|
+
},
|
|
2675
|
+
body: JSON.stringify({ ready: status.ready, missing: status.missing }),
|
|
2676
|
+
}).catch(() => {
|
|
2677
|
+
// Swallowed — Phase 2 wait loop logs every tick anyway.
|
|
2678
|
+
});
|
|
2679
|
+
},
|
|
2680
|
+
});
|
|
2681
|
+
} catch (err) {
|
|
2682
|
+
if (err instanceof BootMaxWaitExceededError) {
|
|
2683
|
+
console.error(`[${role}] ${err.message}`);
|
|
2684
|
+
process.exit(EX_CONFIG);
|
|
2685
|
+
}
|
|
2686
|
+
throw err;
|
|
2687
|
+
}
|
|
2688
|
+
|
|
2689
|
+
// Migration 055: build the full snapshot (presence + live test) once
|
|
2690
|
+
// creds are ready and POST it to the agent row. Status endpoint reads
|
|
2691
|
+
// this instead of running predicates server-side.
|
|
2692
|
+
try {
|
|
2693
|
+
const snapshot = await buildCredStatusReport(harnessProvider, process.env, {}, "boot");
|
|
2694
|
+
await reportCredStatus(apiUrl, apiKey, agentId, snapshot);
|
|
2695
|
+
} catch (err) {
|
|
2696
|
+
// Non-fatal — worker proceeds even if reporting fails.
|
|
2697
|
+
console.warn(`[${role}] cred_status boot report failed (non-fatal): ${err}`);
|
|
2578
2698
|
}
|
|
2579
|
-
throw err;
|
|
2580
2699
|
}
|
|
2581
2700
|
|
|
2582
2701
|
// Clean up any stale active sessions from previous runs (crash recovery)
|
|
@@ -3009,7 +3128,15 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
3009
3128
|
console.error(
|
|
3010
3129
|
`[${role}] Failed to spawn process for resumed task ${task.id.slice(0, 8)}: ${errMsg}`,
|
|
3011
3130
|
);
|
|
3012
|
-
await ensureTaskFinished(
|
|
3131
|
+
await ensureTaskFinished(
|
|
3132
|
+
apiConfig,
|
|
3133
|
+
role,
|
|
3134
|
+
task.id,
|
|
3135
|
+
1,
|
|
3136
|
+
`Spawn failed: ${errMsg}`,
|
|
3137
|
+
undefined,
|
|
3138
|
+
state.harnessProvider,
|
|
3139
|
+
);
|
|
3013
3140
|
continue;
|
|
3014
3141
|
}
|
|
3015
3142
|
|
|
@@ -3059,6 +3186,64 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
3059
3186
|
// Check for completed processes first and ensure tasks are marked as finished
|
|
3060
3187
|
await checkCompletedProcesses(state, role, apiConfig);
|
|
3061
3188
|
|
|
3189
|
+
// Live HARNESS_PROVIDER reconciliation. Re-fetches `swarm_config` (overlaid
|
|
3190
|
+
// on env) and swaps the adapter if the resolved provider changed —
|
|
3191
|
+
// typically because an operator PATCH'd /api/agents/:id/harness-provider
|
|
3192
|
+
// (which writes a swarm_config row) or upserted a config row directly.
|
|
3193
|
+
//
|
|
3194
|
+
// Safety: in-flight sessions hold their own `ProviderSession` references
|
|
3195
|
+
// and continue on the old adapter unaffected. New spawns (below) read
|
|
3196
|
+
// the current `adapter` binding and pick up the swap. `basePrompt` is
|
|
3197
|
+
// rebuilt because traits (and therefore prompt content) may differ across
|
|
3198
|
+
// providers.
|
|
3199
|
+
if (Date.now() - lastHarnessReconcileAt > HARNESS_RECONCILE_INTERVAL_MS) {
|
|
3200
|
+
lastHarnessReconcileAt = Date.now();
|
|
3201
|
+
try {
|
|
3202
|
+
const { resolvedProvider } = await fetchResolvedEnv(apiUrl, apiKey, agentId);
|
|
3203
|
+
if (resolvedProvider !== state.harnessProvider) {
|
|
3204
|
+
const previous = state.harnessProvider;
|
|
3205
|
+
console.log(
|
|
3206
|
+
`[${role}] [harness] Reconciling adapter: ${previous} → ${resolvedProvider}`,
|
|
3207
|
+
);
|
|
3208
|
+
try {
|
|
3209
|
+
adapter = createProviderAdapter(resolvedProvider);
|
|
3210
|
+
state.harnessProvider = resolvedProvider;
|
|
3211
|
+
basePrompt = await buildSystemPrompt();
|
|
3212
|
+
resolvedSystemPrompt = additionalSystemPrompt
|
|
3213
|
+
? `${basePrompt}\n\n${additionalSystemPrompt}`
|
|
3214
|
+
: basePrompt;
|
|
3215
|
+
// Force a fresh cred_status report below for the new provider.
|
|
3216
|
+
cachedCredHarnessProvider = null;
|
|
3217
|
+
console.log(
|
|
3218
|
+
`[${role}] [harness] Swapped to ${resolvedProvider} (basePrompt rebuilt: ${basePrompt.length} chars)`,
|
|
3219
|
+
);
|
|
3220
|
+
} catch (err) {
|
|
3221
|
+
console.warn(
|
|
3222
|
+
`[${role}] [harness] Failed to swap to ${resolvedProvider} (staying on ${previous}): ${err}`,
|
|
3223
|
+
);
|
|
3224
|
+
}
|
|
3225
|
+
}
|
|
3226
|
+
} catch (err) {
|
|
3227
|
+
console.warn(`[${role}] [harness] Reconcile fetch failed (non-fatal): ${err}`);
|
|
3228
|
+
}
|
|
3229
|
+
}
|
|
3230
|
+
|
|
3231
|
+
// Migration 055 — post-task credential refresh, cache-keyed on the
|
|
3232
|
+
// *resolved* harness_provider. Re-runs the snapshot when the provider
|
|
3233
|
+
// changes (boot, or after a live swap above) so the dashboard shows
|
|
3234
|
+
// up-to-date credential status for the active adapter.
|
|
3235
|
+
if (!isCredCheckDisabled(process.env)) {
|
|
3236
|
+
const currentHarness = state.harnessProvider;
|
|
3237
|
+
if (currentHarness !== cachedCredHarnessProvider) {
|
|
3238
|
+
cachedCredHarnessProvider = currentHarness;
|
|
3239
|
+
buildCredStatusReport(currentHarness, process.env, {}, "post_task")
|
|
3240
|
+
.then((snap) => reportCredStatus(apiUrl, apiKey, agentId, snap))
|
|
3241
|
+
.catch((err) =>
|
|
3242
|
+
console.warn(`[${role}] cred_status post_task report failed (non-fatal): ${err}`),
|
|
3243
|
+
);
|
|
3244
|
+
}
|
|
3245
|
+
}
|
|
3246
|
+
|
|
3062
3247
|
// Periodic VCS detection for running tasks (fire-and-forget, throttled per task)
|
|
3063
3248
|
const now = Date.now();
|
|
3064
3249
|
for (const [taskId, task] of state.activeTasks) {
|
|
@@ -3365,6 +3550,8 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
|
|
|
3365
3550
|
trigger.taskId,
|
|
3366
3551
|
1,
|
|
3367
3552
|
`Spawn failed: ${errMsg}`,
|
|
3553
|
+
undefined,
|
|
3554
|
+
state.harnessProvider,
|
|
3368
3555
|
);
|
|
3369
3556
|
}
|
|
3370
3557
|
continue;
|