@open-multi-agent/core 1.6.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +58 -21
  2. package/dist/agent/pool.d.ts +1 -1
  3. package/dist/agent/pool.d.ts.map +1 -1
  4. package/dist/agent/pool.js +23 -1
  5. package/dist/agent/pool.js.map +1 -1
  6. package/dist/agent/runner.d.ts.map +1 -1
  7. package/dist/agent/runner.js +37 -7
  8. package/dist/agent/runner.js.map +1 -1
  9. package/dist/cli/oma.js +1 -1
  10. package/dist/cli/oma.js.map +1 -1
  11. package/dist/dashboard/render-team-run-dashboard.d.ts.map +1 -1
  12. package/dist/dashboard/render-team-run-dashboard.js +5 -1
  13. package/dist/dashboard/render-team-run-dashboard.js.map +1 -1
  14. package/dist/index.d.ts +2 -1
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +1 -0
  17. package/dist/index.js.map +1 -1
  18. package/dist/llm/bedrock.d.ts +1 -1
  19. package/dist/llm/bedrock.d.ts.map +1 -1
  20. package/dist/llm/bedrock.js +58 -25
  21. package/dist/llm/bedrock.js.map +1 -1
  22. package/dist/llm/minimax.d.ts +2 -2
  23. package/dist/llm/minimax.js +2 -2
  24. package/dist/memory/checkpoint.d.ts +28 -0
  25. package/dist/memory/checkpoint.d.ts.map +1 -0
  26. package/dist/memory/checkpoint.js +95 -0
  27. package/dist/memory/checkpoint.js.map +1 -0
  28. package/dist/memory/shared.d.ts +25 -1
  29. package/dist/memory/shared.d.ts.map +1 -1
  30. package/dist/memory/shared.js +74 -1
  31. package/dist/memory/shared.js.map +1 -1
  32. package/dist/orchestrator/orchestrator.d.ts +53 -18
  33. package/dist/orchestrator/orchestrator.d.ts.map +1 -1
  34. package/dist/orchestrator/orchestrator.js +822 -93
  35. package/dist/orchestrator/orchestrator.js.map +1 -1
  36. package/dist/task/queue.d.ts +18 -1
  37. package/dist/task/queue.d.ts.map +1 -1
  38. package/dist/task/queue.js +100 -0
  39. package/dist/task/queue.js.map +1 -1
  40. package/dist/task/task.d.ts +4 -1
  41. package/dist/task/task.d.ts.map +1 -1
  42. package/dist/task/task.js +3 -0
  43. package/dist/task/task.js.map +1 -1
  44. package/dist/types.d.ts +262 -4
  45. package/dist/types.d.ts.map +1 -1
  46. package/package.json +4 -3
@@ -49,7 +49,10 @@ import { registerBuiltInTools } from '../tool/built-in/index.js';
49
49
  import { defaultWorkspaceDir } from '../tool/built-in/path-safety.js';
50
50
  import { Team } from '../team/team.js';
51
51
  import { TaskQueue } from '../task/queue.js';
52
+ import { Checkpoint } from '../memory/checkpoint.js';
53
+ import { InMemoryStore } from '../memory/store.js';
52
54
  import { createTask, validateTaskDependencies } from '../task/task.js';
55
+ import { extractJSON, validateOutput } from '../agent/structured-output.js';
53
56
  import { Scheduler } from './scheduler.js';
54
57
  import { TokenBudgetExceededError } from '../errors.js';
55
58
  import { extractKeywords, keywordScore } from '../utils/keywords.js';
@@ -195,6 +198,23 @@ function buildAgent(config, toolRegistration) {
195
198
  });
196
199
  return new Agent(config, registry, executor);
197
200
  }
/**
 * Fill in the orchestrator-wide {@link OrchestratorConfig.defaultToolPreset}
 * for an agent that has no tool grant of its own.
 *
 * Built-in tools are opt-in (default-deny), so an agent that declares neither
 * `tools` nor `toolPreset` would otherwise end up with zero built-in tools.
 * A per-agent grant always takes precedence: when either field is set the
 * config is returned untouched and the default never widens it.
 *
 * @param config - Agent configuration to inspect; it is never mutated.
 * @param defaultToolPreset - Orchestrator-level fallback, or `undefined`.
 * @returns `config` itself when no fallback applies, otherwise a shallow copy
 *   with `toolPreset` set to the default.
 */
function applyDefaultToolPreset(config, defaultToolPreset) {
    // No orchestrator default configured: nothing to apply.
    if (defaultToolPreset === undefined) {
        return config;
    }
    const declaresOwnGrant = config.tools !== undefined || config.toolPreset !== undefined;
    return declaresOwnGrant
        ? config
        : { ...config, toolPreset: defaultToolPreset };
}
198
218
  /** Promise-based delay. */
199
219
  function sleep(ms) {
200
220
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -274,6 +294,53 @@ export async function executeWithRetry(run, task, onRetry, delayFn = sleep) {
274
294
  toolCalls: [],
275
295
  };
276
296
  }
/**
 * Resolve a parsed task spec's `verify` field into a full
 * {@link ConsensusVerifyOptions} (or `undefined` when no verify should run).
 *
 * - Full `ConsensusVerifyOptions` (already has `judges`): returned as-is.
 * - `true` or `CoordinatorVerifySpec` (no `judges`): merged with
 *   `verifyJudges` when provided; ignored when `verifyJudges` is absent or empty.
 * - `undefined`, `null`, `false`, or any other non-object value: no verify.
 *
 * @param spec - The task's `verify` value.
 * @param verifyJudges - Judges to attach when `spec` does not carry its own.
 * @returns The resolved options, or `undefined` when verification is disabled.
 */
function resolveVerify(spec, verifyJudges) {
    // Only `true` and objects are meaningful. Rejecting every other value here
    // also keeps the `in` check below from throwing a TypeError on primitives
    // such as `false` or on `null`.
    if (spec !== true && (typeof spec !== 'object' || spec === null))
        return undefined;
    if (spec !== true && 'judges' in spec)
        return spec;
    if (!verifyJudges || verifyJudges.length === 0)
        return undefined;
    const partial = spec === true ? {} : spec;
    // Copy only the fields that are actually set so unset ones fall back to the
    // consumer's defaults instead of being overridden with `undefined`.
    return {
        judges: verifyJudges,
        ...(partial.mode !== undefined ? { mode: partial.mode } : {}),
        ...(partial.quorum !== undefined ? { quorum: partial.quorum } : {}),
        ...(partial.maxRounds !== undefined ? { maxRounds: partial.maxRounds } : {}),
        ...(partial.onDissent !== undefined ? { onDissent: partial.onDissent } : {}),
    };
}
/**
 * Parse the coordinator-emitted `verify` field on a task JSON object.
 *
 * Accepts `true` (use all defaults) or a partial object with `mode`,
 * `quorum`, `maxRounds`, and/or `onDissent`. An object with no recognised
 * field is treated like `true`. Any other value (missing, `null`, primitives,
 * arrays) yields `undefined` so it is ignored safely.
 *
 * `quorum` and `maxRounds` must be finite numbers >= 1 and are floored to an
 * integer. Non-finite values are dropped: `JSON.parse` turns an overflowing
 * literal such as `1e999` into `Infinity`, which would otherwise produce an
 * unbounded round count.
 *
 * @param raw - The untrusted `verify` value taken from the coordinator's JSON.
 * @returns `true`, a `{ mode, quorum, maxRounds, onDissent }` object (unset
 *   fields are `undefined`), or `undefined`.
 */
function parseCoordinatorVerify(raw) {
    if (raw === true)
        return true;
    // Arrays are objects too, but they are not a verify spec.
    if (typeof raw !== 'object' || raw === null || Array.isArray(raw))
        return undefined;
    const obj = raw;
    // Number.isFinite is false for non-numbers, NaN and +/-Infinity.
    const positiveInt = (value) => (Number.isFinite(value) && value >= 1 ? Math.floor(value) : undefined);
    const mode = obj['mode'] === 'refute' || obj['mode'] === 'lens' ? obj['mode'] : undefined;
    const quorum = positiveInt(obj['quorum']);
    const maxRounds = positiveInt(obj['maxRounds']);
    const onDissent = obj['onDissent'] === 'revise' || obj['onDissent'] === 'reject' || obj['onDissent'] === 'keep'
        ? obj['onDissent']
        : undefined;
    if (mode === undefined && quorum === undefined && maxRounds === undefined && onDissent === undefined)
        return true;
    return { mode, quorum, maxRounds, onDissent };
}
277
344
  /**
278
345
  * Attempt to extract a JSON array of task specs from the coordinator's raw
279
346
  * output. The coordinator is prompted to emit JSON inside a ```json … ``` fence
@@ -314,6 +381,11 @@ function parseTaskSpecs(raw) {
314
381
  maxRetries: typeof obj['maxRetries'] === 'number' ? obj['maxRetries'] : undefined,
315
382
  retryDelayMs: typeof obj['retryDelayMs'] === 'number' ? obj['retryDelayMs'] : undefined,
316
383
  retryBackoff: typeof obj['retryBackoff'] === 'number' ? obj['retryBackoff'] : undefined,
384
+ role: typeof obj['role'] === 'string' ? obj['role'] : undefined,
385
+ priority: obj['priority'] === 'low' || obj['priority'] === 'normal' || obj['priority'] === 'high' || obj['priority'] === 'critical'
386
+ ? obj['priority']
387
+ : undefined,
388
+ verify: parseCoordinatorVerify(obj['verify']),
317
389
  });
318
390
  }
319
391
  return specs.length > 0 ? specs : null;
@@ -322,6 +394,47 @@ function parseTaskSpecs(raw) {
322
394
  return null;
323
395
  }
324
396
  }
/**
 * Find the model route of the first routing rule that matches a selection.
 *
 * A rule matches when every criterion it defines equals the corresponding
 * value of the selection; criteria left `undefined` act as wildcards. Rules
 * are tried in order and the first match wins.
 *
 * @param policy - Routing policy with a `rules` list, or a falsy value.
 * @param selection - `{ phase, agent, task?, leaf? }` describing the run.
 * @returns The matching rule's `route`, or `undefined` when there is no policy
 *   or no rule matches.
 */
function routeMatches(policy, selection) {
    if (!policy)
        return undefined;
    const task = selection.task;
    const ruleApplies = (match) => {
        // [wanted value, lazily computed actual value] per criterion.
        const criteria = [
            [match.phase, () => selection.phase],
            [match.agent, () => selection.agent],
            [match.taskRole, () => task?.role],
            [match.taskPriority, () => task?.priority],
            [match.leaf, () => selection.leaf],
            [match.hasDependencies, () => (task?.dependsOn?.length ?? 0) > 0],
        ];
        return criteria.every(([wanted, actual]) => wanted === undefined || wanted === actual());
    };
    for (const rule of policy.rules) {
        if (ruleApplies(rule.match))
            return rule.route;
    }
    return undefined;
}
/**
 * Overlay a model route onto an agent configuration.
 *
 * The route's `model` always replaces the configured one; `provider`,
 * `baseURL`, `apiKey` and `region` are taken from the route when it sets them
 * and otherwise keep the value already present on `config`.
 *
 * @param config - Agent configuration; it is never mutated.
 * @param route - Route to apply, or a falsy value for "no routing".
 * @returns `config` itself when there is no route, otherwise a shallow copy
 *   carrying the routed connection settings.
 */
function withModelRoute(config, route) {
    if (!route)
        return config;
    const routed = { ...config };
    routed.model = route.model;
    routed.provider = route.provider ?? config.provider;
    routed.baseURL = route.baseURL ?? config.baseURL;
    routed.apiKey = route.apiKey ?? config.apiKey;
    routed.region = route.region ?? config.region;
    return routed;
}
/**
 * Tell whether a task is a leaf of the dependency graph, i.e. no task in
 * `tasks` lists it in its `dependsOn`.
 *
 * @param task - The task to test (only `id` is read).
 * @param tasks - Iterable of all tasks to check for dependents.
 * @returns `true` when nothing depends on `task`, otherwise `false`.
 */
function isLeafTask(task, tasks) {
    return [...tasks].every((candidate) => !candidate.dependsOn?.includes(task.id));
}
325
438
  function buildRevealCoordinatorLines(revealContext, assignee) {
326
439
  return [
327
440
  '## Team context',
@@ -376,13 +489,19 @@ function buildTaskAgentTeamInfo(ctx, taskId, traceBase, delegationDepth, delegat
376
489
  }
377
490
  // Apply orchestrator-level defaults just like buildPool, then construct a
378
491
  // one-shot Agent for this delegation only.
379
- const effective = {
492
+ const route = routeMatches(ctx.modelRouting, {
493
+ phase: 'delegated',
494
+ agent: targetAgent,
495
+ task: ctx.taskById.get(taskId),
496
+ leaf: ctx.taskLeafById.get(taskId),
497
+ });
498
+ const effective = withModelRoute(applyDefaultToolPreset({
380
499
  ...targetConfig,
381
500
  provider: targetConfig.provider ?? ctx.config.defaultProvider,
382
501
  baseURL: targetConfig.baseURL ?? ctx.config.defaultBaseURL,
383
502
  apiKey: targetConfig.apiKey ?? ctx.config.defaultApiKey,
384
503
  cwd: targetConfig.cwd === undefined ? ctx.config.defaultCwd : targetConfig.cwd,
385
- };
504
+ }, ctx.config.defaultToolPreset), route);
386
505
  const tempAgent = buildAgent(effective, { includeDelegateTool: true });
387
506
  const nestedTeam = buildTaskAgentTeamInfo(ctx, taskId, traceBase, delegationDepth + 1, [...delegationChain, targetAgent]);
388
507
  const childOpts = {
@@ -404,6 +523,54 @@ function buildTaskAgentTeamInfo(ctx, taskId, traceBase, delegationDepth, delegat
404
523
  runDelegatedAgent,
405
524
  };
406
525
  }
/**
 * Persist a checkpoint of the current run, if checkpointing is active.
 *
 * The write is best-effort: a checkpoint must never take down the run it
 * protects. Snapshot construction and the store write both happen inside the
 * guarded save, so any failure is reported through `onProgress` as an
 * `error` event of kind `checkpoint_save_failed` and the function still
 * resolves normally.
 *
 * Saves are serialised through `ctx.checkpoint.saveChain`, so each save starts
 * only after the previous one has settled.
 *
 * @param queue - Task queue; read via `getByStatus('completed')` and `snapshot()`.
 * @param ctx - Run context; `ctx.checkpoint` being falsy makes this a no-op.
 * @returns A promise that resolves once this save has settled.
 */
async function saveRunCheckpoint(queue, ctx) {
    const active = ctx.checkpoint;
    if (!active)
        return;
    const writeSnapshot = async () => {
        const memory = ctx.team.getSharedMemoryInstance();
        const completedTaskResults = [];
        for (const done of queue.getByStatus('completed')) {
            const entry = { taskId: done.id };
            if (done.assignee !== undefined)
                entry.assignee = done.assignee;
            if (done.result !== undefined)
                entry.result = done.result;
            completedTaskResults.push(entry);
        }
        const snapshot = {
            version: 1,
            mode: active.mode,
            createdAt: new Date().toISOString(),
        };
        if (active.runId !== undefined)
            snapshot.runId = active.runId;
        if (active.goal !== undefined)
            snapshot.goal = active.goal;
        snapshot.queue = queue.snapshot();
        // When the checkpoint store IS the shared-memory store the entries are
        // already durable there, so embedding them again on every task would
        // only multiply write volume. The turn counter is still recorded.
        if (memory && !active.reusesSharedMemoryStore)
            snapshot.sharedMemory = await memory.snapshot();
        if (memory)
            snapshot.turnCount = memory.getTurnCount();
        snapshot.completedTaskResults = completedTaskResults;
        await active.manager.save(snapshot);
    };
    const ignoreFailure = () => undefined;
    // Queue behind the previous save regardless of how it ended.
    const pending = active.saveChain.catch(ignoreFailure).then(writeSnapshot);
    // The stored chain never rejects, so a failed save neither surfaces as an
    // unhandled rejection nor blocks the next checkpoint.
    active.saveChain = pending.catch(ignoreFailure);
    try {
        await pending;
    }
    catch (error) {
        ctx.config.onProgress?.({
            type: 'error',
            data: { kind: 'checkpoint_save_failed', error },
        });
    }
}
407
574
  /**
408
575
  * Execute all tasks in `queue` using agents in `pool`, respecting dependencies
409
576
  * and running independent tasks in parallel.
@@ -461,6 +628,18 @@ async function executeQueue(queue, ctx) {
461
628
  });
462
629
  return;
463
630
  }
631
+ const agentConfig = team.getAgent(assignee);
632
+ if (!agentConfig) {
633
+ const msg = `Agent "${assignee}" not found in team for task "${task.title}".`;
634
+ queue.fail(task.id, msg);
635
+ config.onProgress?.({
636
+ type: 'error',
637
+ task: task.id,
638
+ agent: assignee,
639
+ data: msg,
640
+ });
641
+ return;
642
+ }
464
643
  const agent = pool.get(assignee);
465
644
  if (!agent) {
466
645
  const msg = `Agent "${assignee}" not found in pool for task "${task.title}".`;
@@ -503,9 +682,22 @@ async function executeQueue(queue, ctx) {
503
682
  ...traceBase,
504
683
  team: buildTaskAgentTeamInfo(ctx, task.id, traceBase, 0, [assignee]),
505
684
  };
506
- const taskStartMs = Date.now();
507
- let retryCount = 0;
508
- const result = await executeWithRetry(() => pool.run(assignee, prompt, runOptions, config.onAgentStream
685
+ const workerRoute = routeMatches(ctx.modelRouting, {
686
+ phase: 'worker',
687
+ agent: assignee,
688
+ task,
689
+ leaf: ctx.taskLeafById.get(task.id),
690
+ });
691
+ const routedAgent = workerRoute
692
+ ? buildAgent(withModelRoute(applyDefaultToolPreset({
693
+ ...agentConfig,
694
+ provider: agentConfig.provider ?? config.defaultProvider,
695
+ baseURL: agentConfig.baseURL ?? config.defaultBaseURL,
696
+ apiKey: agentConfig.apiKey ?? config.defaultApiKey,
697
+ cwd: agentConfig.cwd === undefined ? config.defaultCwd : agentConfig.cwd,
698
+ }, config.defaultToolPreset), workerRoute), { includeDelegateTool: true })
699
+ : undefined;
700
+ const streamCallback = config.onAgentStream
509
701
  ? (event) => {
510
702
  if (config.onTrace) {
511
703
  const streamMs = Date.now();
@@ -522,7 +714,12 @@ async function executeQueue(queue, ctx) {
522
714
  }
523
715
  config.onAgentStream(assignee, event);
524
716
  }
525
- : undefined), task, (retryData) => {
717
+ : undefined;
718
+ const taskStartMs = Date.now();
719
+ let retryCount = 0;
720
+ const result = await executeWithRetry(() => routedAgent
721
+ ? pool.runEphemeral(routedAgent, prompt, runOptions, streamCallback)
722
+ : pool.run(assignee, prompt, runOptions, streamCallback), task, (retryData) => {
526
723
  retryCount++;
527
724
  config.onProgress?.({
528
725
  type: 'task_retry',
@@ -571,27 +768,38 @@ async function executeQueue(queue, ctx) {
571
768
  });
572
769
  }
573
770
  if (result.success) {
574
- // Persist result into shared memory so other agents can read it
575
771
  const sharedMem = team.getSharedMemoryInstance();
772
+ // Opt-in consensus verification runs *before* the task is finalised so the
773
+ // verified outcome (accepted → revised, rejected → original) flows into the
774
+ // queue, shared memory, progress events, and agentResults as one consistent
775
+ // result. Judge usage is charged to the same parent budget as the rest of the run.
776
+ let effective = result;
777
+ if (task.verify && !ctx.budgetExceededTriggered) {
778
+ effective = await runTaskVerify(task, assignee, result, sharedMem, ctx);
779
+ }
780
+ // Reflect the verified result in the per-task record the caller receives.
781
+ ctx.agentResults.set(`${assignee}:${task.id}`, effective);
782
+ // Persist result into shared memory so other agents can read it
576
783
  if (sharedMem) {
577
- await sharedMem.write(assignee, `task:${task.id}:result`, result.output);
784
+ await sharedMem.write(assignee, `task:${task.id}:result`, effective.output);
578
785
  // Advance the turn counter so any TTL-tagged entries written during
579
786
  // this task can be expired by subsequent reads.
580
787
  sharedMem.advanceTurn();
581
788
  }
582
- const completedTask = queue.complete(task.id, result.output);
789
+ const completedTask = queue.complete(task.id, effective.output);
583
790
  completedThisRound.push(completedTask);
791
+ await saveRunCheckpoint(queue, ctx);
584
792
  config.onProgress?.({
585
793
  type: 'task_complete',
586
794
  task: task.id,
587
795
  agent: assignee,
588
- data: result,
796
+ data: effective,
589
797
  });
590
798
  config.onProgress?.({
591
799
  type: 'agent_complete',
592
800
  agent: assignee,
593
801
  task: task.id,
594
- data: result,
802
+ data: effective,
595
803
  });
596
804
  }
597
805
  else {
@@ -691,6 +899,251 @@ async function buildTaskPrompt(task, team, queue, revealContext) {
691
899
  }
692
900
  return lines.join('\n');
693
901
  }
/**
 * Skeptic framing given to every judge: used verbatim in `refute` mode and as
 * the base text in `lens` mode.
 */
const DEFAULT_VERIFIER_INSTRUCTION = [
    'You are a rigorous skeptic reviewing a proposed answer to the question shown below.',
    'Judge the answer against what that question actually asks: hunt for errors, unsupported',
    'claims, gaps, and faulty reasoning, then decide whether it withstands scrutiny.',
].join(' ');
/**
 * Review angles for `lens` mode. Judges pick one by index, wrapping around
 * when there are more judges than lenses.
 */
const CONSENSUS_LENSES = [
    'factual correctness and logical soundness',
    'completeness and coverage of the question',
    'edge cases, failure modes, and counterexamples',
    'clarity, precision, and freedom from ambiguity',
    'hidden assumptions and unstated premises',
    'evidence, citations, and verifiability',
];
/** Output contract appended to every judge prompt: a JSON accept/critique verdict. */
const VERDICT_INSTRUCTION = [
    'Respond ONLY with a JSON object {"accept": <true|false>, "critique": "<concise reason>"}.',
    'Set "accept" to true only if the answer withstands scrutiny; otherwise set it false',
    'and explain the problem in "critique".',
].join(' ');
/**
 * Fill an agent config used for consensus (judge or proposer) with the
 * orchestrator-level defaults.
 *
 * `provider`, `baseURL` and `apiKey` fall back when the config value is
 * `null`/`undefined`; `cwd` falls back only when it is exactly `undefined`,
 * so an explicit `null` is kept as-is.
 *
 * @param config - Agent configuration; it is never mutated.
 * @param defaults - `{ defaultProvider, defaultBaseURL, defaultApiKey, defaultCwd }`.
 * @returns A shallow copy of `config` with the defaults applied.
 */
function applyConsensusDefaults(config, defaults) {
    const { defaultProvider, defaultBaseURL, defaultApiKey, defaultCwd } = defaults;
    const resolved = { ...config };
    resolved.provider = config.provider ?? defaultProvider;
    resolved.baseURL = config.baseURL ?? defaultBaseURL;
    resolved.apiKey = config.apiKey ?? defaultApiKey;
    resolved.cwd = config.cwd === undefined ? defaultCwd : config.cwd;
    return resolved;
}
/**
 * Compose the user prompt for one judge. The original question and the
 * proposed answer are always included, followed by the verdict contract.
 *
 * The leading instruction is chosen as follows:
 * - `p.judgePrompt` set: its value, or its return value for `p.judge` when it
 *   is a function.
 * - `p.mode === 'lens'`: the default skeptic framing plus one review lens,
 *   selected by `p.judgeIndex` modulo the number of lenses.
 * - otherwise: the default skeptic framing.
 *
 * @param p - `{ judge, answer, prompt, mode, judgeIndex, judgePrompt }`.
 * @returns The prompt text, sections separated by newlines.
 */
function buildJudgePrompt(p) {
    const chooseInstruction = () => {
        if (p.judgePrompt !== undefined) {
            return typeof p.judgePrompt === 'function' ? p.judgePrompt(p.judge) : p.judgePrompt;
        }
        if (p.mode !== 'lens') {
            return DEFAULT_VERIFIER_INSTRUCTION;
        }
        const lens = CONSENSUS_LENSES[p.judgeIndex % CONSENSUS_LENSES.length];
        const focus = `Focus specifically on: ${lens}. `;
        const fallback = 'If that angle is irrelevant to this question, accept the answer rather than inventing objections.';
        return `${DEFAULT_VERIFIER_INSTRUCTION}\n${focus}${fallback}`;
    };
    const sections = [chooseInstruction(), ''];
    sections.push('## Question', p.prompt, '');
    sections.push('## Proposed answer', p.answer, '');
    sections.push('## Your verdict', VERDICT_INSTRUCTION);
    return sections.join('\n');
}
/**
 * Compose the proposer prompt for a revision round: the original prompt, the
 * previous answer, one bullet per reviewer critique, and a closing instruction
 * to return only the improved answer.
 *
 * @param prompt - The original question/task text.
 * @param answer - The answer the judges just reviewed.
 * @param dissent - Critique strings to address; each becomes a `- ` bullet.
 * @returns The prompt text, lines separated by newlines.
 */
function buildRevisePrompt(prompt, answer, dissent) {
    const critiqueBullets = dissent.map((critique) => `- ${critique}`);
    const lines = [prompt, ''];
    lines.push('## Your previous answer', answer, '');
    lines.push('## Reviewer critiques to address', ...critiqueBullets, '');
    lines.push('Revise the previous answer to address every critique above. Respond with the improved answer only.');
    return lines.join('\n');
}
/**
 * Turn a judge's raw output into an `{ accept, critique }` decision.
 *
 * Anything that cannot be read as an explicit acceptance counts as a
 * rejection: output with no extractable JSON, JSON that fails the optional
 * `verdictSchema`, or a missing/non-boolean `accept` field.
 *
 * @param output - Raw text produced by the judge.
 * @param verdictSchema - Optional schema the parsed verdict must satisfy.
 * @returns `accept` plus a critique string; the critique is empty only for an
 *   acceptance that carried no critique text.
 */
function parseJudgeVerdict(output, verdictSchema) {
    const rejectWith = (critique) => ({ accept: false, critique });
    let parsed;
    try {
        parsed = extractJSON(output);
    }
    catch {
        return rejectWith('Judge output was not valid JSON.');
    }
    if (verdictSchema) {
        try {
            validateOutput(verdictSchema, parsed);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return rejectWith(`Verdict failed schema validation: ${reason}`);
        }
    }
    // Non-object JSON (string, number, null, ...) carries no verdict fields.
    const fields = parsed && typeof parsed === 'object' ? parsed : {};
    const accept = fields['accept'] === true;
    const statedCritique = fields['critique'];
    if (typeof statedCritique === 'string' && statedCritique) {
        return { accept, critique: statedCritique };
    }
    return { accept, critique: accept ? '' : 'No critique provided.' };
}
/**
 * Run the judge/refutation loop over a proposed answer.
 *
 * Judges are awaited one after another within a round, so reaching quorum or
 * exceeding the token budget stops the remaining judges. Each dissent is
 * collected (labelled with the judge name) and, when the team has shared
 * memory, written there; every verdict is traced when `onTrace` is set.
 * When a round misses quorum, `onDissent === 'revise'` asks `reviseProposer`
 * for a new answer if another round remains; otherwise the loop stops.
 *
 * @param params - Loop inputs: `team`, `prompt`, `judges`, `mode`, `quorum`,
 *   `maxRounds`, `verdictSchema`, `onDissent`, `judgePrompt`, `budget`,
 *   `budgetBaseTokens`, `reviseProposer`, `defaults`, `onTrace`, `runId`,
 *   `initialAnswer`, `initialUsage`, and an optional `pool`.
 * @returns `{ answer, verdict, dissent, rounds, tokenUsage }`. `verdict` is
 *   `'accepted'` when quorum was reached, or when `onDissent` is `'keep'` and
 *   the budget was not hit; otherwise `'rejected'`.
 */
async function runConsensusCore(params) {
    const { team, prompt, judges, mode, quorum, maxRounds, verdictSchema, onDissent, judgePrompt, budget, budgetBaseTokens, reviseProposer, defaults, onTrace, runId, } = params;
    const pool = params.pool ?? new AgentPool(Math.max(1, defaults.maxConcurrency));
    const sharedMem = team.getSharedMemoryInstance();
    let answer = params.initialAnswer;
    let usage = params.initialUsage;
    const dissent = [];
    let rounds = 0;
    let accepted = false;
    let budgetHit = false;
    const overBudget = () => budget !== undefined && budgetBaseTokens + usage.input_tokens + usage.output_tokens > budget;
    const runEphemeral = (config, text) => pool.runEphemeral(buildAgent(applyConsensusDefaults(config, defaults)), text);
    const outcome = (verdict) => ({ answer, verdict, dissent, rounds, tokenUsage: usage });
    // Trace every verdict (accept or dissent) as a zero-duration event.
    const traceVerdict = (judgeName, round, verdict) => {
        if (!onTrace)
            return;
        const now = Date.now();
        emitTrace(onTrace, {
            type: 'consensus',
            runId: runId ?? '',
            agent: judgeName,
            round,
            accepted: verdict.accept,
            ...(verdict.accept ? {} : { dissent: verdict.critique }),
            startMs: now,
            endMs: now,
            durationMs: 0,
        });
    };
    // The caller has already accumulated proposer usage; do not start judging
    // if that alone exceeds the budget.
    if (overBudget()) {
        return outcome('rejected');
    }
    for (let round = 1; round <= maxRounds; round++) {
        rounds = round;
        let acceptCount = 0;
        const roundDissent = [];
        for (let index = 0; index < judges.length; index++) {
            const judge = judges[index];
            const judgeText = buildJudgePrompt({ judge: judge.name, answer, prompt, mode, judgeIndex: index, judgePrompt });
            const judged = await runEphemeral(judge, judgeText);
            usage = addUsage(usage, judged.tokenUsage);
            if (overBudget()) {
                budgetHit = true;
                break;
            }
            const verdict = parseJudgeVerdict(judged.output, verdictSchema);
            traceVerdict(judge.name, round, verdict);
            if (!verdict.accept) {
                // Shared memory records dissent only.
                const labelled = `${judge.name}: ${verdict.critique}`;
                roundDissent.push(labelled);
                dissent.push(labelled);
                if (sharedMem) {
                    await sharedMem.write(judge.name, `consensus:round:${round}:dissent`, verdict.critique);
                }
                continue;
            }
            acceptCount += 1;
            if (acceptCount >= quorum) {
                accepted = true;
                break;
            }
        }
        if (budgetHit || accepted)
            break;
        // Round missed quorum: revise only when asked to, when another round
        // remains to judge the revision, and when there is a proposer to ask.
        const canRevise = onDissent === 'revise' && round < maxRounds && Boolean(reviseProposer);
        if (!canRevise)
            break;
        const revised = await runEphemeral(reviseProposer, buildRevisePrompt(prompt, answer, roundDissent));
        usage = addUsage(usage, revised.tokenUsage);
        if (revised.success && revised.output)
            answer = revised.output;
        if (overBudget()) {
            budgetHit = true;
            break;
        }
    }
    const keptDespiteDissent = !budgetHit && onDissent === 'keep';
    return outcome(accepted || keptDespiteDissent ? 'accepted' : 'rejected');
}
/**
 * Run a task's `verify` hook before the task is finalised.
 *
 * The task result is fed through the consensus loop with the task's
 * description as the question and the assignee's own config as the revision
 * proposer. Judge usage is added to the run's cumulative usage, the verdict is
 * written to shared memory under `task:<id>:verdict` (when shared memory
 * exists), and a `budget_exceeded` progress event is emitted the first time
 * the cumulative total passes the run's token budget.
 *
 * @param task - The task being verified; `task.verify` holds the options.
 * @param assignee - Name of the agent that produced `result`.
 * @param result - The task's run result.
 * @param sharedMem - Shared memory instance, or a falsy value when there is none.
 * @param ctx - Run context (team, config, cumulative usage, budget state).
 * @returns A copy of `result` whose `tokenUsage` includes the consensus usage
 *   and whose `output` is the revised answer only when that revision was
 *   accepted and differs from the original output.
 */
async function runTaskVerify(task, assignee, result, sharedMem, ctx) {
    const options = task.verify;
    const { team, config } = ctx;
    const proposerConfig = team.getAgents().find((candidate) => candidate.name === assignee);
    const judgeCount = options.judges.length;
    // Default quorum is a simple majority; always clamp into [1, judgeCount].
    const requestedQuorum = options.quorum ?? Math.ceil(judgeCount / 2);
    const consensusParams = {
        team,
        prompt: task.description,
        initialAnswer: result.output,
        initialUsage: ZERO_USAGE,
        budgetBaseTokens: ctx.cumulativeUsage.input_tokens + ctx.cumulativeUsage.output_tokens,
        judges: options.judges,
        mode: options.mode ?? 'refute',
        quorum: Math.min(judgeCount, Math.max(1, requestedQuorum)),
        maxRounds: Math.max(1, options.maxRounds ?? 2),
        verdictSchema: options.verdictSchema,
        onDissent: options.onDissent ?? 'revise',
        judgePrompt: options.judgePrompt,
        budget: ctx.maxTokenBudget,
        reviseProposer: proposerConfig,
        defaults: {
            defaultProvider: config.defaultProvider,
            defaultBaseURL: config.defaultBaseURL,
            defaultApiKey: config.defaultApiKey,
            defaultCwd: config.defaultCwd,
            maxConcurrency: config.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY,
        },
        onTrace: config.onTrace,
    };
    if (ctx.runId) {
        consensusParams.runId = ctx.runId;
    }
    const consensus = await runConsensusCore(consensusParams);
    ctx.cumulativeUsage = addUsage(ctx.cumulativeUsage, consensus.tokenUsage);
    // Record the verdict as a task-level entry in shared memory.
    if (sharedMem) {
        let summary = 'accepted';
        if (consensus.verdict !== 'accepted') {
            const detail = consensus.dissent.length ? `: ${consensus.dissent.join('; ')}` : '';
            summary = `rejected${detail}`;
        }
        await sharedMem.write(assignee, `task:${task.id}:verdict`, summary);
    }
    const totalTokens = ctx.cumulativeUsage.input_tokens + ctx.cumulativeUsage.output_tokens;
    const budgetNewlyExceeded = !ctx.budgetExceededTriggered
        && ctx.maxTokenBudget !== undefined
        && totalTokens > ctx.maxTokenBudget;
    if (budgetNewlyExceeded) {
        ctx.budgetExceededTriggered = true;
        const err = new TokenBudgetExceededError('orchestrator', totalTokens, ctx.maxTokenBudget);
        ctx.budgetExceededReason = err.message;
        config.onProgress?.({
            type: 'budget_exceeded',
            agent: assignee,
            task: task.id,
            data: err,
        });
    }
    // Only an accepted revision replaces the task output; after a rejection
    // the original output is kept. Consensus usage is folded into the task's
    // own token usage either way.
    const useRevision = consensus.verdict === 'accepted' && consensus.answer && consensus.answer !== result.output;
    return {
        ...result,
        output: useRevision ? consensus.answer : result.output,
        tokenUsage: addUsage(result.tokenUsage, consensus.tokenUsage),
    };
}
694
1147
  // ---------------------------------------------------------------------------
695
1148
  // OpenMultiAgent
696
1149
  // ---------------------------------------------------------------------------
@@ -703,6 +1156,7 @@ async function buildTaskPrompt(task, team, queue, revealContext) {
703
1156
  export class OpenMultiAgent {
704
1157
  config;
705
1158
  teams = new Map();
1159
+ fallbackCheckpointStore = new InMemoryStore();
706
1160
  completedTaskCount = 0;
707
1161
  /**
708
1162
  * @param config - Optional top-level configuration.
@@ -726,6 +1180,8 @@ export class OpenMultiAgent {
726
1180
  // disable the filesystem sandbox; a string sets a custom sandbox root.
727
1181
  defaultCwd: config.defaultCwd === undefined ? defaultWorkspaceDir() : config.defaultCwd,
728
1182
  maxTokenBudget: config.maxTokenBudget,
1183
+ defaultToolPreset: config.defaultToolPreset,
1184
+ checkpoint: config.checkpoint,
729
1185
  onApproval: config.onApproval,
730
1186
  onPlanReady: config.onPlanReady,
731
1187
  onAgentStream: config.onAgentStream,
@@ -770,14 +1226,14 @@ export class OpenMultiAgent {
770
1226
  */
771
1227
  async runAgent(config, prompt, options) {
772
1228
  const effectiveBudget = resolveTokenBudget(config.maxTokenBudget, this.config.maxTokenBudget);
773
- const effective = {
1229
+ const effective = applyDefaultToolPreset({
774
1230
  ...config,
775
1231
  provider: config.provider ?? this.config.defaultProvider,
776
1232
  baseURL: config.baseURL ?? this.config.defaultBaseURL,
777
1233
  apiKey: config.apiKey ?? this.config.defaultApiKey,
778
1234
  cwd: config.cwd === undefined ? this.config.defaultCwd : config.cwd,
779
1235
  maxTokenBudget: effectiveBudget,
780
- };
1236
+ }, this.config.defaultToolPreset);
781
1237
  const agent = buildAgent(effective);
782
1238
  this.config.onProgress?.({
783
1239
  type: 'agent_start',
@@ -858,14 +1314,14 @@ export class OpenMultiAgent {
858
1314
  // to avoid duplicate progress events and double completedTaskCount.
859
1315
  // Events are emitted here; counting is handled by buildTeamRunResult().
860
1316
  const effectiveBudget = resolveTokenBudget(bestAgent.maxTokenBudget, this.config.maxTokenBudget);
861
- const effective = {
1317
+ const effective = withModelRoute(applyDefaultToolPreset({
862
1318
  ...bestAgent,
863
1319
  provider: bestAgent.provider ?? this.config.defaultProvider,
864
1320
  baseURL: bestAgent.baseURL ?? this.config.defaultBaseURL,
865
1321
  apiKey: bestAgent.apiKey ?? this.config.defaultApiKey,
866
1322
  cwd: bestAgent.cwd === undefined ? this.config.defaultCwd : bestAgent.cwd,
867
1323
  maxTokenBudget: effectiveBudget,
868
- };
1324
+ }, this.config.defaultToolPreset), routeMatches(options?.modelRouting, { phase: 'short-circuit', agent: bestAgent.name }));
869
1325
  const agent = buildAgent(effective);
870
1326
  this.config.onProgress?.({
871
1327
  type: 'agent_start',
@@ -915,33 +1371,8 @@ export class OpenMultiAgent {
915
1371
  // ------------------------------------------------------------------
916
1372
  // Step 1: Coordinator decomposes goal into tasks
917
1373
  // ------------------------------------------------------------------
918
- const coordinatorConfig = {
919
- name: 'coordinator',
920
- model: coordinatorOverrides?.model ?? this.config.defaultModel,
921
- ...(coordinatorOverrides?.adapter !== undefined ? { adapter: coordinatorOverrides.adapter } : {}),
922
- provider: coordinatorOverrides?.provider ?? this.config.defaultProvider,
923
- baseURL: coordinatorOverrides?.baseURL ?? this.config.defaultBaseURL,
924
- apiKey: coordinatorOverrides?.apiKey ?? this.config.defaultApiKey,
925
- systemPrompt: this.buildCoordinatorPrompt(agentConfigs, coordinatorOverrides),
926
- maxTurns: coordinatorOverrides?.maxTurns ?? 3,
927
- maxTokens: coordinatorOverrides?.maxTokens,
928
- temperature: coordinatorOverrides?.temperature,
929
- topP: coordinatorOverrides?.topP,
930
- topK: coordinatorOverrides?.topK,
931
- minP: coordinatorOverrides?.minP,
932
- parallelToolCalls: coordinatorOverrides?.parallelToolCalls,
933
- frequencyPenalty: coordinatorOverrides?.frequencyPenalty,
934
- presencePenalty: coordinatorOverrides?.presencePenalty,
935
- extraBody: coordinatorOverrides?.extraBody,
936
- toolPreset: coordinatorOverrides?.toolPreset,
937
- tools: coordinatorOverrides?.tools,
938
- disallowedTools: coordinatorOverrides?.disallowedTools,
939
- cwd: coordinatorOverrides?.cwd === undefined
940
- ? this.config.defaultCwd
941
- : coordinatorOverrides.cwd,
942
- loopDetection: coordinatorOverrides?.loopDetection,
943
- timeoutMs: coordinatorOverrides?.timeoutMs,
944
- };
1374
+ const coordinatorBaseConfig = this.buildCoordinatorBaseConfig(coordinatorOverrides, agentConfigs, (options?.verifyJudges?.length ?? 0) > 0);
1375
+ const coordinatorConfig = withModelRoute(coordinatorBaseConfig, routeMatches(options?.modelRouting, { phase: 'coordinator', agent: 'coordinator' }));
945
1376
  const decompositionPrompt = this.buildDecompositionPrompt(goal, agentConfigs);
946
1377
  const coordinatorAgent = buildAgent(coordinatorConfig);
947
1378
  const runId = this.config.onTrace ? generateRunId() : undefined;
@@ -977,7 +1408,7 @@ export class OpenMultiAgent {
977
1408
  if (taskSpecs && taskSpecs.length > 0) {
978
1409
  // Map title-based dependsOn references to real task IDs so we can
979
1410
  // build the dependency graph before adding tasks to the queue.
980
- this.loadSpecsIntoQueue(taskSpecs, agentConfigs, queue);
1411
+ this.loadSpecsIntoQueue(taskSpecs, agentConfigs, queue, options?.verifyJudges);
981
1412
  }
982
1413
  else {
983
1414
  // Coordinator failed to produce structured output — fall back to
@@ -999,12 +1430,14 @@ export class OpenMultiAgent {
999
1430
  // Step 4: Build pool and execute
1000
1431
  // ------------------------------------------------------------------
1001
1432
  const pool = this.buildPool(agentConfigs);
1433
+ const activeCheckpoint = this.createActiveCheckpoint(team, options?.checkpoint ?? this.config.checkpoint, 'runTeam', goal);
1002
1434
  const ctx = {
1003
1435
  team,
1004
1436
  pool,
1005
1437
  scheduler,
1006
1438
  agentResults,
1007
1439
  config: this.config,
1440
+ ...(activeCheckpoint ? { checkpoint: activeCheckpoint } : {}),
1008
1441
  runId,
1009
1442
  abortSignal: options?.abortSignal,
1010
1443
  cumulativeUsage,
@@ -1020,6 +1453,9 @@ export class OpenMultiAgent {
1020
1453
  },
1021
1454
  }
1022
1455
  : {}),
1456
+ modelRouting: options?.modelRouting,
1457
+ taskById: new Map(queue.list().map((task) => [task.id, task])),
1458
+ taskLeafById: new Map(queue.list().map((task) => [task.id, isLeafTask(task, queue.list())])),
1023
1459
  };
1024
1460
  const planTasks = queue.list();
1025
1461
  const planReadyStartMs = Date.now();
@@ -1060,6 +1496,7 @@ export class OpenMultiAgent {
1060
1496
  maxRetries: task.maxRetries,
1061
1497
  retryDelayMs: task.retryDelayMs,
1062
1498
  retryBackoff: task.retryBackoff,
1499
+ verify: task.verify,
1063
1500
  metrics: undefined,
1064
1501
  }));
1065
1502
  this.config.onProgress?.({
@@ -1085,38 +1522,25 @@ export class OpenMultiAgent {
1085
1522
  maxRetries: task.maxRetries,
1086
1523
  retryDelayMs: task.retryDelayMs,
1087
1524
  retryBackoff: task.retryBackoff,
1525
+ verify: task.verify,
1088
1526
  metrics: taskMetrics.get(task.id),
1089
1527
  }));
1090
1528
  // ------------------------------------------------------------------
1091
1529
  // Step 5: Coordinator synthesises final result
1092
1530
  // ------------------------------------------------------------------
1093
- if (options?.abortSignal?.aborted) {
1094
- return this.buildTeamRunResult(agentResults, goal, taskRecords);
1095
- }
1096
- if (maxTokenBudget !== undefined
1097
- && cumulativeUsage.input_tokens + cumulativeUsage.output_tokens > maxTokenBudget) {
1531
+ const synthesis = await this.runCoordinatorSynthesis(team, queue, goal, coordinatorBaseConfig, {
1532
+ modelRouting: options?.modelRouting,
1533
+ runId,
1534
+ abortSignal: options?.abortSignal,
1535
+ cumulativeUsage,
1536
+ maxTokenBudget,
1537
+ });
1538
+ if (synthesis === null) {
1539
+ // Aborted or already over budget — return raw task outputs, no synthesis.
1098
1540
  return this.buildTeamRunResult(agentResults, goal, taskRecords);
1099
1541
  }
1100
- const synthesisPrompt = await this.buildSynthesisPrompt(goal, queue.list(), team);
1101
- const synthTraceOptions = this.config.onTrace
1102
- ? { onTrace: this.config.onTrace, runId: runId ?? '', traceAgent: 'coordinator' }
1103
- : undefined;
1104
- const synthesisResult = await coordinatorAgent.run(synthesisPrompt, synthTraceOptions);
1105
- agentResults.set('coordinator', synthesisResult);
1106
- cumulativeUsage = addUsage(cumulativeUsage, synthesisResult.tokenUsage);
1107
- if (maxTokenBudget !== undefined
1108
- && cumulativeUsage.input_tokens + cumulativeUsage.output_tokens > maxTokenBudget) {
1109
- this.config.onProgress?.({
1110
- type: 'budget_exceeded',
1111
- agent: 'coordinator',
1112
- data: new TokenBudgetExceededError('coordinator', cumulativeUsage.input_tokens + cumulativeUsage.output_tokens, maxTokenBudget),
1113
- });
1114
- }
1115
- this.config.onProgress?.({
1116
- type: 'agent_complete',
1117
- agent: 'coordinator',
1118
- data: synthesisResult,
1119
- });
1542
+ agentResults.set('coordinator', synthesis.result);
1543
+ cumulativeUsage = synthesis.cumulativeUsage;
1120
1544
  // Note: coordinator decompose and synthesis are internal meta-steps.
1121
1545
  // Only actual user tasks (non-coordinator keys) are counted in
1122
1546
  // buildTeamRunResult, so we do not increment completedTaskCount here.
@@ -1162,8 +1586,8 @@ export class OpenMultiAgent {
1162
1586
  *
1163
1587
  * Task IDs, dependencies, assignees, titles, and descriptions are used exactly
1164
1588
  * as stored in the artifact. This is intentionally execution-only; it does not
1165
- * synthesize a coordinator final answer and it does not implement durable
1166
- * checkpoints.
1589
+ * synthesize a coordinator final answer. Durable checkpoints are available
1590
+ * through the same opt-in `checkpoint` option used by `runTasks`.
1167
1591
  */
1168
1592
  async runFromPlan(team, plan, options) {
1169
1593
  if (plan.version !== 1) {
@@ -1178,6 +1602,63 @@ export class OpenMultiAgent {
1178
1602
  queue.addBatch(tasks);
1179
1603
  return this.executeExplicitTaskQueue(team, queue, options, plan.goal);
1180
1604
  }
1605
/**
 * Resume a run from the latest checkpoint snapshot, or start a fresh run when
 * no snapshot can be loaded.
 *
 * The second argument is treated as a task source when it is an array or
 * passes `isPlanArtifact`; in that case `maybeOptions` is the options object.
 * Otherwise the second argument itself is used as the options object.
 *
 * Without a snapshot: an array is loaded through `loadSpecsIntoQueue`, a plan
 * artifact through `tasksFromPlan` (after dependency validation), and with no
 * task source an empty queue is executed.
 *
 * With a snapshot: shared memory (or just its turn counter) is restored, the
 * queue and prior agent results are rebuilt from the snapshot, and execution
 * continues through `executeExplicitTaskQueue`.
 *
 * Throws an `Error` when no snapshot exists and the supplied plan artifact
 * fails `validateTaskDependencies`.
 */
+ async restore(team, tasksOrOptions, maybeOptions) {
1606
+ const hasTaskSource = Array.isArray(tasksOrOptions) || this.isPlanArtifact(tasksOrOptions);
1607
+ const options = hasTaskSource ? maybeOptions : tasksOrOptions;
1608
// The trailing `?? true` turns checkpointing on here when neither the call
// options nor the orchestrator config say anything about it.
+ const activeCheckpoint = this.createActiveCheckpoint(team, options?.checkpoint ?? this.config.checkpoint ?? true, 'runTasks', options?.goal);
1609
+ const snapshot = activeCheckpoint ? await activeCheckpoint.manager.loadLatest() : null;
1610
// Nothing to resume: run from whatever task source the caller supplied.
+ if (!snapshot) {
1611
+ if (Array.isArray(tasksOrOptions)) {
1612
+ const queue = new TaskQueue();
1613
+ this.loadSpecsIntoQueue(tasksOrOptions.map((t) => ({
1614
+ title: t.title,
1615
+ description: t.description,
1616
+ assignee: t.assignee,
1617
+ dependsOn: t.dependsOn,
1618
+ memoryScope: t.memoryScope,
1619
+ maxRetries: t.maxRetries,
1620
+ retryDelayMs: t.retryDelayMs,
1621
+ retryBackoff: t.retryBackoff,
1622
+ role: t.role,
1623
+ priority: t.priority,
1624
+ verify: t.verify,
1625
+ })), team.getAgents(), queue);
1626
+ return this.executeExplicitTaskQueue(team, queue, options, options?.goal, undefined, activeCheckpoint);
1627
+ }
1628
+ if (this.isPlanArtifact(tasksOrOptions)) {
1629
+ const queue = new TaskQueue();
1630
+ const tasks = this.tasksFromPlan(tasksOrOptions);
1631
+ const validation = validateTaskDependencies(tasks);
1632
+ if (!validation.valid) {
1633
+ throw new Error(`Invalid plan artifact: ${validation.errors.join(' ')}`);
1634
+ }
1635
+ queue.addBatch(tasks);
1636
// The goal stored in the plan wins over the goal in the options.
+ return this.executeExplicitTaskQueue(team, queue, options, tasksOrOptions.goal ?? options?.goal, undefined, activeCheckpoint);
1637
+ }
1638
// No snapshot and no task source: an empty queue is executed.
+ const queue = new TaskQueue();
1639
+ return this.executeExplicitTaskQueue(team, queue, options, options?.goal, undefined, activeCheckpoint);
1640
+ }
1641
+ const sharedMem = team.getSharedMemoryInstance();
1642
+ if (sharedMem && snapshot.sharedMemory) {
1643
+ await sharedMem.restore(snapshot.sharedMemory);
1644
+ }
1645
+ else if (sharedMem && snapshot.turnCount !== undefined) {
1646
+ // Reused-store checkpoint: entries are already in the store; only the
1647
+ // turn counter needs restoring so TTL expiry resumes correctly.
1648
+ sharedMem.setTurnCount(snapshot.turnCount);
1649
+ }
1650
// NOTE(review): `resetInProgress: true` presumably makes tasks that were
// mid-flight at save time runnable again — confirm in TaskQueue.fromSnapshot.
+ const queue = TaskQueue.fromSnapshot(snapshot.queue, { resetInProgress: true });
1651
+ const agentResults = this.agentResultsFromCheckpoint(snapshot, queue);
1652
// Values recorded in the snapshot (mode, goal, runId) override the ones
// derived from this call. A snapshot can only exist when `activeCheckpoint`
// does, so the `undefined` branch is purely defensive.
+ const checkpointForResume = activeCheckpoint
1653
+ ? {
1654
+ ...activeCheckpoint,
1655
+ mode: snapshot.mode,
1656
+ ...(snapshot.goal !== undefined ? { goal: snapshot.goal } : {}),
1657
+ ...(snapshot.runId !== undefined ? { runId: snapshot.runId } : {}),
1658
+ }
1659
+ : undefined;
1660
+ return this.executeExplicitTaskQueue(team, queue, options, snapshot.goal ?? options?.goal, agentResults, checkpointForResume, options?.coordinator);
1661
+ }
1181
1662
  /**
1182
1663
  * Run a team with an explicitly provided task list.
1183
1664
  *
@@ -1200,10 +1681,94 @@ export class OpenMultiAgent {
1200
1681
  maxRetries: t.maxRetries,
1201
1682
  retryDelayMs: t.retryDelayMs,
1202
1683
  retryBackoff: t.retryBackoff,
1684
+ role: t.role,
1685
+ priority: t.priority,
1686
+ verify: t.verify,
1203
1687
  })), agentConfigs, queue);
1204
1688
  return this.executeExplicitTaskQueue(team, queue, options);
1205
1689
  }
1206
1690
  // -------------------------------------------------------------------------
1691
+ // Consensus
1692
+ // -------------------------------------------------------------------------
1693
+ /**
1694
+ * Run a proposer→judge consensus over a single prompt.
1695
+ *
1696
+ * The proposer emits an answer; judges try to refute it over up to
1697
+ * `maxRounds`, exiting early once `quorum` accept. Proposer and judge token
1698
+ * usage all count against the orchestrator's `maxTokenBudget` — crossing it
1699
+ * stops issuing further judge calls, exactly like delegation and `runTasks`.
1700
+ */
1701
+ async runConsensus(team, prompt, options) {
1702
// Accept either a single proposer config or an array of them.
+ const proposers = Array.isArray(options.proposer) ? options.proposer : [options.proposer];
1703
+ if (proposers.length === 0) {
1704
+ throw new Error('runConsensus: at least one proposer is required.');
1705
+ }
1706
+ if (options.judges.length === 0) {
1707
+ throw new Error('runConsensus: at least one judge is required.');
1708
+ }
1709
+ const mode = options.mode ?? 'refute';
1710
// At least one round; two when the caller does not specify.
+ const maxRounds = Math.max(1, options.maxRounds ?? 2);
1711
// Defaults to half the judges (rounded up), then clamped to 1..judges.length.
+ const quorum = Math.min(options.judges.length, Math.max(1, options.quorum ?? Math.ceil(options.judges.length / 2)));
1712
+ const onDissent = options.onDissent ?? 'revise';
1713
// The budget is the orchestrator-level one; it is not read from `options`.
+ const budget = this.config.maxTokenBudget;
1714
+ const defaults = {
1715
+ defaultProvider: this.config.defaultProvider,
1716
+ defaultBaseURL: this.config.defaultBaseURL,
1717
+ defaultApiKey: this.config.defaultApiKey,
1718
+ defaultCwd: this.config.defaultCwd,
1719
+ maxConcurrency: this.config.maxConcurrency,
1720
+ };
1721
// A pool created for this call; it is also handed to runConsensusCore below.
+ const pool = new AgentPool(Math.max(1, this.config.maxConcurrency));
1722
+ let usage = ZERO_USAGE;
1723
+ // Step 2: run proposer(s); accumulate usage and honour the budget before judging.
1724
+ const candidates = [];
1725
// Proposers run one at a time; only successful, non-empty outputs are kept.
+ for (const proposerConfig of proposers) {
1726
+ const r = await pool.runEphemeral(buildAgent(applyConsensusDefaults(proposerConfig, defaults)), prompt);
1727
+ usage = addUsage(usage, r.tokenUsage);
1728
+ if (r.success && r.output)
1729
+ candidates.push(r.output);
1730
// Over budget after this proposer: report it and return without running any
// judge (rounds: 0), keeping whatever candidates were collected so far.
+ if (budget !== undefined && usage.input_tokens + usage.output_tokens > budget) {
1731
+ this.config.onProgress?.({
1732
+ type: 'budget_exceeded',
1733
+ agent: proposerConfig.name,
1734
+ data: new TokenBudgetExceededError(proposerConfig.name, usage.input_tokens + usage.output_tokens, budget),
1735
+ });
1736
+ return {
1737
+ answer: candidates.join('\n\n---\n\n'),
1738
+ verdict: 'rejected',
1739
+ dissent: [],
1740
+ rounds: 0,
1741
+ tokenUsage: usage,
1742
+ };
1743
+ }
1744
+ }
1745
+ // Every proposer failed or returned empty output: there is nothing to judge.
1746
+ // Bail with a rejected verdict so an empty answer can never come back accepted.
1747
+ if (candidates.length === 0) {
1748
+ return { answer: '', verdict: 'rejected', dissent: [], rounds: 0, tokenUsage: usage };
1749
+ }
1750
+ return runConsensusCore({
1751
+ team,
1752
+ prompt,
1753
// Several proposer outputs are concatenated into one answer, separated by
// a `---` line.
+ initialAnswer: candidates.join('\n\n---\n\n'),
1754
+ initialUsage: usage,
1755
+ budgetBaseTokens: 0,
1756
+ judges: options.judges,
1757
+ mode,
1758
+ quorum,
1759
+ maxRounds,
1760
+ verdictSchema: options.verdictSchema,
1761
+ onDissent,
1762
+ judgePrompt: options.judgePrompt,
1763
+ budget,
1764
// Only the first proposer is passed on for revisions.
// NOTE(review): how runConsensusCore uses it is not visible here — confirm.
+ reviseProposer: proposers[0],
1765
+ defaults,
1766
+ onTrace: this.config.onTrace,
1767
// A run id is generated only when a trace hook is configured.
+ runId: this.config.onTrace ? generateRunId() : undefined,
1768
+ pool,
1769
+ });
1770
+ }
1771
+ // -------------------------------------------------------------------------
1207
1772
  // Observability
1208
1773
  // -------------------------------------------------------------------------
1209
1774
  /**
@@ -1241,32 +1806,32 @@ export class OpenMultiAgent {
1241
1806
  // Private helpers
1242
1807
  // -------------------------------------------------------------------------
1243
1808
  /** Build the system prompt given to the coordinator agent. */
1244
- buildCoordinatorSystemPrompt(agents) {
1809
+ buildCoordinatorSystemPrompt(agents, hasVerifyJudges) {
1245
1810
  return [
1246
1811
  'You are a task coordinator responsible for decomposing high-level goals',
1247
1812
  'into concrete, actionable tasks and assigning them to the right team members.',
1248
1813
  '',
1249
1814
  this.buildCoordinatorRosterSection(agents),
1250
1815
  '',
1251
- this.buildCoordinatorOutputFormatSection(),
1816
+ this.buildCoordinatorOutputFormatSection(hasVerifyJudges),
1252
1817
  '',
1253
1818
  this.buildCoordinatorSynthesisSection(),
1254
1819
  ].join('\n');
1255
1820
  }
1256
1821
  /** Build coordinator system prompt with optional caller overrides. */
1257
- buildCoordinatorPrompt(agents, config) {
1822
+ buildCoordinatorPrompt(agents, config, hasVerifyJudges) {
1258
1823
  if (config?.systemPrompt) {
1259
1824
  return [
1260
1825
  config.systemPrompt,
1261
1826
  '',
1262
1827
  this.buildCoordinatorRosterSection(agents),
1263
1828
  '',
1264
- this.buildCoordinatorOutputFormatSection(),
1829
+ this.buildCoordinatorOutputFormatSection(hasVerifyJudges),
1265
1830
  '',
1266
1831
  this.buildCoordinatorSynthesisSection(),
1267
1832
  ].join('\n');
1268
1833
  }
1269
- const base = this.buildCoordinatorSystemPrompt(agents);
1834
+ const base = this.buildCoordinatorSystemPrompt(agents, hasVerifyJudges);
1270
1835
  if (!config?.instructions) {
1271
1836
  return base;
1272
1837
  }
@@ -1288,8 +1853,8 @@ export class OpenMultiAgent {
1288
1853
  ].join('\n');
1289
1854
  }
1290
1855
  /** Build the coordinator JSON output-format section. */
1291
- buildCoordinatorOutputFormatSection() {
1292
- return [
1856
+ buildCoordinatorOutputFormatSection(hasVerifyJudges) {
1857
+ const lines = [
1293
1858
  '## Output Format',
1294
1859
  'When asked to decompose a goal, respond ONLY with a JSON array of task objects.',
1295
1860
  'Each task must have:',
@@ -1297,17 +1862,12 @@ export class OpenMultiAgent {
1297
1862
  ' - "description": Full task description with context and expected output (string)',
1298
1863
  ' - "assignee": One of the agent names listed in the roster (string)',
1299
1864
  ' - "dependsOn": Array of titles of tasks this task depends on (string[], may be empty).',
1300
- '',
1301
- '## Dependency Guidance',
1302
- 'Prefer the minimum set of upstream tasks each assignee needs. When deciding dependsOn for agent X:',
1303
- ' 1. Use X\'s system prompt as the primary signal for what inputs it consumes.',
1304
- ' 2. Lean toward including a task as a dependency only when X\'s system prompt names or describes needing that kind of input.',
1305
- ' 3. Avoid adding a dependency just because the information "would be useful" or matches general best practice; if X\'s system prompt gives no indication it consumes that input, prefer to leave it out.',
1306
- ' 4. When uncertain, prefer fewer dependencies over more — extra parents cost parallelism and tokens.',
1307
- '',
1308
- 'Wrap the JSON in a ```json code fence.',
1309
- 'Do not include any text outside the code fence.',
1310
- ].join('\n');
1865
+ ];
1866
+ if (hasVerifyJudges) {
1867
+ lines.push(' - "verify": (optional) Set to true to apply consensus judge verification on this task\'s result.', ' Or set to an object with any of: "mode" ("refute"|"lens"), "quorum" (number),', ' "maxRounds" (number), "onDissent" ("revise"|"reject"|"keep").', ' Omit for tasks where a single agent\'s answer is sufficient.');
1868
+ }
1869
+ lines.push('', '## Dependency Guidance', 'Prefer the minimum set of upstream tasks each assignee needs. When deciding dependsOn for agent X:', ' 1. Use X\'s system prompt as the primary signal for what inputs it consumes.', ' 2. Lean toward including a task as a dependency only when X\'s system prompt names or describes needing that kind of input.', ' 3. Avoid adding a dependency just because the information "would be useful" or matches general best practice; if X\'s system prompt gives no indication it consumes that input, prefer to leave it out.', ' 4. When uncertain, prefer fewer dependencies over more — extra parents cost parallelism and tokens.', '', 'Wrap the JSON in a ```json code fence.', 'Do not include any text outside the code fence.');
1870
+ return lines.join('\n');
1311
1871
  }
1312
1872
  /** Build the coordinator synthesis guidance section. */
1313
1873
  buildCoordinatorSynthesisSection() {
@@ -1329,6 +1889,77 @@ export class OpenMultiAgent {
1329
1889
  'Return ONLY the JSON task array in a ```json code fence.',
1330
1890
  ].join('\n');
1331
1891
  }
1892
+ /**
1893
+ * Build the base coordinator {@link AgentConfig} shared by the decomposition
1894
+ * and synthesis passes. Falls back to orchestrator defaults for any field the
1895
+ * caller's {@link CoordinatorConfig} leaves unset.
1896
+ */
1897
+ buildCoordinatorBaseConfig(coordinatorOverrides, agentConfigs, hasVerifyJudges) {
1898
// NOTE(review): unlike the agent configs built in runAgent/buildPool, this
// config is not passed through applyDefaultToolPreset — confirm that the
// coordinator is meant to ignore `this.config.defaultToolPreset`.
+ return {
1899
// The agent name is fixed; overrides cannot change it.
+ name: 'coordinator',
1900
+ model: coordinatorOverrides?.model ?? this.config.defaultModel,
1901
// `adapter` is only added as a key when the caller actually set one.
+ ...(coordinatorOverrides?.adapter !== undefined ? { adapter: coordinatorOverrides.adapter } : {}),
1902
+ provider: coordinatorOverrides?.provider ?? this.config.defaultProvider,
1903
+ baseURL: coordinatorOverrides?.baseURL ?? this.config.defaultBaseURL,
1904
+ apiKey: coordinatorOverrides?.apiKey ?? this.config.defaultApiKey,
1905
+ systemPrompt: this.buildCoordinatorPrompt(agentConfigs, coordinatorOverrides, hasVerifyJudges),
1906
+ maxTurns: coordinatorOverrides?.maxTurns ?? 3,
1907
// The fields from here to `disallowedTools` are copied straight from the
// overrides; there is no orchestrator-level fallback for them.
+ maxTokens: coordinatorOverrides?.maxTokens,
1908
+ temperature: coordinatorOverrides?.temperature,
1909
+ topP: coordinatorOverrides?.topP,
1910
+ topK: coordinatorOverrides?.topK,
1911
+ minP: coordinatorOverrides?.minP,
1912
+ parallelToolCalls: coordinatorOverrides?.parallelToolCalls,
1913
+ frequencyPenalty: coordinatorOverrides?.frequencyPenalty,
1914
+ presencePenalty: coordinatorOverrides?.presencePenalty,
1915
+ extraBody: coordinatorOverrides?.extraBody,
1916
+ toolPreset: coordinatorOverrides?.toolPreset,
1917
+ tools: coordinatorOverrides?.tools,
1918
+ disallowedTools: coordinatorOverrides?.disallowedTools,
1919
// Only `undefined` falls back to the default cwd; any other value, including
// `null`, is passed through exactly as given.
+ cwd: coordinatorOverrides?.cwd === undefined
1920
+ ? this.config.defaultCwd
1921
+ : coordinatorOverrides.cwd,
1922
+ loopDetection: coordinatorOverrides?.loopDetection,
1923
+ timeoutMs: coordinatorOverrides?.timeoutMs,
1924
+ };
1925
+ }
1926
+ /**
1927
+ * Run the coordinator synthesis pass over completed task results. Returns the
1928
+ * synthesis result plus updated cumulative usage, or `null` when synthesis is
1929
+ * skipped (run aborted, or the token budget was already exhausted before the
1930
+ * pass). Emits `budget_exceeded` (when synthesis tips over budget) and
1931
+ * `agent_complete`, mirroring the inline `runTeam` path. Does not mutate
1932
+ * `agentResults` — the caller records the `'coordinator'` entry.
1933
+ */
1934
+ async runCoordinatorSynthesis(team, queue, goal, coordinatorBaseConfig, opts) {
1935
// Skip entirely when the run has already been aborted.
+ if (opts.abortSignal?.aborted)
1936
+ return null;
1937
// Skip when usage is already strictly above the budget before synthesis starts.
+ if (opts.maxTokenBudget !== undefined
1938
+ && opts.cumulativeUsage.input_tokens + opts.cumulativeUsage.output_tokens > opts.maxTokenBudget) {
1939
+ return null;
1940
+ }
1941
+ const synthesisPrompt = await this.buildSynthesisPrompt(goal, queue.list(), team);
1942
// A new agent is built for this pass, with the base config routed for the
// 'synthesis' phase.
+ const synthesisAgent = buildAgent(withModelRoute(coordinatorBaseConfig, routeMatches(opts.modelRouting, { phase: 'synthesis', agent: 'coordinator' })));
1943
// Trace options are supplied only when a trace hook is configured; a missing
// run id is sent as an empty string.
+ const synthTraceOptions = this.config.onTrace
1944
+ ? { onTrace: this.config.onTrace, runId: opts.runId ?? '', traceAgent: 'coordinator' }
1945
+ : undefined;
1946
+ const result = await synthesisAgent.run(synthesisPrompt, synthTraceOptions);
1947
// `opts.cumulativeUsage` is not reassigned; the new total is returned instead.
+ const cumulativeUsage = addUsage(opts.cumulativeUsage, result.tokenUsage);
1948
// Tipping over the budget during synthesis is reported, but the result is
// still returned to the caller.
+ if (opts.maxTokenBudget !== undefined
1949
+ && cumulativeUsage.input_tokens + cumulativeUsage.output_tokens > opts.maxTokenBudget) {
1950
+ this.config.onProgress?.({
1951
+ type: 'budget_exceeded',
1952
+ agent: 'coordinator',
1953
+ data: new TokenBudgetExceededError('coordinator', cumulativeUsage.input_tokens + cumulativeUsage.output_tokens, opts.maxTokenBudget),
1954
+ });
1955
+ }
1956
// `agent_complete` is emitted without checking `result.success`.
+ this.config.onProgress?.({
1957
+ type: 'agent_complete',
1958
+ agent: 'coordinator',
1959
+ data: result,
1960
+ });
1961
+ return { result, cumulativeUsage };
1962
+ }
1332
1963
  /** Build the synthesis prompt shown to the coordinator after all tasks complete. */
1333
1964
  async buildSynthesisPrompt(goal, tasks, team) {
1334
1965
  const completedTasks = tasks.filter((t) => t.status === 'completed');
@@ -1379,18 +2010,20 @@ export class OpenMultiAgent {
1379
2010
  ...(task.retryBackoff !== undefined ? { retryBackoff: task.retryBackoff } : {}),
1380
2011
  }));
1381
2012
  }
1382
- async executeExplicitTaskQueue(team, queue, options, goal) {
2013
+ async executeExplicitTaskQueue(team, queue, options, goal, initialAgentResults, activeCheckpoint, coordinatorForSynthesis) {
1383
2014
  const agentConfigs = team.getAgents();
1384
2015
  const scheduler = new Scheduler('dependency-first');
1385
2016
  scheduler.autoAssign(queue, agentConfigs);
1386
2017
  const pool = this.buildPool(agentConfigs);
1387
- const agentResults = new Map();
2018
+ const agentResults = initialAgentResults ?? new Map();
2019
+ const checkpoint = activeCheckpoint ?? this.createActiveCheckpoint(team, options?.checkpoint ?? this.config.checkpoint, 'runTasks', goal);
1388
2020
  const ctx = {
1389
2021
  team,
1390
2022
  pool,
1391
2023
  scheduler,
1392
2024
  agentResults,
1393
2025
  config: this.config,
2026
+ ...(checkpoint ? { checkpoint } : {}),
1394
2027
  runId: this.config.onTrace ? generateRunId() : undefined,
1395
2028
  abortSignal: options?.abortSignal,
1396
2029
  cumulativeUsage: ZERO_USAGE,
@@ -1398,8 +2031,50 @@ export class OpenMultiAgent {
1398
2031
  budgetExceededTriggered: false,
1399
2032
  budgetExceededReason: undefined,
1400
2033
  taskMetrics: new Map(),
2034
+ modelRouting: options?.modelRouting,
2035
+ taskById: new Map(queue.list().map((task) => [task.id, task])),
2036
+ taskLeafById: new Map(queue.list().map((task) => [task.id, isLeafTask(task, queue.list())])),
1401
2037
  };
1402
2038
  await executeQueue(queue, ctx);
2039
+ // A resumed `runTeam` re-runs the coordinator synthesis so the restored
2040
+ // result matches a fresh `runTeam` (a synthesized final answer, not raw
2041
+ // per-task outputs). Best-effort: a missing/unusable coordinator config or
2042
+ // a failing synthesis call must not discard the recovered work — on failure
2043
+ // we surface `synthesis_failed` and fall back to raw outputs.
2044
+ if (checkpoint?.mode === 'runTeam' && goal !== undefined) {
2045
+ try {
2046
+ const coordinatorBaseConfig = this.buildCoordinatorBaseConfig(coordinatorForSynthesis, agentConfigs, false);
2047
+ const synthesis = await this.runCoordinatorSynthesis(team, queue, goal, coordinatorBaseConfig, {
2048
+ modelRouting: options?.modelRouting,
2049
+ runId: ctx.runId,
2050
+ abortSignal: options?.abortSignal,
2051
+ cumulativeUsage: ctx.cumulativeUsage,
2052
+ maxTokenBudget: ctx.maxTokenBudget,
2053
+ });
2054
+ if (synthesis !== null && synthesis.result.success) {
2055
+ agentResults.set('coordinator', synthesis.result);
2056
+ ctx.cumulativeUsage = synthesis.cumulativeUsage;
2057
+ }
2058
+ else if (synthesis !== null) {
2059
+ // Synthesis ran but the coordinator agent failed (e.g. the LLM call
2060
+ // errored). Keep the recovered task outputs and surface the failure
2061
+ // rather than attaching a failed answer under `'coordinator'`.
2062
+ this.config.onProgress?.({
2063
+ type: 'error',
2064
+ data: {
2065
+ kind: 'synthesis_failed',
2066
+ error: new Error(synthesis.result.output || 'coordinator synthesis failed'),
2067
+ },
2068
+ });
2069
+ }
2070
+ }
2071
+ catch (error) {
2072
+ this.config.onProgress?.({
2073
+ type: 'error',
2074
+ data: { kind: 'synthesis_failed', error },
2075
+ });
2076
+ }
2077
+ }
1403
2078
  const taskRecords = queue.list().map((task) => ({
1404
2079
  id: task.id,
1405
2080
  title: task.title,
@@ -1411,17 +2086,68 @@ export class OpenMultiAgent {
1411
2086
  maxRetries: task.maxRetries,
1412
2087
  retryDelayMs: task.retryDelayMs,
1413
2088
  retryBackoff: task.retryBackoff,
2089
+ verify: task.verify,
1414
2090
  metrics: ctx.taskMetrics.get(task.id),
1415
2091
  }));
1416
2092
  return this.buildTeamRunResult(agentResults, goal, taskRecords);
1417
2093
  }
2094
/**
 * Turn a checkpoint setting into the checkpoint state object used by a run.
 *
 * `config` may be `undefined`, a boolean, or an options object. Returns
 * `undefined` (checkpointing off) when `config` is `undefined` or `false`, or
 * when the options object has `enabled === false`. `true` is treated as an
 * empty options object.
 *
 * The store is resolved in this order: `options.store`, the team's shared
 * memory (`team.getSharedMemory()`), then `this.fallbackCheckpointStore`.
 *
 * Throws an `Error` when neither of the first two stores is available and the
 * options carry neither a `runId` nor a `key`.
 */
+ createActiveCheckpoint(team, config, mode, goal) {
2095
+ if (config === undefined || config === false)
2096
+ return undefined;
2097
+ const options = config === true ? {} : config;
2098
+ if (options.enabled === false)
2099
+ return undefined;
2100
+ // The instance-level fallback store is shared across every run on this
2101
+ // orchestrator, so concurrent runs would overwrite each other at the
2102
+ // default checkpoint key. Require a `runId` (or an explicit `key`/`store`)
2103
+ // before falling back, so each run resolves to a distinct, resumable key.
2104
+ const sharedStore = team.getSharedMemory();
2105
// Despite its name, `explicitStore` also covers the team's shared-memory
// store, not only a store passed in the options.
+ const explicitStore = options.store ?? sharedStore;
2106
+ if (!explicitStore && options.runId === undefined && options.key === undefined) {
2107
+ throw new Error('Checkpoint requires a `runId` (or an explicit `store`/`key`) when the team has no ' +
2108
+ 'shared-memory store. Without one, concurrent runs would share the fallback store and ' +
2109
+ "overwrite each other's checkpoint at the default key.");
2110
+ }
2111
+ const store = explicitStore ?? this.fallbackCheckpointStore;
2112
+ return {
2113
+ manager: new Checkpoint(store, options),
2114
+ mode,
2115
// `goal` and `runId` are added as keys only when they are defined.
+ ...(goal !== undefined ? { goal } : {}),
2116
+ ...(options.runId !== undefined ? { runId: options.runId } : {}),
2117
// True only when the checkpoint is written to the team's own shared-memory store.
+ reusesSharedMemoryStore: sharedStore !== undefined && store === sharedStore,
2118
// Starts as an already-resolved promise.
// NOTE(review): presumably later saves are chained onto it to keep them
// ordered — confirm where `saveChain` is consumed.
+ saveChain: Promise.resolve(),
2119
+ };
2120
+ }
2121
/**
 * Rebuild the agent-results map from the completed tasks stored in a
 * checkpoint snapshot.
 *
 * Each entry of `snapshot.completedTaskResults` becomes one map entry keyed
 * `"<assignee>:<taskId>"`. The entry is marked successful and carries the
 * stored output, with empty messages, empty tool calls and zero token usage.
 *
 * Returns a new `Map`; neither the snapshot nor the queue is modified here.
 */
+ agentResultsFromCheckpoint(snapshot, queue) {
2122
// Index the restored tasks by id so each completed result can find its task.
+ const taskById = new Map(queue.list().map((task) => [task.id, task]));
2123
+ const agentResults = new Map();
2124
+ for (const completed of snapshot.completedTaskResults) {
2125
+ const task = taskById.get(completed.taskId);
2126
// Prefer what the snapshot recorded, then the restored task, then a placeholder.
+ const assignee = completed.assignee ?? task?.assignee ?? 'unknown';
2127
+ const output = completed.result ?? task?.result ?? '';
2128
+ agentResults.set(`${assignee}:${completed.taskId}`, {
2129
+ success: true,
2130
+ output,
2131
+ messages: [],
2132
// Usage is zeroed here rather than read from the snapshot.
+ tokenUsage: ZERO_USAGE,
2133
+ toolCalls: [],
2134
+ });
2135
+ }
2136
+ return agentResults;
2137
+ }
2138
/**
 * Shape check for a plan artifact.
 *
 * Returns `true` only when `value` is a non-null object whose `version` is
 * exactly `1` and whose `tasks` property is an array. No other fields and no
 * task contents are inspected.
 */
+ isPlanArtifact(value) {
2139
// `typeof null` is 'object', so null is excluded explicitly.
+ if (value === null || typeof value !== 'object')
2140
+ return false;
2141
+ const artifact = value;
2142
+ return artifact['version'] === 1 && Array.isArray(artifact['tasks']);
2143
+ }
1418
2144
  /**
1419
2145
  * Load a list of task specs into a queue.
1420
2146
  *
1421
2147
  * Handles title-based `dependsOn` references by building a title→id map first,
1422
2148
  * then resolving them to real IDs before adding tasks to the queue.
1423
2149
  */
1424
- loadSpecsIntoQueue(specs, agentConfigs, queue) {
2150
+ loadSpecsIntoQueue(specs, agentConfigs, queue, verifyJudges) {
1425
2151
  const agentNames = new Set(agentConfigs.map((a) => a.name));
1426
2152
  const normalizeTitle = (title) => title.toLowerCase().trim();
1427
2153
  const titleCounts = new Map();
@@ -1443,6 +2169,9 @@ export class OpenMultiAgent {
1443
2169
  maxRetries: spec.maxRetries,
1444
2170
  retryDelayMs: spec.retryDelayMs,
1445
2171
  retryBackoff: spec.retryBackoff,
2172
+ role: spec.role,
2173
+ priority: spec.priority,
2174
+ verify: resolveVerify(spec.verify, verifyJudges),
1446
2175
  });
1447
2176
  const titleKey = normalizeTitle(spec.title);
1448
2177
  if ((titleCounts.get(titleKey) ?? 0) === 1) {
@@ -1488,14 +2217,14 @@ export class OpenMultiAgent {
1488
2217
  buildPool(agentConfigs) {
1489
2218
  const pool = new AgentPool(this.config.maxConcurrency);
1490
2219
  for (const config of agentConfigs) {
1491
- const effective = {
2220
+ const effective = applyDefaultToolPreset({
1492
2221
  ...config,
1493
2222
  model: config.model,
1494
2223
  provider: config.provider ?? this.config.defaultProvider,
1495
2224
  baseURL: config.baseURL ?? this.config.defaultBaseURL,
1496
2225
  apiKey: config.apiKey ?? this.config.defaultApiKey,
1497
2226
  cwd: config.cwd === undefined ? this.config.defaultCwd : config.cwd,
1498
- };
2227
+ }, this.config.defaultToolPreset);
1499
2228
  pool.add(buildAgent(effective, { includeDelegateTool: true }));
1500
2229
  }
1501
2230
  return pool;