@voybio/ace-swarm 2.4.0 → 2.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/CHANGELOG.md +8 -0
  2. package/README.md +1 -0
  3. package/assets/.agents/ACE/agent-qa/instructions.md +11 -0
  4. package/assets/agent-state/MODULES/schemas/RUNTIME_TOOL_SPEC_REGISTRY.schema.json +43 -0
  5. package/assets/agent-state/runtime-tool-specs.json +70 -2
  6. package/assets/instructions/ACE_Coder.instructions.md +13 -0
  7. package/assets/instructions/ACE_UI.instructions.md +11 -0
  8. package/dist/ace-context.js +70 -11
  9. package/dist/ace-internal-tools.d.ts +3 -1
  10. package/dist/ace-internal-tools.js +10 -2
  11. package/dist/agent-runtime/role-adapters.d.ts +18 -1
  12. package/dist/agent-runtime/role-adapters.js +49 -5
  13. package/dist/astgrep-index.d.ts +48 -0
  14. package/dist/astgrep-index.js +126 -1
  15. package/dist/cli.js +205 -15
  16. package/dist/discovery-runtime-wrappers.d.ts +108 -0
  17. package/dist/discovery-runtime-wrappers.js +615 -0
  18. package/dist/helpers/bootstrap.js +1 -1
  19. package/dist/helpers/constants.d.ts +2 -2
  20. package/dist/helpers/constants.js +7 -0
  21. package/dist/helpers/path-utils.d.ts +8 -1
  22. package/dist/helpers/path-utils.js +27 -8
  23. package/dist/helpers/store-resolution.js +7 -3
  24. package/dist/job-scheduler.js +30 -4
  25. package/dist/json-sanitizer.d.ts +16 -0
  26. package/dist/json-sanitizer.js +26 -0
  27. package/dist/local-model-policy.d.ts +27 -0
  28. package/dist/local-model-policy.js +84 -0
  29. package/dist/local-model-runtime.d.ts +6 -0
  30. package/dist/local-model-runtime.js +21 -20
  31. package/dist/model-bridge.d.ts +6 -1
  32. package/dist/model-bridge.js +338 -21
  33. package/dist/orchestrator-supervisor.d.ts +42 -0
  34. package/dist/orchestrator-supervisor.js +110 -3
  35. package/dist/plan-proposal.d.ts +115 -0
  36. package/dist/plan-proposal.js +1073 -0
  37. package/dist/runtime-executor.d.ts +6 -1
  38. package/dist/runtime-executor.js +72 -5
  39. package/dist/runtime-tool-specs.d.ts +19 -1
  40. package/dist/runtime-tool-specs.js +67 -26
  41. package/dist/schemas.js +29 -1
  42. package/dist/server.js +51 -0
  43. package/dist/shared.d.ts +1 -0
  44. package/dist/shared.js +2 -0
  45. package/dist/store/bootstrap-store.d.ts +1 -0
  46. package/dist/store/bootstrap-store.js +8 -2
  47. package/dist/store/repositories/local-model-runtime-repository.d.ts +1 -1
  48. package/dist/store/repositories/local-model-runtime-repository.js +1 -1
  49. package/dist/store/repositories/vericify-repository.d.ts +1 -1
  50. package/dist/tools-agent.d.ts +20 -0
  51. package/dist/tools-agent.js +538 -28
  52. package/dist/tools-discovery.js +135 -0
  53. package/dist/tools-files.js +768 -66
  54. package/dist/tools-framework.js +80 -61
  55. package/dist/tui/index.js +10 -1
  56. package/dist/tui/ollama.d.ts +8 -1
  57. package/dist/tui/ollama.js +53 -12
  58. package/dist/tui/openai-compatible.d.ts +13 -0
  59. package/dist/tui/openai-compatible.js +305 -5
  60. package/dist/tui/provider-discovery.d.ts +1 -0
  61. package/dist/tui/provider-discovery.js +35 -11
  62. package/dist/vericify-bridge.d.ts +1 -1
  63. package/package.json +1 -1
@@ -1,9 +1,23 @@
1
1
  import { randomUUID } from "node:crypto";
2
- import { mkdirSync, writeFileSync } from "node:fs";
3
- import { dirname, resolve } from "node:path";
2
+ import { existsSync, mkdirSync, writeFileSync, realpathSync } from "node:fs";
3
+ import { dirname, isAbsolute, resolve, sep } from "node:path";
4
4
  import { executeAceInternalTool, listAceInternalToolCatalog, } from "./ace-internal-tools.js";
5
5
  import { appendVericifyProcessPostSafe, deriveWorkspaceVericifyRunRef, } from "./vericify-bridge.js";
6
6
  import { buildToolPlan, renderAceContext } from "./ace-context.js";
7
+ import { appendRunLedgerEntrySafe } from "./run-ledger.js";
8
+ import { appendStatusEventSafe } from "./status-events.js";
9
+ import { normalizeRelPath } from "./helpers.js";
10
+ import { sanitizeJsonLikeText } from "./json-sanitizer.js";
/**
 * Roles that MUST produce a valid JSON envelope in every response.
 * A parse_error from these roles is a contract failure, not a plain-text fallback.
 * The bridge injects a repair prompt (up to MAX_PARSE_REPAIR_ATTEMPTS times) before
 * giving up and reporting a "role_contract_violation".
 */
const ROLES_REQUIRING_JSON_ENVELOPE = new Set([
    "coders",
    "builder",
    "qa",
]);
/** The only `status` values an envelope may carry; anything else is rejected by parseEnvelope. */
const VALID_ENVELOPE_STATUSES = new Set([
    "tool",
    "message",
    "complete",
    "need_input",
]);
/** Upper bound on repair prompts sent after a parse_error from a JSON-envelope role. */
const MAX_PARSE_REPAIR_ATTEMPTS = 2;
/** Upper bound on correction prompts sent after an output-shape drift violation. */
const MAX_OUTPUT_DRIFT_REPAIRS = 1;
7
21
  function resolveProviderClient(input) {
8
22
  const display = input.trim() || "ollama";
9
23
  const normalized = display.toLowerCase();
@@ -11,8 +25,11 @@ function resolveProviderClient(input) {
11
25
  ? { display, client: "ollama" }
12
26
  : { display, client: "openai-compatible" };
13
27
  }
/**
 * Passes raw model output through `sanitizeJsonLikeText` and returns only the
 * `text` field of the sanitizer's result.
 *
 * @param raw Raw text as received from the model provider.
 * @returns The sanitized text.
 */
function sanitizeModelOutput(raw) {
    const { text } = sanitizeJsonLikeText(raw);
    return text;
}
14
31
  function extractJsonEnvelope(raw) {
15
- const trimmed = raw.trim();
32
+ const trimmed = sanitizeModelOutput(raw).trim();
16
33
  if (trimmed.startsWith("{") && trimmed.endsWith("}")) {
17
34
  return trimmed;
18
35
  }
@@ -29,11 +46,11 @@ function extractJsonEnvelope(raw) {
29
46
  let escape = false;
30
47
  for (let index = firstBrace; index < trimmed.length; index += 1) {
31
48
  const ch = trimmed[index];
32
- if (escape) {
49
+ if (inString && escape) {
33
50
  escape = false;
34
51
  continue;
35
52
  }
36
- if (ch === "\\") {
53
+ if (inString && ch === "\\") {
37
54
  escape = true;
38
55
  continue;
39
56
  }
@@ -61,18 +78,60 @@ function parseEnvelope(raw) {
61
78
  if (!parsed || typeof parsed !== "object") {
62
79
  throw new Error("response is not an object");
63
80
  }
64
- if (!parsed.status) {
65
- throw new Error("missing status");
81
+ if (typeof parsed.status !== "string" || !VALID_ENVELOPE_STATUSES.has(parsed.status)) {
82
+ throw new Error("missing or invalid status");
83
+ }
84
+ if (parsed.tool_calls !== undefined && !Array.isArray(parsed.tool_calls)) {
85
+ throw new Error("tool_calls must be an array");
86
+ }
87
+ if (parsed.evidence_refs !== undefined) {
88
+ if (!Array.isArray(parsed.evidence_refs)) {
89
+ throw new Error("evidence_refs must be an array");
90
+ }
91
+ parsed.evidence_refs = parsed.evidence_refs
92
+ .filter((ref) => typeof ref === "string")
93
+ .map((ref) => ref.trim())
94
+ .filter(Boolean);
66
95
  }
67
96
  return parsed;
68
97
  }
69
98
  catch {
70
99
  return {
71
100
  status: "parse_error",
72
- message: summarizeSnippet(raw.trim() || "[empty response]", 240),
101
+ message: summarizeSnippet(sanitizeModelOutput(raw).trim() || "[empty response]", 240),
73
102
  };
74
103
  }
75
104
  }
/**
 * Counts the characters that sit inside Markdown code fences, fence markers included.
 * A fence that is opened but never closed (e.g. truncated model output) is counted
 * from its opening marker through to the end of the text.
 */
function countFencedCodeChars(text) {
    const FENCE = "```";
    let total = 0;
    let cursor = 0;
    while (cursor < text.length) {
        const open = text.indexOf(FENCE, cursor);
        if (open === -1) {
            break;
        }
        const close = text.indexOf(FENCE, open + FENCE.length);
        if (close === -1) {
            // Unterminated fence: everything after the opener is code.
            total += text.length - open;
            break;
        }
        total += close + FENCE.length - open;
        cursor = close + FENCE.length;
    }
    return total;
}
/**
 * Validates the semantic shape of a completed bridge output against role-specific
 * output contracts. Returns a violation description if drift is detected, or null
 * if the output is clean.
 *
 * Rules:
 *  - `qa`: more than 800 characters of fenced code (closed or unterminated fences)
 *    is treated as artifact rewriting.
 *  - `vos` / `ui`: more than 8 HTML opening tags is treated as document authoring.
 *  - any other role: never flagged.
 *
 * Thresholds are conservative to avoid false positives on legitimate inline snippets.
 *
 * @param role Role name of the agent that produced the output.
 * @param text The completion text to inspect; non-string values are treated as empty.
 * @param _toolResults Unused; kept so the call signature stays stable.
 * @returns A human-readable violation description, or null when no drift is found.
 */
function checkOutputShapeDrift(role, text, _toolResults) {
    const output = typeof text === "string" ? text : "";
    if (role === "qa") {
        // qa must return a short structured verdict, not a rewritten artifact.
        // Large code fences in the output strongly suggest artifact rewriting.
        const totalCodeChars = countFencedCodeChars(output);
        if (totalCodeChars > 800) {
            return ("output contains large code blocks — qa must return a short structured verdict, " +
                "not a rewritten artifact. Include a one-paragraph verdict and failure classification, " +
                "not the full file content.");
        }
    }
    if (role === "vos" || role === "ui") {
        // vos/ui primary output is prose; high HTML-tag density indicates a full document
        // was produced instead of planning prose. Closing tags are not counted because
        // the pattern requires a letter directly after "<".
        const htmlTagCount = (output.match(/<[a-z][^>]*>/gi) ?? []).length;
        if (htmlTagCount > 8) {
            return (`${role} output contains ${htmlTagCount} HTML opening tags — primary output must be ` +
                `prose. HTML authoring is the coders role's responsibility. Restate as plain prose.`);
        }
    }
    return null;
}
76
135
  function parseToolPlan(raw, catalog) {
77
136
  const candidate = extractJsonEnvelope(raw);
78
137
  const allowedTools = new Set(catalog.map((tool) => tool.name));
@@ -127,6 +186,9 @@ function summarizeSnippet(text, maxChars = 240) {
127
186
  return normalized;
128
187
  return `${normalized.slice(0, Math.max(0, maxChars - 1)).trimEnd()}…`;
129
188
  }
/**
 * Reads a free-text envelope field defensively.
 *
 * @param value Any value taken from a parsed envelope (e.g. `message`, `summary`).
 * @returns The trimmed string, or "" when the value is not a string.
 */
function readEnvelopeText(value) {
    if (typeof value !== "string") {
        return "";
    }
    return value.trim();
}
130
192
  function buildHistorySummary(messages, maxChars) {
131
193
  const lines = messages.slice(-8).map((message) => {
132
194
  const content = summarizeSnippet(messageText(message).replace(/^Conversation summary:\s*/i, ""), 220);
@@ -209,6 +271,105 @@ function truncateToolResult(result, workspace, maxChars = 3000) {
209
271
  function formatErrorMessage(error) {
210
272
  return error instanceof Error ? error.message : String(error);
211
273
  }
/**
 * Reduces an evidence reference to a workspace-relative path, or rejects it.
 *
 * Anything from the first "#" onward is dropped before validation. The reference is
 * rejected (returns undefined) when the remaining path is empty, contains a NUL
 * character, starts with "~", is absolute, or — after `normalizeRelPath` — is empty,
 * equals "..", or starts with "../".
 *
 * NOTE(review): the traversal check relies on `normalizeRelPath` collapsing interior
 * ".." segments and using "/" separators — confirm against that helper.
 *
 * @param ref Evidence reference string, optionally carrying a "#fragment".
 * @returns The normalized relative path, or undefined when the reference is rejected.
 */
function normalizeEvidenceRefPath(ref) {
    const [beforeFragment = ""] = ref.split("#", 1);
    const candidate = beforeFragment.trim();
    const rejectedUpFront = candidate === "" ||
        candidate.includes("\0") ||
        candidate.startsWith("~") ||
        isAbsolute(candidate);
    if (rejectedUpFront) {
        return undefined;
    }
    const relativePath = normalizeRelPath(candidate);
    const escapesWorkspace = !relativePath || relativePath === ".." || relativePath.startsWith("../");
    return escapesWorkspace ? undefined : relativePath;
}
/**
 * Reports whether an evidence reference points at something that exists on disk
 * inside the workspace.
 *
 * The reference is first validated by `normalizeEvidenceRefPath`. Both the workspace
 * root and the candidate are then resolved with `realpathSync`, so a symlink that
 * leads outside the workspace is rejected. `realpathSync` throws for paths that do
 * not exist, which is how missing files end up as `false`.
 *
 * @param ref Evidence reference (workspace-relative path, optional "#fragment").
 * @param workspace Workspace root directory.
 * @returns true only when the reference resolves to an existing path at or under the
 *          real workspace root; false for rejected, missing, or escaping references
 *          and for any filesystem error.
 */
function workspaceEvidenceExists(ref, workspace) {
    const relativeRef = normalizeEvidenceRefPath(ref);
    if (!relativeRef) {
        return false;
    }
    try {
        const root = resolve(workspace);
        // Prevent symlink escape: compare real (resolved) filesystem paths.
        const realRoot = realpathSync(root);
        const realCandidate = realpathSync(resolve(root, relativeRef));
        if (realCandidate === realRoot) {
            return true;
        }
        // A filesystem or drive root already ends with the separator; appending another
        // one would build a prefix ("//", "C:\\\\") that no real path starts with.
        const rootPrefix = realRoot.endsWith(sep) ? realRoot : realRoot + sep;
        // No extra existence probe is needed: realpathSync succeeded, so the path exists.
        return realCandidate.startsWith(rootPrefix);
    }
    catch {
        // Missing path, permission error, or invalid workspace: treat as "no evidence".
        return false;
    }
}
/**
 * Merges several groups of evidence references into one de-duplicated list.
 *
 * Each reference is trimmed; blank references are dropped and first-seen order is
 * preserved. Nullish groups are skipped. Entries that are not strings are skipped as
 * well, so a malformed reference cannot crash the merge.
 *
 * @param groups Any number of reference arrays; individual groups may be null/undefined.
 * @returns The unique, trimmed references, or undefined when none remain.
 */
function mergeEvidenceRefs(...groups) {
    const unique = new Set();
    for (const group of groups) {
        if (group == null) {
            continue;
        }
        // A bare (non-array) value is treated as a single-entry group.
        const entries = Array.isArray(group) ? group : [group];
        for (const entry of entries) {
            if (typeof entry !== "string") {
                continue;
            }
            const ref = entry.trim();
            if (ref) {
                unique.add(ref);
            }
        }
    }
    return unique.size > 0 ? [...unique] : undefined;
}
/**
 * Records that a model claimed completion without verifiable evidence.
 *
 * Two records are written in sequence: a run-ledger entry (category "regression")
 * and a status event ("MODEL_BRIDGE_COMPLETION_BLOCKED"). A rejection from either
 * writer is swallowed so that bookkeeping cannot fail the bridge run.
 *
 * @param input Object with `role`, `workspace`, `summary`, and optional `evidence_refs`.
 * @returns A promise that settles once both writes have been attempted.
 */
async function appendFalseCompletionEvidence(input) {
    const REASON_CODE = "false_completion_no_evidence";
    const ignoreRejection = () => undefined;
    const refsOrEmpty = () => input.evidence_refs ?? [];
    // Built fresh for each record so the two writers never share an object.
    const describeFailure = () => ({
        reason_code: REASON_CODE,
        role: input.role,
        workspace: input.workspace,
        evidence_refs: refsOrEmpty(),
    });
    const ledgerEntry = {
        tool: "model-bridge",
        category: "regression",
        message: input.summary,
        artifacts: refsOrEmpty(),
        metadata: describeFailure(),
    };
    await appendRunLedgerEntrySafe(ledgerEntry).catch(ignoreRejection);
    const statusEvent = {
        source_module: "capability-qa",
        event_type: "MODEL_BRIDGE_COMPLETION_BLOCKED",
        status: "blocked",
        summary: input.summary,
        objective_id: "model-bridge-completion-verification",
        payload: describeFailure(),
    };
    await appendStatusEventSafe(statusEvent).catch(ignoreRejection);
}
/**
 * Downgrades a "completed" bridge result to "blocked" when nothing on disk backs up
 * the completion claim.
 *
 * Verification only runs when at least one of these holds: the role is "coders" or
 * "builder" and the task text contains a mutation verb; expected artifacts were
 * declared; or a `write_workspace_file` tool call was made. When it runs, any ONE of
 * the following is accepted as proof:
 *  - a successful `write_workspace_file` call plus at least one touched path that
 *    exists in the workspace;
 *  - at least one declared evidence ref that exists in the workspace;
 *  - expected artifacts were declared and every one not marked `required: false`
 *    exists in the workspace.
 *
 * NOTE(review): because the three signals are OR-ed, a missing required artifact can
 * be masked by an unrelated evidence ref that happens to exist — confirm this is the
 * intended policy.
 * NOTE(review): when every expected artifact is `required: false`, the third signal
 * is vacuously true — confirm this is intended.
 *
 * @param result Bridge result about to be returned to the caller.
 * @param context `{ role, task, workspace, touchedPaths, expectedArtifacts? }`.
 * @returns The original result when it is not "completed", needs no verification, or
 *          is backed by evidence; otherwise a copy with status "blocked" and
 *          reason_code "false_completion_no_evidence".
 */
async function verifyCompletionArtifacts(result, context) {
    if (result.status !== "completed") {
        return result;
    }
    const declaredArtifacts = context.expectedArtifacts ?? [];
    const isWriteCall = (call) => call.tool === "write_workspace_file";
    const existsInWorkspace = (ref) => workspaceEvidenceExists(ref, context.workspace);
    const taskImpliesMutation = /\b(write|create|mutate|edit|persist|save|generate)\b/i.test(context.task);
    const isMutatingRole = context.role === "coders" || context.role === "builder";
    const verificationNeeded = (isMutatingRole && taskImpliesMutation) ||
        declaredArtifacts.length > 0 ||
        result.tool_calls.some((call) => isWriteCall(call));
    if (!verificationNeeded) {
        return result;
    }
    // All three signals are evaluated up front, mirroring the eager checks callers rely on.
    const hasWriteEvidence = result.tool_calls.some((call) => isWriteCall(call) && call.ok) &&
        context.touchedPaths.some((touched) => existsInWorkspace(touched));
    const declaredRefs = result.evidence_refs ?? [];
    const hasDeclaredEvidence = declaredRefs.length > 0 && declaredRefs.some((ref) => existsInWorkspace(ref));
    const hasExpectedArtifacts = declaredArtifacts.length > 0 &&
        declaredArtifacts
            .filter((artifact) => artifact.required !== false)
            .every((artifact) => existsInWorkspace(artifact.path));
    const evidenceFound = hasWriteEvidence || hasDeclaredEvidence || hasExpectedArtifacts;
    if (evidenceFound) {
        return result;
    }
    const summary = "Model claimed completion but no persisted evidence or tool-calls found.";
    await appendFalseCompletionEvidence({
        role: context.role,
        workspace: context.workspace,
        summary,
        evidence_refs: result.evidence_refs,
    });
    const blocked = {
        ...result,
        status: "blocked",
        reason_code: "false_completion_no_evidence",
        summary,
    };
    return blocked;
}
212
373
  function isRetryableProviderError(error) {
213
374
  const message = formatErrorMessage(error).toLowerCase();
214
375
  return !/(abort|aborted|cancelled|canceled|interrupted)/.test(message);
@@ -254,6 +415,16 @@ async function collectOpenAiCompatibleResponse(client, provider, model, messages
254
415
  messages,
255
416
  temperature: 0.2,
256
417
  topP: 0.9,
418
+ onProviderEvent: (event) => {
419
+ onProgress?.({
420
+ kind: "thinking",
421
+ at: Date.now(),
422
+ detail: {
423
+ reason: "provider_adapter_event",
424
+ ...event,
425
+ },
426
+ });
427
+ },
257
428
  })) {
258
429
  combined += chunk.text;
259
430
  if (chunk.text || chunk.done) {
@@ -378,9 +549,12 @@ export class ModelBridge {
378
549
  const provider = resolveProviderClient(options.provider);
379
550
  const numCtx = options.numCtx ??
380
551
  (requestedTier === "brief" ? 4096 : requestedTier === "compressed" ? 8192 : 16384);
381
- const explicitToolScope = options.toolScope?.map((tool) => tool.trim()).filter((tool) => tool.length > 0) ?? [];
382
- const toolScopeLocked = explicitToolScope.length > 0;
383
- const selectedToolScope = explicitToolScope.length > 0
552
+ const toolScopeProvided = Array.isArray(options.toolScope);
553
+ const explicitToolScope = toolScopeProvided
554
+ ? options.toolScope.map((tool) => tool.trim()).filter((tool) => tool.length > 0)
555
+ : [];
556
+ const toolScopeLocked = toolScopeProvided;
557
+ const selectedToolScope = toolScopeProvided
384
558
  ? explicitToolScope
385
559
  : await this.selectToolScope({
386
560
  task: options.task,
@@ -421,6 +595,9 @@ export class ModelBridge {
421
595
  ];
422
596
  const toolResults = [];
423
597
  const childResults = [];
598
+ const touchedPaths = [];
599
+ const declaredEvidenceRefs = [];
600
+ const evidenceRefs = () => mergeEvidenceRefs(touchedPaths, declaredEvidenceRefs);
424
601
  const sessionId = this.bridgeId;
425
602
  const refs = deriveWorkspaceVericifyRunRef({
426
603
  session_id: this.bridgeId,
@@ -432,6 +609,10 @@ export class ModelBridge {
432
609
  const noteProgress = (kind, detail) => {
433
610
  options.onProgress?.({ kind, at: Date.now(), detail });
434
611
  };
612
+ // Tracks repair attempts for roles that require a JSON envelope (coders, builder, qa).
613
+ let parseRepairAttempts = 0;
614
+ // Tracks correction attempts for roles that produce semantically drifted output.
615
+ let outputDriftRepairs = 0;
435
616
  try {
436
617
  await appendVericifyProcessPostSafe({
437
618
  run_id: refs.run_id,
@@ -459,15 +640,79 @@ export class ModelBridge {
459
640
  }, options.onThinking);
460
641
  this.activeProviderClient = null;
461
642
  const envelope = parseEnvelope(rawResponse);
643
+ if (envelope.status !== "parse_error") {
644
+ const refs = mergeEvidenceRefs(envelope.evidence_refs);
645
+ if (refs)
646
+ declaredEvidenceRefs.push(...refs);
647
+ }
462
648
  if (envelope.thinking) {
463
649
  options.onThinking?.(envelope.thinking);
464
650
  noteProgress("thinking", { turn });
465
651
  }
466
652
  messages.push({ role: "assistant", content: rawResponse });
467
653
  if (envelope.status === "parse_error") {
468
- const summary = `Model bridge returned malformed or non-JSON output: ${envelope.message ?? "[empty response]"}`;
654
+ // Plain-text fallback: accepted only for roles that do NOT require a JSON envelope
655
+ // and only when they have no tool scope (i.e. they legitimately produce prose output).
656
+ if ((selectedToolScope ?? []).length === 0 && !ROLES_REQUIRING_JSON_ENVELOPE.has(role)) {
657
+ const summary = rawResponse.trim() || "Bridge completed.";
658
+ options.onOutput?.(summary);
659
+ noteProgress("output", { status: "complete", fallback: "plain_text" });
660
+ await appendVericifyProcessPostSafe({
661
+ run_id: refs.run_id,
662
+ branch_id: refs.branch_id,
663
+ lane_id: refs.lane_id,
664
+ agent_id: `agent-${role}`,
665
+ kind: "completion",
666
+ summary,
667
+ tool_refs: [],
668
+ });
669
+ noteProgress("process_post", { kind: "completion", fallback: "plain_text" });
670
+ return verifyCompletionArtifacts({
671
+ bridge_id: this.bridgeId,
672
+ role,
673
+ status: "completed",
674
+ summary,
675
+ turns: turn,
676
+ tool_calls: toolResults,
677
+ child_results: childResults,
678
+ evidence_refs: evidenceRefs(),
679
+ }, {
680
+ role,
681
+ task: options.task,
682
+ workspace: options.workspace,
683
+ touchedPaths,
684
+ expectedArtifacts: options.expectedArtifacts,
685
+ });
686
+ }
687
+ // Repair path: roles that require a JSON envelope get up to MAX_PARSE_REPAIR_ATTEMPTS
688
+ // chances to emit a valid response before the run is marked as failed.
689
+ if (ROLES_REQUIRING_JSON_ENVELOPE.has(role) && parseRepairAttempts < MAX_PARSE_REPAIR_ATTEMPTS) {
690
+ parseRepairAttempts += 1;
691
+ const repairPrompt = `Your previous response was not a valid JSON envelope and cannot be accepted. ` +
692
+ `As the ${role} role, every response MUST be a JSON object with a "status" key. ` +
693
+ `Valid response shapes:\n` +
694
+ ` {"status":"tool","tool_calls":[{"tool":"name","input":{}}]}\n` +
695
+ ` {"status":"complete","summary":"what you accomplished"}\n` +
696
+ `Do NOT output plain text, HTML, markdown, or code outside a JSON envelope. ` +
697
+ `Respond with valid JSON only. (repair attempt ${parseRepairAttempts}/${MAX_PARSE_REPAIR_ATTEMPTS})`;
698
+ messages.push({ role: "user", content: repairPrompt });
699
+ options.onThinking?.(`[drift-repair] ${role} parse_error — injecting repair prompt (attempt ${parseRepairAttempts}/${MAX_PARSE_REPAIR_ATTEMPTS})`);
700
+ noteProgress("thinking", {
701
+ reason: "role_contract_repair",
702
+ role,
703
+ attempt: parseRepairAttempts,
704
+ });
705
+ continue;
706
+ }
707
+ const isContractViolation = ROLES_REQUIRING_JSON_ENVELOPE.has(role);
708
+ const summary = isContractViolation
709
+ ? `[role_contract_violation] ${role} returned malformed JSON after ${parseRepairAttempts} repair attempt(s): ${envelope.message ?? "[empty response]"}`
710
+ : `Model bridge returned malformed or non-JSON output: ${envelope.message ?? "[empty response]"}`;
469
711
  options.onOutput?.(summary);
470
- noteProgress("output", { status: "parse_error" });
712
+ noteProgress("output", {
713
+ status: "parse_error",
714
+ role_contract_violation: isContractViolation,
715
+ });
471
716
  await appendVericifyProcessPostSafe({
472
717
  run_id: refs.run_id,
473
718
  branch_id: refs.branch_id,
@@ -486,6 +731,7 @@ export class ModelBridge {
486
731
  turns: turn,
487
732
  tool_calls: toolResults,
488
733
  child_results: childResults,
734
+ evidence_refs: evidenceRefs(),
489
735
  };
490
736
  }
491
737
  if (envelope.status === "tool" &&
@@ -534,7 +780,9 @@ export class ModelBridge {
534
780
  const args = toolCall.input ?? {};
535
781
  options.onToolCall?.(toolCall.tool, args);
536
782
  noteProgress("tool_start", { tool: toolCall.tool });
537
- const rawToolResult = await executeAceInternalTool(toolCall.tool, args, sessionId);
783
+ const rawToolResult = await executeAceInternalTool(toolCall.tool, args, sessionId, {
784
+ workspace_path: options.workspace,
785
+ });
538
786
  const result = truncateToolResult({
539
787
  tool: toolCall.tool,
540
788
  ok: !Boolean(rawToolResult?.isError),
@@ -544,6 +792,15 @@ export class ModelBridge {
544
792
  options.onToolResult?.(toolCall.tool, result);
545
793
  noteProgress("tool_finish", { tool: toolCall.tool, ok: result.ok });
546
794
  toolResults.push(result);
795
+ if (result.ok) {
796
+ const pathArg = typeof args.path === "string"
797
+ ? args.path
798
+ : typeof args.file_path === "string"
799
+ ? args.file_path
800
+ : undefined;
801
+ if (pathArg)
802
+ touchedPaths.push(pathArg);
803
+ }
547
804
  return result;
548
805
  }));
549
806
  messages.push({
@@ -555,7 +812,7 @@ export class ModelBridge {
555
812
  continue;
556
813
  }
557
814
  if (envelope.status === "message") {
558
- const message = envelope.message?.trim() || rawResponse.trim();
815
+ const message = readEnvelopeText(envelope.message) || rawResponse.trim();
559
816
  options.onOutput?.(message);
560
817
  noteProgress("output", { status: "message" });
561
818
  await appendVericifyProcessPostSafe({
@@ -568,7 +825,7 @@ export class ModelBridge {
568
825
  tool_refs: [],
569
826
  });
570
827
  noteProgress("process_post", { kind: "progress" });
571
- return {
828
+ return verifyCompletionArtifacts({
572
829
  bridge_id: this.bridgeId,
573
830
  role,
574
831
  status: "completed",
@@ -576,10 +833,17 @@ export class ModelBridge {
576
833
  turns: turn,
577
834
  tool_calls: toolResults,
578
835
  child_results: childResults,
579
- };
836
+ evidence_refs: evidenceRefs(),
837
+ }, {
838
+ role,
839
+ task: options.task,
840
+ workspace: options.workspace,
841
+ touchedPaths,
842
+ expectedArtifacts: options.expectedArtifacts,
843
+ });
580
844
  }
581
845
  if (envelope.status === "need_input") {
582
- const message = envelope.message?.trim() || "Additional operator input required.";
846
+ const message = readEnvelopeText(envelope.message) || "Additional operator input required.";
583
847
  options.onOutput?.(message);
584
848
  noteProgress("output", { status: "need_input" });
585
849
  await appendVericifyProcessPostSafe({
@@ -600,10 +864,55 @@ export class ModelBridge {
600
864
  turns: turn,
601
865
  tool_calls: toolResults,
602
866
  child_results: childResults,
867
+ evidence_refs: evidenceRefs(),
603
868
  };
604
869
  }
605
870
  if (envelope.status === "complete") {
606
- const summary = envelope.summary?.trim() || "Bridge completed.";
871
+ const summary = readEnvelopeText(envelope.summary) || "Bridge completed.";
872
+ // Output shape drift check: detect semantic violations before accepting the result.
873
+ const driftViolation = checkOutputShapeDrift(role, summary, toolResults);
874
+ if (driftViolation) {
875
+ if (outputDriftRepairs < MAX_OUTPUT_DRIFT_REPAIRS) {
876
+ outputDriftRepairs += 1;
877
+ const correctionPrompt = `Your previous completion violated the output contract for the ${role} role: ` +
878
+ `${driftViolation} Please restate your output, correcting the violation.`;
879
+ messages.push({ role: "user", content: correctionPrompt });
880
+ options.onThinking?.(`[drift-correction] ${role} output drift — injecting correction ` +
881
+ `(attempt ${outputDriftRepairs}/${MAX_OUTPUT_DRIFT_REPAIRS}): ${driftViolation}`);
882
+ noteProgress("thinking", {
883
+ reason: "output_drift_correction",
884
+ role,
885
+ attempt: outputDriftRepairs,
886
+ violation: driftViolation,
887
+ });
888
+ continue;
889
+ }
890
+ // Correction exhausted — reject the drifted output.
891
+ const driftSummary = `[output_drift_violation] ${role}: ${driftViolation}`;
892
+ options.onOutput?.(driftSummary);
893
+ noteProgress("output", { status: "output_drift_violation" });
894
+ await appendVericifyProcessPostSafe({
895
+ run_id: refs.run_id,
896
+ branch_id: refs.branch_id,
897
+ lane_id: refs.lane_id,
898
+ agent_id: `agent-${role}`,
899
+ kind: "blocker",
900
+ summary: driftSummary,
901
+ tool_refs: toolResults.map((entry) => entry.tool),
902
+ });
903
+ noteProgress("process_post", { kind: "blocker" });
904
+ return {
905
+ bridge_id: this.bridgeId,
906
+ role,
907
+ status: "failed",
908
+ summary: driftSummary,
909
+ turns: turn,
910
+ tool_calls: toolResults,
911
+ child_results: childResults,
912
+ evidence_refs: evidenceRefs(),
913
+ };
914
+ }
915
+ // Clean output — accept the completion.
607
916
  options.onOutput?.(summary);
608
917
  noteProgress("output", { status: "complete" });
609
918
  await appendVericifyProcessPostSafe({
@@ -616,7 +925,7 @@ export class ModelBridge {
616
925
  tool_refs: toolResults.map((entry) => entry.tool),
617
926
  });
618
927
  noteProgress("process_post", { kind: "completion" });
619
- return {
928
+ return verifyCompletionArtifacts({
620
929
  bridge_id: this.bridgeId,
621
930
  role,
622
931
  status: "completed",
@@ -624,7 +933,14 @@ export class ModelBridge {
624
933
  turns: turn,
625
934
  tool_calls: toolResults,
626
935
  child_results: childResults,
627
- };
936
+ evidence_refs: evidenceRefs(),
937
+ }, {
938
+ role,
939
+ task: options.task,
940
+ workspace: options.workspace,
941
+ touchedPaths,
942
+ expectedArtifacts: options.expectedArtifacts,
943
+ });
628
944
  }
629
945
  }
630
946
  const summary = "Bridge stopped after reaching max turns.";
@@ -646,6 +962,7 @@ export class ModelBridge {
646
962
  turns: options.maxTurns,
647
963
  tool_calls: toolResults,
648
964
  child_results: childResults,
965
+ evidence_refs: evidenceRefs(),
649
966
  };
650
967
  }
651
968
  finally {
@@ -20,6 +20,20 @@ export interface TaskStep {
20
20
  result_summary: string;
21
21
  evidence_refs: string[];
22
22
  }[];
23
+ expected_output_class?: "plain_text_plan" | "tool_envelope" | "code_artifact" | "structural_edit_plan" | "qa_verdict";
24
+ expected_artifacts?: Array<{
25
+ path: string;
26
+ required?: boolean;
27
+ evidence_ref_kind?: "artifact" | "diff" | "hash" | "test" | "gate";
28
+ }>;
29
+ allowed_tools?: string[];
30
+ forbidden_patterns?: string[];
31
+ required_evidence_refs?: string[];
32
+ structural_edit_plan_required?: boolean;
33
+ structural_edit_waiver?: {
34
+ reason: string;
35
+ evidence_ref: string;
36
+ };
23
37
  }
24
38
  export interface TaskPlan {
25
39
  plan_id: string;
@@ -44,6 +58,13 @@ export interface TaskPlanAmendment {
44
58
  vcx_cursor?: string;
45
59
  evidence_refs?: string[];
46
60
  }
/**
 * Verdict values an intent-verification check can report.
 *
 * NOTE(review): semantics are inferred from the literal names and from the
 * `replanForClauses` hook, which is documented as being called on "replan_required";
 * confirm against the supervisor implementation.
 */
export type IntentVerificationOutcome =
    | "ok"
    | "revisit_step"
    | "replan_required";
/** Value resolved by the `verifyIntent` supervisor hook. */
export interface IntentVerificationResult {
    /** The verdict for the verified step. */
    outcome: IntentVerificationOutcome;
    /** Explanation accompanying the verdict. */
    reason: string;
    /** Optional code classifying the reason. */
    reason_code?: string;
    /** Optional list of clauses reported as not covered; `replanForClauses` accepts such a list. */
    uncovered_clauses?: string[];
}
47
68
  export interface SupervisorHooks {
48
69
  spawnStep: (step: TaskStep, plan: TaskPlan) => Promise<BridgeResult>;
49
70
  createHandoff?: (input: {
@@ -80,6 +101,26 @@ export interface SupervisorHooks {
80
101
  summary: string;
81
102
  step_id?: string;
82
103
  }) => Promise<void>;
104
+ verifyIntent?: (input: {
105
+ plan: TaskPlan;
106
+ step: TaskStep;
107
+ result: BridgeResult;
108
+ intent_contract: unknown;
109
+ vericify_delta?: unknown;
110
+ }) => Promise<IntentVerificationResult>;
111
+ recordIntentVerificationFailure?: (input: {
112
+ plan: TaskPlan;
113
+ step: TaskStep;
114
+ result: BridgeResult;
115
+ verification: IntentVerificationResult;
116
+ from: TaskStepStatus;
117
+ to: TaskStepStatus;
118
+ }) => Promise<void>;
119
+ /** Called when replan_required: returns an amendment to insert steps covering uncovered clauses. */
120
+ replanForClauses?: (input: {
121
+ plan: TaskPlan;
122
+ uncovered_clauses: string[];
123
+ }) => Promise<TaskPlanAmendment | undefined>;
83
124
  }
84
125
  export interface SupervisorRunResult {
85
126
  plan: TaskPlan;
@@ -87,6 +128,7 @@ export interface SupervisorRunResult {
87
128
  job_ids: string[];
88
129
  circuit_opened: boolean;
89
130
  final_gate?: unknown;
131
+ blocked_reason?: string;
90
132
  }
91
133
  export declare function deriveTaskPlanStatus(plan: TaskPlan): TaskPlanStatus;
92
134
  export declare function createTaskPlan(input: {