@voybio/ace-swarm 2.4.0 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +1 -0
- package/assets/.agents/ACE/agent-qa/instructions.md +11 -0
- package/assets/agent-state/MODULES/schemas/RUNTIME_TOOL_SPEC_REGISTRY.schema.json +43 -0
- package/assets/agent-state/runtime-tool-specs.json +70 -2
- package/assets/instructions/ACE_Coder.instructions.md +13 -0
- package/assets/instructions/ACE_UI.instructions.md +11 -0
- package/dist/ace-context.js +70 -11
- package/dist/ace-internal-tools.d.ts +3 -1
- package/dist/ace-internal-tools.js +10 -2
- package/dist/agent-runtime/role-adapters.d.ts +18 -1
- package/dist/agent-runtime/role-adapters.js +49 -5
- package/dist/astgrep-index.d.ts +48 -0
- package/dist/astgrep-index.js +126 -1
- package/dist/cli.js +205 -15
- package/dist/discovery-runtime-wrappers.d.ts +108 -0
- package/dist/discovery-runtime-wrappers.js +615 -0
- package/dist/helpers/bootstrap.js +1 -1
- package/dist/helpers/constants.d.ts +2 -2
- package/dist/helpers/constants.js +7 -0
- package/dist/helpers/path-utils.d.ts +8 -1
- package/dist/helpers/path-utils.js +27 -8
- package/dist/helpers/store-resolution.js +7 -3
- package/dist/job-scheduler.js +30 -4
- package/dist/json-sanitizer.d.ts +16 -0
- package/dist/json-sanitizer.js +26 -0
- package/dist/local-model-policy.d.ts +27 -0
- package/dist/local-model-policy.js +84 -0
- package/dist/local-model-runtime.d.ts +6 -0
- package/dist/local-model-runtime.js +21 -20
- package/dist/model-bridge.d.ts +6 -1
- package/dist/model-bridge.js +338 -21
- package/dist/orchestrator-supervisor.d.ts +42 -0
- package/dist/orchestrator-supervisor.js +110 -3
- package/dist/plan-proposal.d.ts +115 -0
- package/dist/plan-proposal.js +1073 -0
- package/dist/runtime-executor.d.ts +6 -1
- package/dist/runtime-executor.js +72 -5
- package/dist/runtime-tool-specs.d.ts +19 -1
- package/dist/runtime-tool-specs.js +67 -26
- package/dist/schemas.js +29 -1
- package/dist/server.js +51 -0
- package/dist/shared.d.ts +1 -0
- package/dist/shared.js +2 -0
- package/dist/store/bootstrap-store.d.ts +1 -0
- package/dist/store/bootstrap-store.js +8 -2
- package/dist/store/repositories/local-model-runtime-repository.d.ts +1 -1
- package/dist/store/repositories/local-model-runtime-repository.js +1 -1
- package/dist/store/repositories/vericify-repository.d.ts +1 -1
- package/dist/tools-agent.d.ts +20 -0
- package/dist/tools-agent.js +538 -28
- package/dist/tools-discovery.js +135 -0
- package/dist/tools-files.js +768 -66
- package/dist/tools-framework.js +80 -61
- package/dist/tui/index.js +10 -1
- package/dist/tui/ollama.d.ts +8 -1
- package/dist/tui/ollama.js +53 -12
- package/dist/tui/openai-compatible.d.ts +13 -0
- package/dist/tui/openai-compatible.js +305 -5
- package/dist/tui/provider-discovery.d.ts +1 -0
- package/dist/tui/provider-discovery.js +35 -11
- package/dist/vericify-bridge.d.ts +1 -1
- package/package.json +1 -1
package/dist/model-bridge.js
CHANGED
|
@@ -1,9 +1,23 @@
|
|
|
1
1
|
import { randomUUID } from "node:crypto";
|
|
2
|
-
import { mkdirSync, writeFileSync } from "node:fs";
|
|
3
|
-
import { dirname, resolve } from "node:path";
|
|
2
|
+
import { existsSync, mkdirSync, writeFileSync, realpathSync } from "node:fs";
|
|
3
|
+
import { dirname, isAbsolute, resolve, sep } from "node:path";
|
|
4
4
|
import { executeAceInternalTool, listAceInternalToolCatalog, } from "./ace-internal-tools.js";
|
|
5
5
|
import { appendVericifyProcessPostSafe, deriveWorkspaceVericifyRunRef, } from "./vericify-bridge.js";
|
|
6
6
|
import { buildToolPlan, renderAceContext } from "./ace-context.js";
|
|
7
|
+
import { appendRunLedgerEntrySafe } from "./run-ledger.js";
|
|
8
|
+
import { appendStatusEventSafe } from "./status-events.js";
|
|
9
|
+
import { normalizeRelPath } from "./helpers.js";
|
|
10
|
+
import { sanitizeJsonLikeText } from "./json-sanitizer.js";
|
|
11
|
+
/**
|
|
12
|
+
* Roles that MUST produce a valid JSON envelope in every response.
|
|
13
|
+
* A parse_error from these roles is a contract failure, not a plain-text fallback.
|
|
14
|
+
* The bridge will inject a repair prompt (up to MAX_PARSE_REPAIR_ATTEMPTS) before
|
|
15
|
+
* marking the run as failed with status "role_contract_violation".
|
|
16
|
+
*/
|
|
17
|
+
const ROLES_REQUIRING_JSON_ENVELOPE = new Set(["coders", "builder", "qa"]);
|
|
18
|
+
// Envelope `status` values that parseEnvelope accepts; any other value makes it report a parse_error.
const VALID_ENVELOPE_STATUSES = new Set(["tool", "message", "complete", "need_input"]);
|
|
19
|
+
// Upper bound on repair prompts injected after a parse_error from a role in ROLES_REQUIRING_JSON_ENVELOPE.
const MAX_PARSE_REPAIR_ATTEMPTS = 2;
|
|
20
|
+
// Upper bound on correction prompts injected when checkOutputShapeDrift flags a "complete" envelope.
const MAX_OUTPUT_DRIFT_REPAIRS = 1;
|
|
7
21
|
function resolveProviderClient(input) {
|
|
8
22
|
const display = input.trim() || "ollama";
|
|
9
23
|
const normalized = display.toLowerCase();
|
|
@@ -11,8 +25,11 @@ function resolveProviderClient(input) {
|
|
|
11
25
|
? { display, client: "ollama" }
|
|
12
26
|
: { display, client: "openai-compatible" };
|
|
13
27
|
}
|
|
28
|
+
/**
 * Passes raw model output through sanitizeJsonLikeText and hands back only
 * the text portion of the sanitizer's result.
 *
 * @param raw Raw response text as received from the model.
 * @returns The `text` field of the sanitizer result.
 */
function sanitizeModelOutput(raw) {
    const { text } = sanitizeJsonLikeText(raw);
    return text;
}
|
|
14
31
|
function extractJsonEnvelope(raw) {
|
|
15
|
-
const trimmed = raw.trim();
|
|
32
|
+
const trimmed = sanitizeModelOutput(raw).trim();
|
|
16
33
|
if (trimmed.startsWith("{") && trimmed.endsWith("}")) {
|
|
17
34
|
return trimmed;
|
|
18
35
|
}
|
|
@@ -29,11 +46,11 @@ function extractJsonEnvelope(raw) {
|
|
|
29
46
|
let escape = false;
|
|
30
47
|
for (let index = firstBrace; index < trimmed.length; index += 1) {
|
|
31
48
|
const ch = trimmed[index];
|
|
32
|
-
if (escape) {
|
|
49
|
+
if (inString && escape) {
|
|
33
50
|
escape = false;
|
|
34
51
|
continue;
|
|
35
52
|
}
|
|
36
|
-
if (ch === "\\") {
|
|
53
|
+
if (inString && ch === "\\") {
|
|
37
54
|
escape = true;
|
|
38
55
|
continue;
|
|
39
56
|
}
|
|
@@ -61,18 +78,60 @@ function parseEnvelope(raw) {
|
|
|
61
78
|
if (!parsed || typeof parsed !== "object") {
|
|
62
79
|
throw new Error("response is not an object");
|
|
63
80
|
}
|
|
64
|
-
if (!parsed.status) {
|
|
65
|
-
throw new Error("missing status");
|
|
81
|
+
if (typeof parsed.status !== "string" || !VALID_ENVELOPE_STATUSES.has(parsed.status)) {
|
|
82
|
+
throw new Error("missing or invalid status");
|
|
83
|
+
}
|
|
84
|
+
if (parsed.tool_calls !== undefined && !Array.isArray(parsed.tool_calls)) {
|
|
85
|
+
throw new Error("tool_calls must be an array");
|
|
86
|
+
}
|
|
87
|
+
if (parsed.evidence_refs !== undefined) {
|
|
88
|
+
if (!Array.isArray(parsed.evidence_refs)) {
|
|
89
|
+
throw new Error("evidence_refs must be an array");
|
|
90
|
+
}
|
|
91
|
+
parsed.evidence_refs = parsed.evidence_refs
|
|
92
|
+
.filter((ref) => typeof ref === "string")
|
|
93
|
+
.map((ref) => ref.trim())
|
|
94
|
+
.filter(Boolean);
|
|
66
95
|
}
|
|
67
96
|
return parsed;
|
|
68
97
|
}
|
|
69
98
|
catch {
|
|
70
99
|
return {
|
|
71
100
|
status: "parse_error",
|
|
72
|
-
message: summarizeSnippet(raw.trim() || "[empty response]", 240),
|
|
101
|
+
message: summarizeSnippet(sanitizeModelOutput(raw).trim() || "[empty response]", 240),
|
|
73
102
|
};
|
|
74
103
|
}
|
|
75
104
|
}
|
|
105
|
+
/**
 * Inspects the text of a completed bridge output for role-specific "shape drift".
 *
 * Rules applied:
 *  - `qa`: fenced code blocks (``` ... ```) whose combined length, fences included,
 *    exceeds 800 characters are reported as a violation.
 *  - `vos` / `ui`: more than 8 HTML opening tags are reported as a violation.
 *  - every other role is never flagged.
 *
 * The thresholds are deliberately loose so that short inline snippets pass.
 *
 * @param role Role name of the agent that produced the output.
 * @param text Output text to inspect.
 * @param _toolResults Accepted for interface compatibility; not consulted.
 * @returns A human-readable violation description, or null when the output is clean.
 */
function checkOutputShapeDrift(role, text, _toolResults) {
    switch (role) {
        case "qa": {
            // Sum the length of every closed code fence, fence markers included.
            let fencedChars = 0;
            for (const fence of text.matchAll(/```[\s\S]*?```/g)) {
                fencedChars += fence[0].length;
            }
            if (fencedChars <= 800) {
                return null;
            }
            return ("output contains large code blocks — qa must return a short structured verdict, " +
                "not a rewritten artifact. Include a one-paragraph verdict and failure classification, " +
                "not the full file content.");
        }
        case "vos":
        case "ui": {
            // Count anything that looks like an HTML opening tag (case-insensitive).
            const openingTags = text.match(/<[a-z][^>]*>/gi);
            const htmlTagCount = openingTags === null ? 0 : openingTags.length;
            if (htmlTagCount <= 8) {
                return null;
            }
            return (`${role} output contains ${htmlTagCount} HTML opening tags — primary output must be ` +
                `prose. HTML authoring is the coders role's responsibility. Restate as plain prose.`);
        }
        default:
            return null;
    }
}
|
|
76
135
|
function parseToolPlan(raw, catalog) {
|
|
77
136
|
const candidate = extractJsonEnvelope(raw);
|
|
78
137
|
const allowedTools = new Set(catalog.map((tool) => tool.name));
|
|
@@ -127,6 +186,9 @@ function summarizeSnippet(text, maxChars = 240) {
|
|
|
127
186
|
return normalized;
|
|
128
187
|
return `${normalized.slice(0, Math.max(0, maxChars - 1)).trimEnd()}…`;
|
|
129
188
|
}
|
|
189
|
+
/**
 * Reads a text field from a parsed envelope.
 *
 * @param value Candidate field value of any type.
 * @returns The trimmed string when `value` is a string, otherwise an empty string.
 */
function readEnvelopeText(value) {
    if (typeof value !== "string") {
        return "";
    }
    return value.trim();
}
|
|
130
192
|
function buildHistorySummary(messages, maxChars) {
|
|
131
193
|
const lines = messages.slice(-8).map((message) => {
|
|
132
194
|
const content = summarizeSnippet(messageText(message).replace(/^Conversation summary:\s*/i, ""), 220);
|
|
@@ -209,6 +271,105 @@ function truncateToolResult(result, workspace, maxChars = 3000) {
|
|
|
209
271
|
function formatErrorMessage(error) {
|
|
210
272
|
return error instanceof Error ? error.message : String(error);
|
|
211
273
|
}
|
|
274
|
+
/**
 * Extracts the path portion of an evidence reference and normalizes it to a
 * workspace-relative path.
 *
 * Everything from the first "#" onward is discarded. The reference is rejected when
 * the remaining path is empty, contains a NUL character, starts with "~", is
 * absolute, or normalizes to something empty or pointing above the base
 * (".." or a "../" prefix).
 *
 * @param ref Evidence reference string, optionally carrying a "#fragment" suffix.
 * @returns The normalized relative path, or undefined when the reference is rejected.
 */
function normalizeEvidenceRefPath(ref) {
    const [beforeFragment] = ref.split("#", 1);
    const pathPart = beforeFragment?.trim() ?? "";
    const rejectedUpFront = pathPart === "" ||
        pathPart.includes("\0") ||
        pathPart.startsWith("~") ||
        isAbsolute(pathPart);
    if (rejectedUpFront) {
        return undefined;
    }
    const normalized = normalizeRelPath(pathPart);
    if (!normalized) {
        return undefined;
    }
    // Refuse anything that still climbs out of the base directory after normalization.
    const climbsOut = normalized === ".." || normalized.startsWith("../");
    return climbsOut ? undefined : normalized;
}
|
|
285
|
+
/**
 * Reports whether an evidence reference points at a path that exists inside the workspace.
 *
 * The reference is first reduced to a relative path by normalizeEvidenceRefPath. Both the
 * workspace root and the candidate are then resolved with realpathSync so that symlinks
 * cannot be used to point outside the workspace.
 *
 * @param ref Evidence reference (relative path, optionally with a "#fragment"). Non-string
 *            values are treated as "no evidence" rather than raising.
 * @param workspace Workspace root directory.
 * @returns true when the resolved path exists and is the workspace root or nested under it;
 *          false otherwise, including on any filesystem error.
 */
function workspaceEvidenceExists(ref, workspace) {
    // A malformed reference (e.g. an artifact entry without a string path) is simply not evidence.
    if (typeof ref !== "string")
        return false;
    const normalized = normalizeEvidenceRefPath(ref);
    if (!normalized)
        return false;
    try {
        const root = resolve(workspace);
        // realpathSync throws when the path does not exist, so reaching the comparison
        // below already proves existence; the catch turns a missing path into `false`.
        const realRoot = realpathSync(root);
        const realCandidate = realpathSync(resolve(root, normalized));
        if (realCandidate === realRoot)
            return true;
        // A filesystem root ("/" or "C:\") already ends with the separator; appending a
        // second one would produce a prefix that no real path can start with.
        const rootPrefix = realRoot.endsWith(sep) ? realRoot : realRoot + sep;
        return realCandidate.startsWith(rootPrefix);
    }
    catch {
        return false;
    }
}
|
|
305
|
+
/**
 * Merges any number of evidence-reference groups into one de-duplicated list.
 *
 * Each reference is trimmed; empty references are dropped, and the first occurrence
 * of a reference determines its position in the result. Null/undefined groups are
 * ignored. Entries that are not strings are skipped instead of raising, since
 * references can originate from model-produced envelopes.
 *
 * @param groups Arrays of reference strings (null/undefined groups allowed). A bare
 *               string passed as a group is treated as a single reference.
 * @returns The unique trimmed references in first-seen order, or undefined when none remain.
 */
function mergeEvidenceRefs(...groups) {
    const unique = new Set();
    for (const group of groups) {
        if (group === null || group === undefined)
            continue;
        const entries = Array.isArray(group) ? group : [group];
        for (const entry of entries) {
            if (typeof entry !== "string")
                continue;
            const ref = entry.trim();
            if (ref)
                unique.add(ref);
        }
    }
    return unique.size > 0 ? [...unique] : undefined;
}
|
|
312
|
+
/**
 * Records a blocked "false completion" in two places: a run-ledger entry and a
 * status event. Both writes are attempted in that order, and a rejection from
 * either one is swallowed so that recording stays best-effort.
 *
 * @param input Object carrying `role`, `workspace`, `summary` and optional `evidence_refs`.
 * @returns A promise that settles once both writes have been attempted.
 */
async function appendFalseCompletionEvidence(input) {
    const ignoreRejection = () => undefined;
    // Builds a fresh detail object for each sink so the two records never share state.
    const buildDetails = () => ({
        reason_code: "false_completion_no_evidence",
        role: input.role,
        workspace: input.workspace,
        evidence_refs: input.evidence_refs ?? [],
    });
    const ledgerEntry = {
        tool: "model-bridge",
        category: "regression",
        message: input.summary,
        artifacts: input.evidence_refs ?? [],
        metadata: buildDetails(),
    };
    await appendRunLedgerEntrySafe(ledgerEntry).catch(ignoreRejection);
    const statusEvent = {
        source_module: "capability-qa",
        event_type: "MODEL_BRIDGE_COMPLETION_BLOCKED",
        status: "blocked",
        summary: input.summary,
        objective_id: "model-bridge-completion-verification",
        payload: buildDetails(),
    };
    await appendStatusEventSafe(statusEvent).catch(ignoreRejection);
}
|
|
339
|
+
/**
 * Guards against "false completions": a result that claims `completed` without any
 * persisted evidence in the workspace is downgraded to `blocked`.
 *
 * Verification is triggered when any of these hold:
 *  - the role is "coders" or "builder" and the task text contains a mutation verb;
 *  - at least one required expected artifact is declared;
 *  - the run attempted a `write_workspace_file` tool call.
 *
 * A verified result is accepted when at least one of these holds:
 *  - a `write_workspace_file` call succeeded and some touched path exists in the workspace;
 *  - some declared evidence ref exists in the workspace;
 *  - there is at least one required expected artifact and all of them exist.
 *
 * Artifacts marked `required: false` neither trigger verification nor satisfy it.
 * Array.prototype.every is vacuously true on an empty list, so counting an all-optional
 * artifact list as "all present" would let an evidence-free completion through.
 *
 * @param result Bridge result to check; returned untouched unless it must be blocked.
 * @param context Object with `role`, `task`, `workspace`, `touchedPaths` and optional
 *                `expectedArtifacts` (entries with `path` and optional `required`).
 * @returns The original result, or a copy with status "blocked" and reason_code
 *          "false_completion_no_evidence".
 */
async function verifyCompletionArtifacts(result, context) {
    if (result.status !== "completed")
        return result;
    const expectedArtifacts = context.expectedArtifacts ?? [];
    const requiredArtifacts = expectedArtifacts.filter((artifact) => artifact.required !== false);
    const toolCalls = result.tool_calls ?? [];
    const touchedPaths = context.touchedPaths ?? [];
    const declaredRefs = result.evidence_refs ?? [];
    const existsInWorkspace = (path) => workspaceEvidenceExists(path, context.workspace);
    const mutationIntent = /\b(write|create|mutate|edit|persist|save|generate)\b/i.test(context.task);
    const isWritingRole = context.role === "coders" || context.role === "builder";
    const attemptedWrite = toolCalls.some((toolCall) => toolCall.tool === "write_workspace_file");
    const shouldVerify = (isWritingRole && mutationIntent) ||
        requiredArtifacts.length > 0 ||
        attemptedWrite;
    if (!shouldVerify)
        return result;
    const writeEvidenceOk = toolCalls.some((toolCall) => toolCall.tool === "write_workspace_file" && toolCall.ok) &&
        touchedPaths.some((path) => existsInWorkspace(path));
    const evidenceRefsOk = declaredRefs.some((ref) => existsInWorkspace(ref));
    // The length guard is applied to the REQUIRED list so an all-optional list cannot pass vacuously.
    const expectedArtifactsOk = requiredArtifacts.length > 0 &&
        requiredArtifacts.every((artifact) => existsInWorkspace(artifact.path));
    if (writeEvidenceOk || evidenceRefsOk || expectedArtifactsOk)
        return result;
    const summary = "Model claimed completion but no persisted evidence or tool-calls found.";
    await appendFalseCompletionEvidence({
        role: context.role,
        workspace: context.workspace,
        summary,
        evidence_refs: result.evidence_refs,
    });
    return {
        ...result,
        status: "blocked",
        reason_code: "false_completion_no_evidence",
        summary,
    };
}
|
|
212
373
|
function isRetryableProviderError(error) {
|
|
213
374
|
const message = formatErrorMessage(error).toLowerCase();
|
|
214
375
|
return !/(abort|aborted|cancelled|canceled|interrupted)/.test(message);
|
|
@@ -254,6 +415,16 @@ async function collectOpenAiCompatibleResponse(client, provider, model, messages
|
|
|
254
415
|
messages,
|
|
255
416
|
temperature: 0.2,
|
|
256
417
|
topP: 0.9,
|
|
418
|
+
onProviderEvent: (event) => {
|
|
419
|
+
onProgress?.({
|
|
420
|
+
kind: "thinking",
|
|
421
|
+
at: Date.now(),
|
|
422
|
+
detail: {
|
|
423
|
+
reason: "provider_adapter_event",
|
|
424
|
+
...event,
|
|
425
|
+
},
|
|
426
|
+
});
|
|
427
|
+
},
|
|
257
428
|
})) {
|
|
258
429
|
combined += chunk.text;
|
|
259
430
|
if (chunk.text || chunk.done) {
|
|
@@ -378,9 +549,12 @@ export class ModelBridge {
|
|
|
378
549
|
const provider = resolveProviderClient(options.provider);
|
|
379
550
|
const numCtx = options.numCtx ??
|
|
380
551
|
(requestedTier === "brief" ? 4096 : requestedTier === "compressed" ? 8192 : 16384);
|
|
381
|
-
const
|
|
382
|
-
const
|
|
383
|
-
|
|
552
|
+
const toolScopeProvided = Array.isArray(options.toolScope);
|
|
553
|
+
const explicitToolScope = toolScopeProvided
|
|
554
|
+
? options.toolScope.map((tool) => tool.trim()).filter((tool) => tool.length > 0)
|
|
555
|
+
: [];
|
|
556
|
+
const toolScopeLocked = toolScopeProvided;
|
|
557
|
+
const selectedToolScope = toolScopeProvided
|
|
384
558
|
? explicitToolScope
|
|
385
559
|
: await this.selectToolScope({
|
|
386
560
|
task: options.task,
|
|
@@ -421,6 +595,9 @@ export class ModelBridge {
|
|
|
421
595
|
];
|
|
422
596
|
const toolResults = [];
|
|
423
597
|
const childResults = [];
|
|
598
|
+
const touchedPaths = [];
|
|
599
|
+
const declaredEvidenceRefs = [];
|
|
600
|
+
const evidenceRefs = () => mergeEvidenceRefs(touchedPaths, declaredEvidenceRefs);
|
|
424
601
|
const sessionId = this.bridgeId;
|
|
425
602
|
const refs = deriveWorkspaceVericifyRunRef({
|
|
426
603
|
session_id: this.bridgeId,
|
|
@@ -432,6 +609,10 @@ export class ModelBridge {
|
|
|
432
609
|
const noteProgress = (kind, detail) => {
|
|
433
610
|
options.onProgress?.({ kind, at: Date.now(), detail });
|
|
434
611
|
};
|
|
612
|
+
// Tracks repair attempts for roles that require a JSON envelope (coders, builder, qa).
|
|
613
|
+
let parseRepairAttempts = 0;
|
|
614
|
+
// Tracks correction attempts for roles that produce semantically drifted output.
|
|
615
|
+
let outputDriftRepairs = 0;
|
|
435
616
|
try {
|
|
436
617
|
await appendVericifyProcessPostSafe({
|
|
437
618
|
run_id: refs.run_id,
|
|
@@ -459,15 +640,79 @@ export class ModelBridge {
|
|
|
459
640
|
}, options.onThinking);
|
|
460
641
|
this.activeProviderClient = null;
|
|
461
642
|
const envelope = parseEnvelope(rawResponse);
|
|
643
|
+
if (envelope.status !== "parse_error") {
|
|
644
|
+
const refs = mergeEvidenceRefs(envelope.evidence_refs);
|
|
645
|
+
if (refs)
|
|
646
|
+
declaredEvidenceRefs.push(...refs);
|
|
647
|
+
}
|
|
462
648
|
if (envelope.thinking) {
|
|
463
649
|
options.onThinking?.(envelope.thinking);
|
|
464
650
|
noteProgress("thinking", { turn });
|
|
465
651
|
}
|
|
466
652
|
messages.push({ role: "assistant", content: rawResponse });
|
|
467
653
|
if (envelope.status === "parse_error") {
|
|
468
|
-
|
|
654
|
+
// Plain-text fallback: accepted only for roles that do NOT require a JSON envelope
|
|
655
|
+
// and only when they have no tool scope (i.e. they legitimately produce prose output).
|
|
656
|
+
if ((selectedToolScope ?? []).length === 0 && !ROLES_REQUIRING_JSON_ENVELOPE.has(role)) {
|
|
657
|
+
const summary = rawResponse.trim() || "Bridge completed.";
|
|
658
|
+
options.onOutput?.(summary);
|
|
659
|
+
noteProgress("output", { status: "complete", fallback: "plain_text" });
|
|
660
|
+
await appendVericifyProcessPostSafe({
|
|
661
|
+
run_id: refs.run_id,
|
|
662
|
+
branch_id: refs.branch_id,
|
|
663
|
+
lane_id: refs.lane_id,
|
|
664
|
+
agent_id: `agent-${role}`,
|
|
665
|
+
kind: "completion",
|
|
666
|
+
summary,
|
|
667
|
+
tool_refs: [],
|
|
668
|
+
});
|
|
669
|
+
noteProgress("process_post", { kind: "completion", fallback: "plain_text" });
|
|
670
|
+
return verifyCompletionArtifacts({
|
|
671
|
+
bridge_id: this.bridgeId,
|
|
672
|
+
role,
|
|
673
|
+
status: "completed",
|
|
674
|
+
summary,
|
|
675
|
+
turns: turn,
|
|
676
|
+
tool_calls: toolResults,
|
|
677
|
+
child_results: childResults,
|
|
678
|
+
evidence_refs: evidenceRefs(),
|
|
679
|
+
}, {
|
|
680
|
+
role,
|
|
681
|
+
task: options.task,
|
|
682
|
+
workspace: options.workspace,
|
|
683
|
+
touchedPaths,
|
|
684
|
+
expectedArtifacts: options.expectedArtifacts,
|
|
685
|
+
});
|
|
686
|
+
}
|
|
687
|
+
// Repair path: roles that require a JSON envelope get up to MAX_PARSE_REPAIR_ATTEMPTS
|
|
688
|
+
// chances to emit a valid response before the run is marked as failed.
|
|
689
|
+
if (ROLES_REQUIRING_JSON_ENVELOPE.has(role) && parseRepairAttempts < MAX_PARSE_REPAIR_ATTEMPTS) {
|
|
690
|
+
parseRepairAttempts += 1;
|
|
691
|
+
const repairPrompt = `Your previous response was not a valid JSON envelope and cannot be accepted. ` +
|
|
692
|
+
`As the ${role} role, every response MUST be a JSON object with a "status" key. ` +
|
|
693
|
+
`Valid response shapes:\n` +
|
|
694
|
+
` {"status":"tool","tool_calls":[{"tool":"name","input":{}}]}\n` +
|
|
695
|
+
` {"status":"complete","summary":"what you accomplished"}\n` +
|
|
696
|
+
`Do NOT output plain text, HTML, markdown, or code outside a JSON envelope. ` +
|
|
697
|
+
`Respond with valid JSON only. (repair attempt ${parseRepairAttempts}/${MAX_PARSE_REPAIR_ATTEMPTS})`;
|
|
698
|
+
messages.push({ role: "user", content: repairPrompt });
|
|
699
|
+
options.onThinking?.(`[drift-repair] ${role} parse_error — injecting repair prompt (attempt ${parseRepairAttempts}/${MAX_PARSE_REPAIR_ATTEMPTS})`);
|
|
700
|
+
noteProgress("thinking", {
|
|
701
|
+
reason: "role_contract_repair",
|
|
702
|
+
role,
|
|
703
|
+
attempt: parseRepairAttempts,
|
|
704
|
+
});
|
|
705
|
+
continue;
|
|
706
|
+
}
|
|
707
|
+
const isContractViolation = ROLES_REQUIRING_JSON_ENVELOPE.has(role);
|
|
708
|
+
const summary = isContractViolation
|
|
709
|
+
? `[role_contract_violation] ${role} returned malformed JSON after ${parseRepairAttempts} repair attempt(s): ${envelope.message ?? "[empty response]"}`
|
|
710
|
+
: `Model bridge returned malformed or non-JSON output: ${envelope.message ?? "[empty response]"}`;
|
|
469
711
|
options.onOutput?.(summary);
|
|
470
|
-
noteProgress("output", {
|
|
712
|
+
noteProgress("output", {
|
|
713
|
+
status: "parse_error",
|
|
714
|
+
role_contract_violation: isContractViolation,
|
|
715
|
+
});
|
|
471
716
|
await appendVericifyProcessPostSafe({
|
|
472
717
|
run_id: refs.run_id,
|
|
473
718
|
branch_id: refs.branch_id,
|
|
@@ -486,6 +731,7 @@ export class ModelBridge {
|
|
|
486
731
|
turns: turn,
|
|
487
732
|
tool_calls: toolResults,
|
|
488
733
|
child_results: childResults,
|
|
734
|
+
evidence_refs: evidenceRefs(),
|
|
489
735
|
};
|
|
490
736
|
}
|
|
491
737
|
if (envelope.status === "tool" &&
|
|
@@ -534,7 +780,9 @@ export class ModelBridge {
|
|
|
534
780
|
const args = toolCall.input ?? {};
|
|
535
781
|
options.onToolCall?.(toolCall.tool, args);
|
|
536
782
|
noteProgress("tool_start", { tool: toolCall.tool });
|
|
537
|
-
const rawToolResult = await executeAceInternalTool(toolCall.tool, args, sessionId
|
|
783
|
+
const rawToolResult = await executeAceInternalTool(toolCall.tool, args, sessionId, {
|
|
784
|
+
workspace_path: options.workspace,
|
|
785
|
+
});
|
|
538
786
|
const result = truncateToolResult({
|
|
539
787
|
tool: toolCall.tool,
|
|
540
788
|
ok: !Boolean(rawToolResult?.isError),
|
|
@@ -544,6 +792,15 @@ export class ModelBridge {
|
|
|
544
792
|
options.onToolResult?.(toolCall.tool, result);
|
|
545
793
|
noteProgress("tool_finish", { tool: toolCall.tool, ok: result.ok });
|
|
546
794
|
toolResults.push(result);
|
|
795
|
+
if (result.ok) {
|
|
796
|
+
const pathArg = typeof args.path === "string"
|
|
797
|
+
? args.path
|
|
798
|
+
: typeof args.file_path === "string"
|
|
799
|
+
? args.file_path
|
|
800
|
+
: undefined;
|
|
801
|
+
if (pathArg)
|
|
802
|
+
touchedPaths.push(pathArg);
|
|
803
|
+
}
|
|
547
804
|
return result;
|
|
548
805
|
}));
|
|
549
806
|
messages.push({
|
|
@@ -555,7 +812,7 @@ export class ModelBridge {
|
|
|
555
812
|
continue;
|
|
556
813
|
}
|
|
557
814
|
if (envelope.status === "message") {
|
|
558
|
-
const message = envelope.message
|
|
815
|
+
const message = readEnvelopeText(envelope.message) || rawResponse.trim();
|
|
559
816
|
options.onOutput?.(message);
|
|
560
817
|
noteProgress("output", { status: "message" });
|
|
561
818
|
await appendVericifyProcessPostSafe({
|
|
@@ -568,7 +825,7 @@ export class ModelBridge {
|
|
|
568
825
|
tool_refs: [],
|
|
569
826
|
});
|
|
570
827
|
noteProgress("process_post", { kind: "progress" });
|
|
571
|
-
return {
|
|
828
|
+
return verifyCompletionArtifacts({
|
|
572
829
|
bridge_id: this.bridgeId,
|
|
573
830
|
role,
|
|
574
831
|
status: "completed",
|
|
@@ -576,10 +833,17 @@ export class ModelBridge {
|
|
|
576
833
|
turns: turn,
|
|
577
834
|
tool_calls: toolResults,
|
|
578
835
|
child_results: childResults,
|
|
579
|
-
|
|
836
|
+
evidence_refs: evidenceRefs(),
|
|
837
|
+
}, {
|
|
838
|
+
role,
|
|
839
|
+
task: options.task,
|
|
840
|
+
workspace: options.workspace,
|
|
841
|
+
touchedPaths,
|
|
842
|
+
expectedArtifacts: options.expectedArtifacts,
|
|
843
|
+
});
|
|
580
844
|
}
|
|
581
845
|
if (envelope.status === "need_input") {
|
|
582
|
-
const message = envelope.message
|
|
846
|
+
const message = readEnvelopeText(envelope.message) || "Additional operator input required.";
|
|
583
847
|
options.onOutput?.(message);
|
|
584
848
|
noteProgress("output", { status: "need_input" });
|
|
585
849
|
await appendVericifyProcessPostSafe({
|
|
@@ -600,10 +864,55 @@ export class ModelBridge {
|
|
|
600
864
|
turns: turn,
|
|
601
865
|
tool_calls: toolResults,
|
|
602
866
|
child_results: childResults,
|
|
867
|
+
evidence_refs: evidenceRefs(),
|
|
603
868
|
};
|
|
604
869
|
}
|
|
605
870
|
if (envelope.status === "complete") {
|
|
606
|
-
const summary = envelope.summary
|
|
871
|
+
const summary = readEnvelopeText(envelope.summary) || "Bridge completed.";
|
|
872
|
+
// Output shape drift check: detect semantic violations before accepting the result.
|
|
873
|
+
const driftViolation = checkOutputShapeDrift(role, summary, toolResults);
|
|
874
|
+
if (driftViolation) {
|
|
875
|
+
if (outputDriftRepairs < MAX_OUTPUT_DRIFT_REPAIRS) {
|
|
876
|
+
outputDriftRepairs += 1;
|
|
877
|
+
const correctionPrompt = `Your previous completion violated the output contract for the ${role} role: ` +
|
|
878
|
+
`${driftViolation} Please restate your output, correcting the violation.`;
|
|
879
|
+
messages.push({ role: "user", content: correctionPrompt });
|
|
880
|
+
options.onThinking?.(`[drift-correction] ${role} output drift — injecting correction ` +
|
|
881
|
+
`(attempt ${outputDriftRepairs}/${MAX_OUTPUT_DRIFT_REPAIRS}): ${driftViolation}`);
|
|
882
|
+
noteProgress("thinking", {
|
|
883
|
+
reason: "output_drift_correction",
|
|
884
|
+
role,
|
|
885
|
+
attempt: outputDriftRepairs,
|
|
886
|
+
violation: driftViolation,
|
|
887
|
+
});
|
|
888
|
+
continue;
|
|
889
|
+
}
|
|
890
|
+
// Correction exhausted — reject the drifted output.
|
|
891
|
+
const driftSummary = `[output_drift_violation] ${role}: ${driftViolation}`;
|
|
892
|
+
options.onOutput?.(driftSummary);
|
|
893
|
+
noteProgress("output", { status: "output_drift_violation" });
|
|
894
|
+
await appendVericifyProcessPostSafe({
|
|
895
|
+
run_id: refs.run_id,
|
|
896
|
+
branch_id: refs.branch_id,
|
|
897
|
+
lane_id: refs.lane_id,
|
|
898
|
+
agent_id: `agent-${role}`,
|
|
899
|
+
kind: "blocker",
|
|
900
|
+
summary: driftSummary,
|
|
901
|
+
tool_refs: toolResults.map((entry) => entry.tool),
|
|
902
|
+
});
|
|
903
|
+
noteProgress("process_post", { kind: "blocker" });
|
|
904
|
+
return {
|
|
905
|
+
bridge_id: this.bridgeId,
|
|
906
|
+
role,
|
|
907
|
+
status: "failed",
|
|
908
|
+
summary: driftSummary,
|
|
909
|
+
turns: turn,
|
|
910
|
+
tool_calls: toolResults,
|
|
911
|
+
child_results: childResults,
|
|
912
|
+
evidence_refs: evidenceRefs(),
|
|
913
|
+
};
|
|
914
|
+
}
|
|
915
|
+
// Clean output — accept the completion.
|
|
607
916
|
options.onOutput?.(summary);
|
|
608
917
|
noteProgress("output", { status: "complete" });
|
|
609
918
|
await appendVericifyProcessPostSafe({
|
|
@@ -616,7 +925,7 @@ export class ModelBridge {
|
|
|
616
925
|
tool_refs: toolResults.map((entry) => entry.tool),
|
|
617
926
|
});
|
|
618
927
|
noteProgress("process_post", { kind: "completion" });
|
|
619
|
-
return {
|
|
928
|
+
return verifyCompletionArtifacts({
|
|
620
929
|
bridge_id: this.bridgeId,
|
|
621
930
|
role,
|
|
622
931
|
status: "completed",
|
|
@@ -624,7 +933,14 @@ export class ModelBridge {
|
|
|
624
933
|
turns: turn,
|
|
625
934
|
tool_calls: toolResults,
|
|
626
935
|
child_results: childResults,
|
|
627
|
-
|
|
936
|
+
evidence_refs: evidenceRefs(),
|
|
937
|
+
}, {
|
|
938
|
+
role,
|
|
939
|
+
task: options.task,
|
|
940
|
+
workspace: options.workspace,
|
|
941
|
+
touchedPaths,
|
|
942
|
+
expectedArtifacts: options.expectedArtifacts,
|
|
943
|
+
});
|
|
628
944
|
}
|
|
629
945
|
}
|
|
630
946
|
const summary = "Bridge stopped after reaching max turns.";
|
|
@@ -646,6 +962,7 @@ export class ModelBridge {
|
|
|
646
962
|
turns: options.maxTurns,
|
|
647
963
|
tool_calls: toolResults,
|
|
648
964
|
child_results: childResults,
|
|
965
|
+
evidence_refs: evidenceRefs(),
|
|
649
966
|
};
|
|
650
967
|
}
|
|
651
968
|
finally {
|
|
@@ -20,6 +20,20 @@ export interface TaskStep {
|
|
|
20
20
|
result_summary: string;
|
|
21
21
|
evidence_refs: string[];
|
|
22
22
|
}[];
|
|
23
|
+
expected_output_class?: "plain_text_plan" | "tool_envelope" | "code_artifact" | "structural_edit_plan" | "qa_verdict";
|
|
24
|
+
expected_artifacts?: Array<{
|
|
25
|
+
path: string;
|
|
26
|
+
required?: boolean;
|
|
27
|
+
evidence_ref_kind?: "artifact" | "diff" | "hash" | "test" | "gate";
|
|
28
|
+
}>;
|
|
29
|
+
allowed_tools?: string[];
|
|
30
|
+
forbidden_patterns?: string[];
|
|
31
|
+
required_evidence_refs?: string[];
|
|
32
|
+
structural_edit_plan_required?: boolean;
|
|
33
|
+
structural_edit_waiver?: {
|
|
34
|
+
reason: string;
|
|
35
|
+
evidence_ref: string;
|
|
36
|
+
};
|
|
23
37
|
}
|
|
24
38
|
export interface TaskPlan {
|
|
25
39
|
plan_id: string;
|
|
@@ -44,6 +58,13 @@ export interface TaskPlanAmendment {
|
|
|
44
58
|
vcx_cursor?: string;
|
|
45
59
|
evidence_refs?: string[];
|
|
46
60
|
}
|
|
61
|
+
/** Possible outcomes of an intent verification: "ok", "revisit_step" or "replan_required". */
export type IntentVerificationOutcome = "ok" | "revisit_step" | "replan_required";
|
|
62
|
+
/** Value the SupervisorHooks.verifyIntent hook resolves to. */
export interface IntentVerificationResult {
|
|
63
|
+
outcome: IntentVerificationOutcome;
|
|
64
|
+
reason: string;
|
|
65
|
+
reason_code?: string;
|
|
66
|
+
/** Presumably the clauses left uncovered when a replan is required; a same-named list is passed to replanForClauses — TODO confirm how it is populated. */
uncovered_clauses?: string[];
|
|
67
|
+
}
|
|
47
68
|
export interface SupervisorHooks {
|
|
48
69
|
spawnStep: (step: TaskStep, plan: TaskPlan) => Promise<BridgeResult>;
|
|
49
70
|
createHandoff?: (input: {
|
|
@@ -80,6 +101,26 @@ export interface SupervisorHooks {
|
|
|
80
101
|
summary: string;
|
|
81
102
|
step_id?: string;
|
|
82
103
|
}) => Promise<void>;
|
|
104
|
+
verifyIntent?: (input: {
|
|
105
|
+
plan: TaskPlan;
|
|
106
|
+
step: TaskStep;
|
|
107
|
+
result: BridgeResult;
|
|
108
|
+
intent_contract: unknown;
|
|
109
|
+
vericify_delta?: unknown;
|
|
110
|
+
}) => Promise<IntentVerificationResult>;
|
|
111
|
+
recordIntentVerificationFailure?: (input: {
|
|
112
|
+
plan: TaskPlan;
|
|
113
|
+
step: TaskStep;
|
|
114
|
+
result: BridgeResult;
|
|
115
|
+
verification: IntentVerificationResult;
|
|
116
|
+
from: TaskStepStatus;
|
|
117
|
+
to: TaskStepStatus;
|
|
118
|
+
}) => Promise<void>;
|
|
119
|
+
/** Called when replan_required: returns an amendment to insert steps covering uncovered clauses. */
|
|
120
|
+
replanForClauses?: (input: {
|
|
121
|
+
plan: TaskPlan;
|
|
122
|
+
uncovered_clauses: string[];
|
|
123
|
+
}) => Promise<TaskPlanAmendment | undefined>;
|
|
83
124
|
}
|
|
84
125
|
export interface SupervisorRunResult {
|
|
85
126
|
plan: TaskPlan;
|
|
@@ -87,6 +128,7 @@ export interface SupervisorRunResult {
|
|
|
87
128
|
job_ids: string[];
|
|
88
129
|
circuit_opened: boolean;
|
|
89
130
|
final_gate?: unknown;
|
|
131
|
+
blocked_reason?: string;
|
|
90
132
|
}
|
|
91
133
|
export declare function deriveTaskPlanStatus(plan: TaskPlan): TaskPlanStatus;
|
|
92
134
|
export declare function createTaskPlan(input: {
|