patchwork-os 0.2.0-beta.5.canary.95 → 0.2.0-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ajv2020.d.ts +25 -0
- package/dist/ajv2020.js +33 -0
- package/dist/ajv2020.js.map +1 -0
- package/dist/approvalQueue.d.ts +17 -0
- package/dist/approvalQueue.js.map +1 -1
- package/dist/bridge.js +16 -0
- package/dist/bridge.js.map +1 -1
- package/dist/commands/recipeInstall.js +5 -1
- package/dist/commands/recipeInstall.js.map +1 -1
- package/dist/commands/tools.d.ts +20 -1
- package/dist/commands/tools.js +112 -3
- package/dist/commands/tools.js.map +1 -1
- package/dist/haltPushDispatch.d.ts +33 -0
- package/dist/haltPushDispatch.js +103 -0
- package/dist/haltPushDispatch.js.map +1 -0
- package/dist/inboxRoutes.d.ts +22 -0
- package/dist/inboxRoutes.js +61 -1
- package/dist/inboxRoutes.js.map +1 -1
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/dist/oauthRoutes.d.ts +1 -1
- package/dist/oauthRoutes.js +2 -2
- package/dist/recipeRoutes.js +133 -65
- package/dist/recipeRoutes.js.map +1 -1
- package/dist/recipes/githubInstallSource.d.ts +66 -0
- package/dist/recipes/githubInstallSource.js +85 -4
- package/dist/recipes/githubInstallSource.js.map +1 -1
- package/dist/recipes/haltCategory.d.ts +4 -0
- package/dist/recipes/haltCategory.js +6 -0
- package/dist/recipes/haltCategory.js.map +1 -1
- package/dist/recipes/names.d.ts +20 -0
- package/dist/recipes/names.js +25 -0
- package/dist/recipes/names.js.map +1 -1
- package/dist/recipes/parser.js +7 -2
- package/dist/recipes/parser.js.map +1 -1
- package/dist/recipes/stepObservation.js +9 -0
- package/dist/recipes/stepObservation.js.map +1 -1
- package/dist/recipes/tools/fanOut.d.ts +20 -0
- package/dist/recipes/tools/fanOut.js +199 -0
- package/dist/recipes/tools/fanOut.js.map +1 -0
- package/dist/recipes/tools/index.d.ts +1 -0
- package/dist/recipes/tools/index.js +1 -0
- package/dist/recipes/tools/index.js.map +1 -1
- package/dist/recipes/tools/slack.js +1 -1
- package/dist/recipes/validation.js +2 -2
- package/dist/recipes/validation.js.map +1 -1
- package/dist/recipes/workspaceRoot.d.ts +37 -0
- package/dist/recipes/workspaceRoot.js +73 -0
- package/dist/recipes/workspaceRoot.js.map +1 -0
- package/dist/recipes/yamlRunner.d.ts +72 -0
- package/dist/recipes/yamlRunner.js +621 -295
- package/dist/recipes/yamlRunner.js.map +1 -1
- package/dist/runLog.d.ts +22 -0
- package/dist/runLog.js +12 -1
- package/dist/runLog.js.map +1 -1
- package/dist/server.d.ts +14 -0
- package/dist/server.js +36 -3
- package/dist/server.js.map +1 -1
- package/dist/tools/batchLsp.d.ts +3 -0
- package/dist/tools/cancelClaudeTask.d.ts +1 -0
- package/dist/tools/clipboard.d.ts +2 -0
- package/dist/tools/closeTabs.d.ts +1 -0
- package/dist/tools/codeLens.d.ts +1 -0
- package/dist/tools/createIssueFromAIComment.d.ts +1 -0
- package/dist/tools/ctxSaveTrace.d.ts +1 -0
- package/dist/tools/debug.d.ts +4 -0
- package/dist/tools/decorations.d.ts +2 -0
- package/dist/tools/documentLinks.d.ts +1 -0
- package/dist/tools/editText.d.ts +1 -0
- package/dist/tools/enrichCommit.d.ts +1 -0
- package/dist/tools/explainDiagnostic.d.ts +1 -0
- package/dist/tools/explainSymbol.d.ts +1 -0
- package/dist/tools/fileOperations.d.ts +3 -0
- package/dist/tools/fileWatcher.d.ts +2 -0
- package/dist/tools/findFiles.d.ts +1 -0
- package/dist/tools/fixAllLintErrors.d.ts +1 -0
- package/dist/tools/foldingRanges.d.ts +1 -0
- package/dist/tools/formatDocument.d.ts +1 -0
- package/dist/tools/generateTests.d.ts +1 -0
- package/dist/tools/getAIComments.d.ts +1 -0
- package/dist/tools/getBufferContent.d.ts +1 -0
- package/dist/tools/getChangeImpact.d.ts +1 -0
- package/dist/tools/getClaudeTaskStatus.d.ts +1 -0
- package/dist/tools/getCodeCoverage.d.ts +1 -0
- package/dist/tools/getCommitsForIssue.d.ts +1 -0
- package/dist/tools/getDebugState.d.ts +1 -0
- package/dist/tools/getDocumentSymbols.d.ts +1 -0
- package/dist/tools/getGitHotspots.d.ts +1 -0
- package/dist/tools/getImportedSignatures.d.ts +1 -0
- package/dist/tools/getPRTemplate.d.ts +1 -0
- package/dist/tools/getSymbolHistory.d.ts +1 -0
- package/dist/tools/getTypeSignature.d.ts +1 -0
- package/dist/tools/getWorkspaceSettings.d.ts +1 -0
- package/dist/tools/gitWrite.d.ts +11 -0
- package/dist/tools/github/actions.d.ts +2 -0
- package/dist/tools/github/composite.d.ts +3 -0
- package/dist/tools/github/issues.d.ts +4 -0
- package/dist/tools/github/pr.d.ts +7 -0
- package/dist/tools/handoffNote.d.ts +1 -0
- package/dist/tools/hoverAtCursor.d.ts +1 -0
- package/dist/tools/httpClient.d.ts +2 -0
- package/dist/tools/inlayHints.d.ts +1 -0
- package/dist/tools/launchQuickTask.d.ts +1 -0
- package/dist/tools/listClaudeTasks.d.ts +1 -0
- package/dist/tools/listTerminals.d.ts +1 -0
- package/dist/tools/lsp.d.ts +15 -0
- package/dist/tools/navigateToSymbolByName.d.ts +1 -0
- package/dist/tools/openDiff.d.ts +1 -0
- package/dist/tools/openFile.d.ts +1 -0
- package/dist/tools/organizeImports.d.ts +1 -0
- package/dist/tools/planPersistence.d.ts +3 -0
- package/dist/tools/previewEdit.d.ts +1 -0
- package/dist/tools/refactorAnalyze.d.ts +1 -0
- package/dist/tools/refactorPreview.d.ts +1 -0
- package/dist/tools/replaceBlock.d.ts +1 -0
- package/dist/tools/resumeClaudeTask.d.ts +1 -0
- package/dist/tools/runClaudeTask.d.ts +1 -0
- package/dist/tools/screenshot.d.ts +1 -0
- package/dist/tools/searchAndReplace.d.ts +1 -0
- package/dist/tools/searchWorkspace.d.ts +1 -0
- package/dist/tools/selectionRanges.d.ts +1 -0
- package/dist/tools/semanticTokens.d.ts +1 -0
- package/dist/tools/signatureHelp.d.ts +1 -0
- package/dist/tools/terminal.d.ts +6 -0
- package/dist/tools/testTraceToSource.d.ts +1 -0
- package/dist/tools/transaction.d.ts +4 -0
- package/dist/tools/typeHierarchy.d.ts +1 -0
- package/dist/tools/utils.d.ts +18 -0
- package/dist/tools/utils.js +28 -6
- package/dist/tools/utils.js.map +1 -1
- package/dist/tools/vscodeCommands.d.ts +2 -0
- package/dist/tools/vscodeTasks.d.ts +2 -0
- package/dist/tools/workspaceSettings.d.ts +1 -0
- package/dist/transport.js +2 -2
- package/dist/transport.js.map +1 -1
- package/dist/wireHaltPushDispatch.d.ts +38 -0
- package/dist/wireHaltPushDispatch.js +71 -0
- package/dist/wireHaltPushDispatch.js.map +1 -0
- package/package.json +1 -1
|
@@ -51,6 +51,7 @@ import { RunBudget } from "./runBudget.js";
|
|
|
51
51
|
import { detectSilentFail } from "./stepObservation.js";
|
|
52
52
|
// Import tool registry and trigger tool self-registration
|
|
53
53
|
import { applyToolOutputContext, executeTool, getTool, hasTool, registerPluginTools, } from "./toolRegistry.js";
|
|
54
|
+
import { resolveWorkspaceRoot } from "./workspaceRoot.js";
|
|
54
55
|
import "./tools/index.js";
|
|
55
56
|
/**
|
|
56
57
|
* Bundled-templates directory used as a third allowed root for nested-recipe
|
|
@@ -127,7 +128,119 @@ export function evaluateExpect(result, expect) {
|
|
|
127
128
|
}
|
|
128
129
|
return failures;
|
|
129
130
|
}
|
|
131
|
+
/**
 * Lazy AJV for `step.expect.schema`. Initialised on first use so recipes
 * without schema assertions don't pay the import/compile cost.
 *
 * `_stepExpectAjv` holds the ready instance once initialisation succeeds.
 * `_stepExpectAjvPending` holds the in-flight initialisation promise, so
 * callers that arrive while the dynamic import is still resolving share a
 * single instance instead of each creating (and overwriting) their own.
 */
let _stepExpectAjv;
let _stepExpectAjvPending;
/**
 * Resolve the shared AJV instance, creating it on first use.
 *
 * @returns {Promise<object>} the instance returned by `createAjv2020`.
 * @throws rejects with the import/constructor error when initialisation
 *   fails; the pending slot is cleared so a later call can retry.
 */
async function getStepExpectAjv() {
    if (_stepExpectAjv) {
        return _stepExpectAjv;
    }
    if (!_stepExpectAjvPending) {
        _stepExpectAjvPending = (async () => {
            try {
                const { createAjv2020 } = await import("../ajv2020.js");
                _stepExpectAjv = createAjv2020({ strict: false, allErrors: true });
                return _stepExpectAjv;
            }
            finally {
                // Success or failure, the in-flight marker is no longer needed:
                // on success `_stepExpectAjv` answers future calls, on failure
                // the next call starts a fresh attempt.
                _stepExpectAjvPending = undefined;
            }
        })();
    }
    return _stepExpectAjvPending;
}
|
|
143
|
+
/**
 * Stringify a step value for assertion purposes. Strings pass through;
 * other values JSON.stringify so `matches`/`contains` see something stable.
 *
 * Always returns a string. `JSON.stringify` yields `undefined` (without
 * throwing) for `undefined`, functions and symbols, and throws for values
 * such as BigInt or circular structures; both cases fall back to
 * `String(value)`.
 *
 * @param {unknown} value - the step output (or expected value) to render.
 * @returns {string} the textual form used by the string-based assertions.
 */
function stringifyForAssert(value) {
    if (typeof value === "string") {
        return value;
    }
    try {
        // `??` covers the "not serialisable, but no exception" case.
        return JSON.stringify(value) ?? String(value);
    }
    catch {
        return String(value);
    }
}
|
|
157
|
+
/**
 * Evaluate a per-step `expect` block against the step's output value.
 * Returns the list of failure messages (empty = all assertions passed).
 *
 * Slice 2 of the agentic-workflow primitives. v1 supports
 * schema/equals/matches/contains; `on_fail: judge` deliberately omitted —
 * see comment on `StepExpect`.
 *
 * Assertions are evaluated in the order equals → contains → matches →
 * schema and every configured assertion is reported: an invalid `matches`
 * pattern is recorded as a failure but does not prevent the `schema`
 * assertion from running.
 *
 * @param {object} expect - the step's `expect` block; the keys read here
 *   are `equals`, `contains` (single needle or array), `matches` (regex
 *   source) and `schema`.
 * @param {unknown} value - the step output. String-based assertions run
 *   against its stringified form; `schema` validates the value itself,
 *   JSON-parsing it first when it is a string.
 * @returns {Promise<string[]>} failure messages, one per failed assertion.
 */
export async function evaluateStepExpect(expect, value) {
    const failures = [];
    // The stringifier can hand back a non-string for values JSON cannot
    // represent (e.g. `undefined`); coerce so `.includes`/`.test` never throw.
    const rendered = stringifyForAssert(value);
    const asString = typeof rendered === "string" ? rendered : String(value);
    if (expect.equals !== undefined) {
        const expected = expect.equals;
        const expectedStr = typeof expected === "string" ? expected : stringifyForAssert(expected);
        if (asString !== expectedStr) {
            failures.push(`equals: expected ${JSON.stringify(expectedStr)}, got ${JSON.stringify(asString)}`);
        }
    }
    if (expect.contains !== undefined) {
        const needles = Array.isArray(expect.contains)
            ? expect.contains
            : [expect.contains];
        for (const needle of needles) {
            if (!asString.includes(needle)) {
                failures.push(`contains: missing ${JSON.stringify(needle)}`);
            }
        }
    }
    if (expect.matches !== undefined) {
        let re;
        try {
            re = new RegExp(expect.matches);
        }
        catch (err) {
            // Record the bad pattern and keep going so a configured `schema`
            // assertion is still evaluated and reported.
            failures.push(`matches: invalid regex ${JSON.stringify(expect.matches)} (${err instanceof Error ? err.message : String(err)})`);
        }
        if (re !== undefined && !re.test(asString)) {
            failures.push(`matches: ${JSON.stringify(expect.matches)} did not match output`);
        }
    }
    if (expect.schema !== undefined) {
        let parsed;
        try {
            parsed = typeof value === "string" ? JSON.parse(value) : value;
        }
        catch {
            failures.push(`schema: output is not valid JSON`);
            return failures;
        }
        try {
            const ajv = await getStepExpectAjv();
            const validate = ajv.compile(expect.schema);
            if (!validate(parsed)) {
                const errs = (validate.errors ?? [])
                    .map((e) => `${e.instancePath || "/"} ${e.message ?? "invalid"}`)
                    .join("; ");
                failures.push(`schema: ${errs || "validation failed"}`);
            }
        }
        catch (err) {
            // Covers both a failure to load AJV and a schema that does not compile.
            failures.push(`schema: compile error (${err instanceof Error ? err.message : String(err)})`);
        }
    }
    return failures;
}
|
|
130
223
|
// Strip tool-call narration some models (e.g. Gemini) prepend before the markdown block.
|
|
224
|
+
/**
 * Phase 0β — decide whether a path is a deliverable inbox file, without
 * depending on the host's path separator. The path module is injected so
 * `path.win32` / `path.posix` behaviour can be unit-tested directly.
 *
 * A candidate qualifies only when all of the following hold:
 *   - it resolves to a location strictly inside `inboxDirAbs` (not the
 *     inbox directory itself, not above it, not on another root);
 *   - its basename does not start with a dot;
 *   - it is a direct child of the inbox directory, not nested deeper.
 *
 * @param {string} candidate - path to classify; resolved via `pathMod.resolve`.
 * @param {string} inboxDirAbs - absolute inbox directory, already resolved
 *   with the same path module by the caller.
 * @param {object} pathMod - path implementation providing `resolve`,
 *   `relative`, `isAbsolute`, `basename` and `sep`.
 * @returns {boolean} true when `candidate` is a direct, non-dotfile child
 *   of the inbox directory.
 */
export function isInboxPathFor(candidate, inboxDirAbs, pathMod) {
    const resolved = pathMod.resolve(candidate);
    const relativeToInbox = pathMod.relative(inboxDirAbs, resolved);
    // Empty => the inbox dir itself; leading ".." => above it; absolute =>
    // no relative route exists (e.g. a different drive on Windows).
    const outsideInbox = relativeToInbox === "" ||
        relativeToInbox.startsWith("..") ||
        pathMod.isAbsolute(relativeToInbox);
    if (outsideInbox) {
        return false;
    }
    const isDotfile = pathMod.basename(resolved).startsWith(".");
    if (isDotfile) {
        return false;
    }
    // Only direct children — `~/.patchwork/inbox/foo.md`, not nested.
    const segments = relativeToInbox.split(pathMod.sep);
    return segments.length === 1;
}
|
|
131
244
|
function stripLeadingNarration(text) {
|
|
132
245
|
const lines = text.split("\n");
|
|
133
246
|
const firstMarkdown = lines.findIndex((l) => /^(#|>|`|\||[-*+] |\d+\. |\*\*)/.test(l.trimStart()));
|
|
@@ -239,6 +352,84 @@ export async function runYamlRecipe(recipe, deps = {}, seedContext = {}) {
|
|
|
239
352
|
...seedContext,
|
|
240
353
|
};
|
|
241
354
|
const stepDeps = resolveStepDeps(deps, { recipeName: recipe.name });
|
|
355
|
+
// Phase 0β — inbox provenance. When a recipe `file.write` / `file.append`
|
|
356
|
+
// step targets `~/.patchwork/inbox/`, prepend a YAML frontmatter block
|
|
357
|
+
// (first write only) recording recipe + run + trigger, and accumulate the
|
|
358
|
+
// delivered filename onto the run record's `inboxOutputs`. Old recipes /
|
|
359
|
+
// non-inbox paths pass through unchanged.
|
|
360
|
+
//
|
|
361
|
+
// Windows path-separator fix (CI repro 2026-05-20): the original
|
|
362
|
+
// implementation built the prefix as `${os.homedir()}/.patchwork/inbox/`
|
|
363
|
+
// and compared with `startsWith`, which failed on Windows where
|
|
364
|
+
// resolved absolute paths use `\` separators and `os.homedir()` returns
|
|
365
|
+
// `C:\Users\...`. Now we resolve both sides through `path.resolve()`
|
|
366
|
+
// and use `path.relative()` to detect containment so the comparison is
|
|
367
|
+
// separator-agnostic. Also case-insensitive on Win32 (NTFS).
|
|
368
|
+
const inboxDirAbs = path.resolve(path.join(os.homedir(), ".patchwork", "inbox"));
|
|
369
|
+
const inboxOutputs = [];
|
|
370
|
+
const isInboxPath = (abs) => isInboxPathFor(abs, inboxDirAbs, path);
|
|
371
|
+
const buildFrontmatter = () => {
|
|
372
|
+
const triggerKindAtWrite = yamlTriggerKind;
|
|
373
|
+
const lines = ["---", `recipe: ${recipe.name}`];
|
|
374
|
+
if (runSeq !== undefined)
|
|
375
|
+
lines.push(`runSeq: ${runSeq}`);
|
|
376
|
+
lines.push(`trigger: ${triggerKindAtWrite}`, `deliveredAt: ${new Date().toISOString()}`, "---", "", "");
|
|
377
|
+
return lines.join("\n");
|
|
378
|
+
};
|
|
379
|
+
const recordInboxDelivery = (abs) => {
|
|
380
|
+
inboxOutputs.push({
|
|
381
|
+
filename: path.basename(abs),
|
|
382
|
+
deliveredAt: Date.now(),
|
|
383
|
+
});
|
|
384
|
+
};
|
|
385
|
+
// Atomic read-or-default: a single `readFileSync` in a try/catch. No
|
|
386
|
+
// `existsSync`/`statSync` probe around the write — on Windows a stat
|
|
387
|
+
// immediately before write can race a concurrent fd holder and surface
|
|
388
|
+
// `EBUSY`/`EPERM`. The read either succeeds (file present) or throws
|
|
389
|
+
// ENOENT (treated as new file). Either way we never stat the same path
|
|
390
|
+
// we're about to write.
|
|
391
|
+
const readExistingOrEmpty = (abs) => {
|
|
392
|
+
try {
|
|
393
|
+
return readFileSync(abs, "utf-8");
|
|
394
|
+
}
|
|
395
|
+
catch {
|
|
396
|
+
return "";
|
|
397
|
+
}
|
|
398
|
+
};
|
|
399
|
+
const originalWrite = stepDeps.writeFile;
|
|
400
|
+
const originalAppend = stepDeps.appendFile;
|
|
401
|
+
stepDeps.writeFile = (p, content) => {
|
|
402
|
+
if (isInboxPath(p)) {
|
|
403
|
+
// First-write detection by content shape, not by stat. Empty string
|
|
404
|
+
// (ENOENT) and any file that does NOT already begin with `---\n`
|
|
405
|
+
// gets frontmatter; pre-frontmattered files are overwritten as-is
|
|
406
|
+
// so consumers can replay a recipe without doubling the header.
|
|
407
|
+
const existing = readExistingOrEmpty(p);
|
|
408
|
+
const hasFm = existing.startsWith("---\n");
|
|
409
|
+
const final = hasFm ? content : buildFrontmatter() + content;
|
|
410
|
+
originalWrite(p, final);
|
|
411
|
+
recordInboxDelivery(p);
|
|
412
|
+
return;
|
|
413
|
+
}
|
|
414
|
+
originalWrite(p, content);
|
|
415
|
+
};
|
|
416
|
+
stepDeps.appendFile = (p, content) => {
|
|
417
|
+
if (isInboxPath(p)) {
|
|
418
|
+
// file.append: never re-prepend. If file is brand-new, seed one
|
|
419
|
+
// frontmatter block so an append-only recipe still gets
|
|
420
|
+
// provenance. Same atomic read-or-default — no stat probe.
|
|
421
|
+
const existing = readExistingOrEmpty(p);
|
|
422
|
+
if (existing.length === 0) {
|
|
423
|
+
originalWrite(p, buildFrontmatter() + content);
|
|
424
|
+
}
|
|
425
|
+
else {
|
|
426
|
+
originalAppend(p, content);
|
|
427
|
+
}
|
|
428
|
+
recordInboxDelivery(p);
|
|
429
|
+
return;
|
|
430
|
+
}
|
|
431
|
+
originalAppend(p, content);
|
|
432
|
+
};
|
|
242
433
|
// PR2b: one per-run budget shared across all agent steps. Absent
|
|
243
434
|
// `recipe.budget` → no enforcement, no overhead.
|
|
244
435
|
const runBudget = new RunBudget(recipe.budget);
|
|
@@ -316,348 +507,459 @@ export async function runYamlRecipe(recipe, deps = {}, seedContext = {}) {
|
|
|
316
507
|
// Track per-step start timestamps so done events carry durationMs
|
|
317
508
|
// without a second roundtrip.
|
|
318
509
|
const stepStartTs = new Map();
|
|
319
|
-
for
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
510
|
+
// Emit recipe_step_done for the step result just pushed onto
|
|
511
|
+
// `stepResults`. Every loop branch (skip / budget / agent / tool)
|
|
512
|
+
// pushes exactly one result before it ends, so the last element is
|
|
513
|
+
// always the current step. `stepId` mirrors recipe_step_start's
|
|
514
|
+
// `stepIdForEmit` so live consumers can correlate start↔done — the
|
|
515
|
+
// pushed result's own id can diverge for agent steps without `into`.
|
|
516
|
+
const emitStepDone = (stepIdForEmit) => {
|
|
517
|
+
const justPushed = stepResults[stepResults.length - 1];
|
|
518
|
+
if (!justPushed)
|
|
519
|
+
return;
|
|
520
|
+
const haltReason = justPushed.haltReason;
|
|
521
|
+
emit("recipe_step_done", {
|
|
324
522
|
runSeq,
|
|
325
523
|
recipeName: recipe.name,
|
|
326
524
|
stepId: stepIdForEmit,
|
|
327
|
-
tool:
|
|
328
|
-
|
|
525
|
+
tool: justPushed.tool,
|
|
526
|
+
status: justPushed.status,
|
|
527
|
+
durationMs: justPushed.durationMs,
|
|
528
|
+
...(justPushed.error !== undefined && { error: justPushed.error }),
|
|
529
|
+
...(haltReason !== undefined && {
|
|
530
|
+
haltReason,
|
|
531
|
+
haltCategory: categoriseHaltReason(haltReason),
|
|
532
|
+
}),
|
|
533
|
+
ts: Date.now(),
|
|
329
534
|
});
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
535
|
+
};
|
|
536
|
+
// The step loop is wrapped so an uncaught throw from any unguarded
|
|
537
|
+
// call site (a `when`/prompt render on a malformed step, a path-jail
|
|
538
|
+
// re-check, etc.) cannot escape `runYamlRecipe` and strand the
|
|
539
|
+
// run-log entry at "running" forever. On throw we capture the
|
|
540
|
+
// message into `runError` and fall through to the normal
|
|
541
|
+
// finalization path, which marks the run "error".
|
|
542
|
+
try {
|
|
543
|
+
for (const step of recipe.steps) {
|
|
544
|
+
const stepIdForEmit = step.into ?? step.agent?.into ?? `step_${stepsRun}`;
|
|
545
|
+
const stepTs = Date.now();
|
|
546
|
+
stepStartTs.set(stepIdForEmit, stepTs);
|
|
547
|
+
emit("recipe_step_start", {
|
|
548
|
+
runSeq,
|
|
549
|
+
recipeName: recipe.name,
|
|
550
|
+
stepId: stepIdForEmit,
|
|
551
|
+
tool: step.agent ? "agent" : step.tool,
|
|
552
|
+
ts: stepTs,
|
|
553
|
+
});
|
|
554
|
+
// Evaluate `when` guard before running anything. Mirrors
|
|
555
|
+
// chainedRunner.ts:248-266 — render the template, then truthy-check the
|
|
556
|
+
// result (empty string, "0", "false", "null", "undefined" are falsy).
|
|
557
|
+
// A falsy guard records the step as `skipped`, increments stepsRun, and
|
|
558
|
+
// continues — it is NOT a failure. Bridge-dev iMessage recipes rely on
|
|
559
|
+
// this to suppress the iMessage agent step when phone is empty.
|
|
560
|
+
if (typeof step.when === "string" && step.when.length > 0) {
|
|
561
|
+
const rendered = render(step.when, ctx).trim().toLowerCase();
|
|
562
|
+
const truthy = !!rendered &&
|
|
563
|
+
rendered !== "0" &&
|
|
564
|
+
rendered !== "false" &&
|
|
565
|
+
rendered !== "null" &&
|
|
566
|
+
rendered !== "undefined";
|
|
567
|
+
if (!truthy) {
|
|
568
|
+
const skipId = step.into ?? step.agent?.into ?? `step_${stepsRun}`;
|
|
569
|
+
stepResults.push({
|
|
570
|
+
id: skipId,
|
|
571
|
+
tool: step.agent ? "agent" : step.tool,
|
|
572
|
+
status: "skipped",
|
|
573
|
+
durationMs: 0,
|
|
574
|
+
});
|
|
575
|
+
stepsRun++;
|
|
576
|
+
persistLiveStepResults();
|
|
577
|
+
emit("recipe_step_done", {
|
|
578
|
+
runSeq,
|
|
579
|
+
recipeName: recipe.name,
|
|
580
|
+
stepId: skipId,
|
|
581
|
+
tool: step.agent ? "agent" : step.tool,
|
|
582
|
+
status: "skipped",
|
|
583
|
+
durationMs: 0,
|
|
584
|
+
ts: Date.now(),
|
|
585
|
+
});
|
|
586
|
+
continue;
|
|
376
587
|
}
|
|
377
|
-
renderedPrompt += JUDGE_PROMPT_SUFFIX;
|
|
378
588
|
}
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
const agentReturn = await _executeAgent({
|
|
407
|
-
prompt: renderedPrompt,
|
|
408
|
-
driver: agentCfg.driver === "api" ? "anthropic" : agentCfg.driver,
|
|
409
|
-
model: agentCfg.model,
|
|
410
|
-
...(agentCfg.mcpAccess !== undefined && {
|
|
411
|
-
mcpAccess: agentCfg.mcpAccess,
|
|
412
|
-
}),
|
|
413
|
-
}, buildAgentExecutorDeps(stepDeps, deps));
|
|
414
|
-
agentResult = agentReturn.text;
|
|
415
|
-
runBudget.reconcile(agentCfg.driver === "api" ? "anthropic" : (agentCfg.driver ?? "auto"), agentReturn.usage);
|
|
416
|
-
// Catch both `[agent step failed: ...]` (existing) and the
|
|
417
|
-
// silent-fail patterns `[agent step skipped: ...]` etc. via the
|
|
418
|
-
// shared detector. Per-step opt-out via `silentFailDetection: false`.
|
|
419
|
-
const agentSilentFail = step.silentFailDetection !== false
|
|
420
|
-
? detectSilentFail(agentResult)
|
|
421
|
-
: null;
|
|
422
|
-
if (agentResult.startsWith("[agent step failed:") || agentSilentFail) {
|
|
423
|
-
const reason = agentSilentFail
|
|
424
|
-
? `silent-fail detected (${agentSilentFail.reason}): ${agentSilentFail.matched}`
|
|
425
|
-
: agentResult;
|
|
589
|
+
// Handle agent steps separately
|
|
590
|
+
if (step.agent) {
|
|
591
|
+
const agentCfg = step.agent;
|
|
592
|
+
const isJudge = agentCfg.kind === "judge";
|
|
593
|
+
// PR3a: judge prompt convention. Append the structured-verdict
|
|
594
|
+
// suffix and, when `reviews: <stepId>` is set, inject the
|
|
595
|
+
// upstream step's output as an <artefact> block.
|
|
596
|
+
let renderedPrompt = render(agentCfg.prompt, ctx);
|
|
597
|
+
if (isJudge) {
|
|
598
|
+
if (agentCfg.reviews) {
|
|
599
|
+
renderedPrompt += buildJudgeArtefactBlock(ctx[agentCfg.reviews]);
|
|
600
|
+
}
|
|
601
|
+
renderedPrompt += JUDGE_PROMPT_SUFFIX;
|
|
602
|
+
}
|
|
603
|
+
const intoKey = agentCfg.into ?? "agent_output";
|
|
604
|
+
const stepId = intoKey;
|
|
605
|
+
const stepStart = Date.now();
|
|
606
|
+
let agentResult;
|
|
607
|
+
// PR2b: per-recipe token budget. Admission check before dispatch;
|
|
608
|
+
// reconcile actual consumption after. Subscription drivers
|
|
609
|
+
// (Claude CLI, provider subprocess) report `usage === undefined`
|
|
610
|
+
// — `RunBudget.reconcile` records a fail-open warning per driver
|
|
611
|
+
// per run and continues.
|
|
612
|
+
const admission = runBudget.admit();
|
|
613
|
+
if (!admission.admitted) {
|
|
614
|
+
const reason = admission.reason ??
|
|
615
|
+
"Run exceeded its token budget — budget_exceeded.";
|
|
426
616
|
runError = runError ?? reason;
|
|
427
617
|
stepResults.push({
|
|
428
618
|
id: stepId,
|
|
429
619
|
tool: "agent",
|
|
430
620
|
status: "error",
|
|
431
621
|
error: reason,
|
|
432
|
-
haltReason:
|
|
433
|
-
|
|
434
|
-
: `Agent step "${stepId}" reported failure.`,
|
|
435
|
-
durationMs: Date.now() - stepStart,
|
|
622
|
+
haltReason: reason,
|
|
623
|
+
durationMs: 0,
|
|
436
624
|
});
|
|
625
|
+
stepsRun++;
|
|
626
|
+
persistLiveStepResults();
|
|
627
|
+
emitStepDone(stepIdForEmit);
|
|
628
|
+
continue;
|
|
437
629
|
}
|
|
438
|
-
|
|
439
|
-
const
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
630
|
+
try {
|
|
631
|
+
const agentReturn = await _executeAgent({
|
|
632
|
+
prompt: renderedPrompt,
|
|
633
|
+
driver: agentCfg.driver === "api" ? "anthropic" : agentCfg.driver,
|
|
634
|
+
model: agentCfg.model,
|
|
635
|
+
...(agentCfg.mcpAccess !== undefined && {
|
|
636
|
+
mcpAccess: agentCfg.mcpAccess,
|
|
637
|
+
}),
|
|
638
|
+
}, buildAgentExecutorDeps(stepDeps, deps));
|
|
639
|
+
agentResult = agentReturn.text;
|
|
640
|
+
runBudget.reconcile(agentCfg.driver === "api"
|
|
641
|
+
? "anthropic"
|
|
642
|
+
: (agentCfg.driver ?? "auto"), agentReturn.usage);
|
|
643
|
+
// Catch both `[agent step failed: ...]` (existing) and the
|
|
644
|
+
// silent-fail patterns `[agent step skipped: ...]` etc. via the
|
|
645
|
+
// shared detector. Per-step opt-out via `silentFailDetection: false`.
|
|
646
|
+
const agentSilentFail = step.silentFailDetection !== false
|
|
647
|
+
? detectSilentFail(agentResult)
|
|
648
|
+
: null;
|
|
649
|
+
if (agentResult.startsWith("[agent step failed:") ||
|
|
650
|
+
agentSilentFail) {
|
|
651
|
+
const reason = agentSilentFail
|
|
652
|
+
? `silent-fail detected (${agentSilentFail.reason}): ${agentSilentFail.matched}`
|
|
653
|
+
: agentResult;
|
|
654
|
+
runError = runError ?? reason;
|
|
443
655
|
stepResults.push({
|
|
444
656
|
id: stepId,
|
|
445
657
|
tool: "agent",
|
|
446
658
|
status: "error",
|
|
447
|
-
error:
|
|
448
|
-
haltReason:
|
|
659
|
+
error: reason,
|
|
660
|
+
haltReason: agentSilentFail
|
|
661
|
+
? `Agent step "${stepId}" returned no usable output (silent-fail: ${agentSilentFail.reason}).`
|
|
662
|
+
: `Agent step "${stepId}" reported failure.`,
|
|
449
663
|
durationMs: Date.now() - stepStart,
|
|
450
664
|
});
|
|
451
665
|
}
|
|
452
666
|
else {
|
|
453
|
-
|
|
667
|
+
const stripped = stripLeadingNarration(agentResult);
|
|
668
|
+
if (!stripped.trim()) {
|
|
669
|
+
const errMsg = `[agent step failed: ${agentCfg.driver ?? "agent"} returned only narration or whitespace — no content]`;
|
|
670
|
+
runError = runError ?? errMsg;
|
|
671
|
+
stepResults.push({
|
|
672
|
+
id: stepId,
|
|
673
|
+
tool: "agent",
|
|
674
|
+
status: "error",
|
|
675
|
+
error: errMsg,
|
|
676
|
+
haltReason: `Agent step "${stepId}" returned only narration or whitespace — no content.`,
|
|
677
|
+
durationMs: Date.now() - stepStart,
|
|
678
|
+
});
|
|
679
|
+
}
|
|
680
|
+
else {
|
|
681
|
+
// Try to parse as JSON so dot-notation ({{meeting.field}}) works
|
|
682
|
+
try {
|
|
683
|
+
const jsonMatch = /```(?:json)?\s*([\s\S]*?)```/.exec(stripped) ?? [null, stripped];
|
|
684
|
+
const parsed = sanitizeParsed(JSON.parse((jsonMatch[1] ?? "").trim()));
|
|
685
|
+
ctx[intoKey] = parsed;
|
|
686
|
+
}
|
|
687
|
+
catch {
|
|
688
|
+
ctx[intoKey] = stripped;
|
|
689
|
+
}
|
|
690
|
+
outputs.push(intoKey);
|
|
691
|
+
// PR3a: parse + stash the judge verdict on the step result.
|
|
692
|
+
// Augment-only: a `request_changes` verdict still yields
|
|
693
|
+
// `status: "ok"`. The verdict surfaces via the runlog +
|
|
694
|
+
// future PR3b dashboard panel, but never gates the run.
|
|
695
|
+
const judgeVerdict = isJudge
|
|
696
|
+
? parseJudgeVerdict(stripped)
|
|
697
|
+
: undefined;
|
|
698
|
+
stepResults.push({
|
|
699
|
+
id: stepId,
|
|
700
|
+
tool: "agent",
|
|
701
|
+
status: "ok",
|
|
702
|
+
...(judgeVerdict !== undefined && { judgeVerdict }),
|
|
703
|
+
durationMs: Date.now() - stepStart,
|
|
704
|
+
});
|
|
705
|
+
// Slice 2 — per-step expect eval. Runs on the value just
|
|
706
|
+
// committed to ctx[intoKey]. Halt failure flips the just-pushed
|
|
707
|
+
// result to error and rolls back the ctx commit so downstream
|
|
708
|
+
// steps don't see a value the recipe author rejected.
|
|
709
|
+
if (step.expect) {
|
|
710
|
+
const failures = await evaluateStepExpect(step.expect, ctx[intoKey]);
|
|
711
|
+
if (failures.length > 0) {
|
|
712
|
+
const onFail = step.expect.on_fail ?? "halt";
|
|
713
|
+
const last = stepResults[stepResults.length - 1];
|
|
714
|
+
if (last) {
|
|
715
|
+
if (onFail === "halt") {
|
|
716
|
+
last.status = "error";
|
|
717
|
+
last.error = `expect_failed: ${failures.join("; ")}`;
|
|
718
|
+
last.haltReason = `expect_failed in step "${stepId}": ${failures.join("; ")}`;
|
|
719
|
+
const fbk = recipe.on_error?.fallback;
|
|
720
|
+
const fbkOpen = fbk === "log_only" || fbk === "deliver_original";
|
|
721
|
+
const failOpenAgent = step.optional === true || fbkOpen;
|
|
722
|
+
if (!failOpenAgent) {
|
|
723
|
+
runError = runError ?? last.haltReason;
|
|
724
|
+
}
|
|
725
|
+
delete ctx[intoKey];
|
|
726
|
+
}
|
|
727
|
+
else {
|
|
728
|
+
last.expectWarnings = failures;
|
|
729
|
+
}
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
}
|
|
733
|
+
}
|
|
734
|
+
}
|
|
735
|
+
}
|
|
736
|
+
catch (err) {
|
|
737
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
738
|
+
runError = runError ?? `agent step "${stepId}" failed: ${msg}`;
|
|
739
|
+
stepResults.push({
|
|
740
|
+
id: stepId,
|
|
741
|
+
tool: "agent",
|
|
742
|
+
status: "error",
|
|
743
|
+
error: msg,
|
|
744
|
+
haltReason: `Agent step "${stepId}" threw before completing: ${msg}`,
|
|
745
|
+
durationMs: Date.now() - stepStart,
|
|
746
|
+
});
|
|
747
|
+
}
|
|
748
|
+
stepsRun++;
|
|
749
|
+
persistLiveStepResults();
|
|
750
|
+
emitStepDone(stepIdForEmit);
|
|
751
|
+
continue;
|
|
752
|
+
}
|
|
753
|
+
const stepStart = Date.now();
|
|
754
|
+
const stepId = step.into ?? `step_${stepsRun}`;
|
|
755
|
+
// Resolve retry policy: step-level overrides recipe-level.
|
|
756
|
+
const retryCount = step.retry ?? recipe.on_error?.retry ?? 0;
|
|
757
|
+
const retryDelayMs = step.retryDelay ?? recipe.on_error?.retryDelay ?? 1000;
|
|
758
|
+
let result = null;
|
|
759
|
+
let stepError;
|
|
760
|
+
let thrownError;
|
|
761
|
+
let thrownErrorCode;
|
|
762
|
+
for (let attempt = 0; attempt <= retryCount; attempt++) {
|
|
763
|
+
if (attempt > 0) {
|
|
764
|
+
await new Promise((r) => setTimeout(r, retryDelayMs));
|
|
765
|
+
}
|
|
766
|
+
stepError = undefined;
|
|
767
|
+
thrownError = undefined;
|
|
768
|
+
thrownErrorCode = undefined;
|
|
769
|
+
try {
|
|
770
|
+
// Slice (sandbox-alternative): per-step wall-clock timeout via
|
|
771
|
+
// Promise.race. The underlying tool keeps running in the
|
|
772
|
+
// background — this is a halt signal for the runner, not a
|
|
773
|
+
// process kill. The thrown error carries a `step_timeout`
|
|
774
|
+
// prefix so categoriseHaltReason maps it correctly.
|
|
775
|
+
const timeoutMs = typeof step.timeout_ms === "number" && step.timeout_ms > 0
|
|
776
|
+
? step.timeout_ms
|
|
777
|
+
: 0;
|
|
778
|
+
if (timeoutMs > 0) {
|
|
779
|
+
let timer;
|
|
780
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
781
|
+
timer = setTimeout(() => {
|
|
782
|
+
reject(new Error(`step_timeout: exceeded ${timeoutMs}ms in step "${step.into ?? step.tool ?? "?"}"`));
|
|
783
|
+
}, timeoutMs);
|
|
784
|
+
});
|
|
785
|
+
try {
|
|
786
|
+
result = await Promise.race([
|
|
787
|
+
executeStep(step, ctx, stepDeps),
|
|
788
|
+
timeoutPromise,
|
|
789
|
+
]);
|
|
790
|
+
}
|
|
791
|
+
finally {
|
|
792
|
+
if (timer)
|
|
793
|
+
clearTimeout(timer);
|
|
794
|
+
}
|
|
795
|
+
}
|
|
796
|
+
else {
|
|
797
|
+
result = await executeStep(step, ctx, stepDeps);
|
|
798
|
+
}
|
|
799
|
+
// Detect tool-level errors reported as JSON {ok: false, error: ...}
|
|
800
|
+
if (result !== null) {
|
|
454
801
|
try {
|
|
455
|
-
const
|
|
456
|
-
|
|
457
|
-
|
|
802
|
+
const parsed = JSON.parse(result);
|
|
803
|
+
if (parsed.ok === false && typeof parsed.error === "string") {
|
|
804
|
+
stepError = parsed.error;
|
|
805
|
+
}
|
|
458
806
|
}
|
|
459
807
|
catch {
|
|
460
|
-
|
|
808
|
+
/* non-JSON result is fine */
|
|
461
809
|
}
|
|
462
|
-
outputs.push(intoKey);
|
|
463
|
-
// PR3a: parse + stash the judge verdict on the step result.
|
|
464
|
-
// Augment-only: a `request_changes` verdict still yields
|
|
465
|
-
// `status: "ok"`. The verdict surfaces via the runlog +
|
|
466
|
-
// future PR3b dashboard panel, but never gates the run.
|
|
467
|
-
const judgeVerdict = isJudge
|
|
468
|
-
? parseJudgeVerdict(stripped)
|
|
469
|
-
: undefined;
|
|
470
|
-
stepResults.push({
|
|
471
|
-
id: stepId,
|
|
472
|
-
tool: "agent",
|
|
473
|
-
status: "ok",
|
|
474
|
-
...(judgeVerdict !== undefined && { judgeVerdict }),
|
|
475
|
-
durationMs: Date.now() - stepStart,
|
|
476
|
-
});
|
|
477
810
|
}
|
|
811
|
+
// Silent-fail detection: tools that return string placeholders
|
|
812
|
+
// (`(git branches unavailable)`, `[agent step skipped: ...]`)
|
|
813
|
+
// or empty list-tool error shapes (`{count:0,error:"..."}`)
|
|
814
|
+
// succeed with bad data — flag them as `error` so the runner
|
|
815
|
+
// doesn't quietly hand garbage to a downstream agent. Per-step
|
|
816
|
+
// opt-out via `silentFailDetection: false`.
|
|
817
|
+
if (!stepError &&
|
|
818
|
+
result !== null &&
|
|
819
|
+
step.silentFailDetection !== false) {
|
|
820
|
+
const detected = detectSilentFail(result);
|
|
821
|
+
if (detected) {
|
|
822
|
+
stepError = `silent-fail detected (${detected.reason}): ${detected.matched}`;
|
|
823
|
+
}
|
|
824
|
+
}
|
|
825
|
+
}
|
|
826
|
+
catch (err) {
|
|
827
|
+
thrownError = err instanceof Error ? err.message : String(err);
|
|
828
|
+
// Preserve structured error codes (e.g. recipe_path_jail_escape)
|
|
829
|
+
// so callers and tests can branch on `err.code` per R2 M-4
|
|
830
|
+
// without scraping the message string.
|
|
831
|
+
const code = err?.code;
|
|
832
|
+
if (typeof code === "string")
|
|
833
|
+
thrownErrorCode = code;
|
|
834
|
+
result = null;
|
|
478
835
|
}
|
|
836
|
+
if (!stepError && !thrownError)
|
|
837
|
+
break;
|
|
479
838
|
}
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
839
|
+
// Recipe-level fallback: log_only / deliver_original treat step failure
|
|
840
|
+
// as non-fatal (fail-open) — same semantics as step-level optional: true.
|
|
841
|
+
const fallback = recipe.on_error?.fallback;
|
|
842
|
+
const fallbackFailOpen = fallback === "log_only" || fallback === "deliver_original";
|
|
843
|
+
const failOpen = step.optional === true || fallbackFailOpen;
|
|
844
|
+
if (thrownError) {
|
|
845
|
+
const retryNote = retryCount > 0 ? ` after ${retryCount + 1} attempts` : "";
|
|
483
846
|
stepResults.push({
|
|
484
847
|
id: stepId,
|
|
485
|
-
tool:
|
|
848
|
+
tool: step.tool,
|
|
486
849
|
status: "error",
|
|
487
|
-
error:
|
|
488
|
-
|
|
850
|
+
error: thrownError,
|
|
851
|
+
...(thrownErrorCode ? { errorCode: thrownErrorCode } : {}),
|
|
852
|
+
haltReason: `Tool "${step.tool ?? "?"}" in step "${stepId}" threw${retryNote}: ${thrownError}`,
|
|
489
853
|
durationMs: Date.now() - stepStart,
|
|
490
854
|
});
|
|
855
|
+
if (!failOpen) {
|
|
856
|
+
runError = runError ?? `${step.tool} failed: ${thrownError}`;
|
|
857
|
+
}
|
|
858
|
+
else if (fallbackFailOpen && !step.optional) {
|
|
859
|
+
console.warn(`step ${stepId} failed but on_error.fallback=${fallback} — treating as non-fatal: ${thrownError}`);
|
|
860
|
+
}
|
|
491
861
|
}
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
862
|
+
else {
|
|
863
|
+
const finalStatus = result === null ? "skipped" : stepError ? "error" : "ok";
|
|
864
|
+
const retryNote = retryCount > 0 ? ` after ${retryCount + 1} attempts` : "";
|
|
865
|
+
stepResults.push({
|
|
866
|
+
id: stepId,
|
|
867
|
+
tool: step.tool,
|
|
868
|
+
status: finalStatus,
|
|
869
|
+
error: stepError,
|
|
870
|
+
...(finalStatus === "error" && stepError
|
|
871
|
+
? {
|
|
872
|
+
haltReason: `Tool "${step.tool ?? "?"}" in step "${stepId}" reported an error${retryNote}: ${stepError}`,
|
|
873
|
+
}
|
|
874
|
+
: {}),
|
|
875
|
+
durationMs: Date.now() - stepStart,
|
|
876
|
+
});
|
|
877
|
+
if (stepError) {
|
|
878
|
+
if (!failOpen) {
|
|
879
|
+
runError = runError ?? `${step.tool} failed: ${stepError}`;
|
|
880
|
+
}
|
|
881
|
+
else if (fallbackFailOpen && !step.optional) {
|
|
882
|
+
console.warn(`step ${stepId} failed but on_error.fallback=${fallback} — treating as non-fatal: ${stepError}`);
|
|
883
|
+
}
|
|
884
|
+
}
|
|
508
885
|
}
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
result = await executeStep(step, ctx, stepDeps);
|
|
514
|
-
// Detect tool-level errors reported as JSON {ok: false, error: ...}
|
|
515
|
-
if (result !== null) {
|
|
886
|
+
stepsRun++;
|
|
887
|
+
if (result !== null) {
|
|
888
|
+
// Apply transform if present — render template with $result injected
|
|
889
|
+
if (step.transform) {
|
|
516
890
|
try {
|
|
517
|
-
|
|
518
|
-
if (parsed.ok === false && typeof parsed.error === "string") {
|
|
519
|
-
stepError = parsed.error;
|
|
520
|
-
}
|
|
891
|
+
result = render(step.transform, { ...ctx, $result: result });
|
|
521
892
|
}
|
|
522
|
-
catch {
|
|
523
|
-
|
|
893
|
+
catch (err) {
|
|
894
|
+
// warn but fall through with original result
|
|
895
|
+
console.warn(`transform failed for step ${step.into ?? step.tool ?? "?"}: ${err}`);
|
|
524
896
|
}
|
|
525
897
|
}
|
|
526
|
-
//
|
|
527
|
-
// (
|
|
528
|
-
//
|
|
529
|
-
//
|
|
530
|
-
//
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
898
|
+
// Slice 2 — per-step expect eval. Runs on the post-transform value
|
|
899
|
+
// (what would land in ctx) and only when the step otherwise succeeded.
|
|
900
|
+
// Halt failure flips the just-pushed result to error and suppresses
|
|
901
|
+
// the ctx commit by nulling `result` so the downstream `if (step.into)`
|
|
902
|
+
// block skips. Composes with `optional: true` / `on_error.fallback`.
|
|
903
|
+
if (step.expect && !thrownError && !stepError && result !== null) {
|
|
904
|
+
const failures = await evaluateStepExpect(step.expect, result);
|
|
905
|
+
if (failures.length > 0) {
|
|
906
|
+
const onFail = step.expect.on_fail ?? "halt";
|
|
907
|
+
const last = stepResults[stepResults.length - 1];
|
|
908
|
+
if (last) {
|
|
909
|
+
if (onFail === "halt") {
|
|
910
|
+
last.status = "error";
|
|
911
|
+
last.error = `expect_failed: ${failures.join("; ")}`;
|
|
912
|
+
last.haltReason = `expect_failed in step "${stepId}": ${failures.join("; ")}`;
|
|
913
|
+
if (!failOpen) {
|
|
914
|
+
runError = runError ?? last.haltReason;
|
|
915
|
+
}
|
|
916
|
+
result = null;
|
|
917
|
+
}
|
|
918
|
+
else {
|
|
919
|
+
last.expectWarnings = failures;
|
|
920
|
+
}
|
|
921
|
+
}
|
|
538
922
|
}
|
|
539
923
|
}
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
// so callers and tests can branch on `err.code` per R2 M-4
|
|
545
|
-
// without scraping the message string.
|
|
546
|
-
const code = err?.code;
|
|
547
|
-
if (typeof code === "string")
|
|
548
|
-
thrownErrorCode = code;
|
|
549
|
-
result = null;
|
|
550
|
-
}
|
|
551
|
-
if (!stepError && !thrownError)
|
|
552
|
-
break;
|
|
553
|
-
}
|
|
554
|
-
// Recipe-level fallback: log_only / deliver_original treat step failure
|
|
555
|
-
// as non-fatal (fail-open) — same semantics as step-level optional: true.
|
|
556
|
-
const fallback = recipe.on_error?.fallback;
|
|
557
|
-
const fallbackFailOpen = fallback === "log_only" || fallback === "deliver_original";
|
|
558
|
-
const failOpen = step.optional === true || fallbackFailOpen;
|
|
559
|
-
if (thrownError) {
|
|
560
|
-
const retryNote = retryCount > 0 ? ` after ${retryCount + 1} attempts` : "";
|
|
561
|
-
stepResults.push({
|
|
562
|
-
id: stepId,
|
|
563
|
-
tool: step.tool,
|
|
564
|
-
status: "error",
|
|
565
|
-
error: thrownError,
|
|
566
|
-
...(thrownErrorCode ? { errorCode: thrownErrorCode } : {}),
|
|
567
|
-
haltReason: `Tool "${step.tool ?? "?"}" in step "${stepId}" threw${retryNote}: ${thrownError}`,
|
|
568
|
-
durationMs: Date.now() - stepStart,
|
|
569
|
-
});
|
|
570
|
-
if (!failOpen) {
|
|
571
|
-
runError = runError ?? `${step.tool} failed: ${thrownError}`;
|
|
572
|
-
}
|
|
573
|
-
else if (fallbackFailOpen && !step.optional) {
|
|
574
|
-
console.warn(`step ${stepId} failed but on_error.fallback=${fallback} — treating as non-fatal: ${thrownError}`);
|
|
575
|
-
}
|
|
576
|
-
}
|
|
577
|
-
else {
|
|
578
|
-
const finalStatus = result === null ? "skipped" : stepError ? "error" : "ok";
|
|
579
|
-
const retryNote = retryCount > 0 ? ` after ${retryCount + 1} attempts` : "";
|
|
580
|
-
stepResults.push({
|
|
581
|
-
id: stepId,
|
|
582
|
-
tool: step.tool,
|
|
583
|
-
status: finalStatus,
|
|
584
|
-
error: stepError,
|
|
585
|
-
...(finalStatus === "error" && stepError
|
|
586
|
-
? {
|
|
587
|
-
haltReason: `Tool "${step.tool ?? "?"}" in step "${stepId}" reported an error${retryNote}: ${stepError}`,
|
|
924
|
+
if (result !== null && step.into) {
|
|
925
|
+
ctx[step.into] = result;
|
|
926
|
+
if (step.tool) {
|
|
927
|
+
applyToolOutputContext(step.tool, step.into, result, ctx);
|
|
588
928
|
}
|
|
589
|
-
: {}),
|
|
590
|
-
durationMs: Date.now() - stepStart,
|
|
591
|
-
});
|
|
592
|
-
if (stepError) {
|
|
593
|
-
if (!failOpen) {
|
|
594
|
-
runError = runError ?? `${step.tool} failed: ${stepError}`;
|
|
595
929
|
}
|
|
596
|
-
|
|
597
|
-
|
|
930
|
+
if (step.tool === "file.write" || step.tool === "file.append") {
|
|
931
|
+
// R2 C-1 / F-02: re-validate the rendered path against the jail so a
|
|
932
|
+
// template substitution that survived earlier checks (e.g. via a
|
|
933
|
+
// chained sub-recipe deps override) cannot smuggle an out-of-jail
|
|
934
|
+
// path into the run log / dashboard outputs list.
|
|
935
|
+
const renderedPath = render(step.path, ctx);
|
|
936
|
+
outputs.push(resolveRecipePath(renderedPath, {
|
|
937
|
+
workspace: stepDeps.workdir,
|
|
938
|
+
write: true,
|
|
939
|
+
}));
|
|
598
940
|
}
|
|
599
941
|
}
|
|
942
|
+
persistLiveStepResults();
|
|
943
|
+
emitStepDone(stepIdForEmit);
|
|
600
944
|
}
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
}
|
|
613
|
-
if (step.into) {
|
|
614
|
-
ctx[step.into] = result;
|
|
615
|
-
if (step.tool) {
|
|
616
|
-
applyToolOutputContext(step.tool, step.into, result, ctx);
|
|
617
|
-
}
|
|
618
|
-
}
|
|
619
|
-
if (step.tool === "file.write" || step.tool === "file.append") {
|
|
620
|
-
// R2 C-1 / F-02: re-validate the rendered path against the jail so a
|
|
621
|
-
// template substitution that survived earlier checks (e.g. via a
|
|
622
|
-
// chained sub-recipe deps override) cannot smuggle an out-of-jail
|
|
623
|
-
// path into the run log / dashboard outputs list.
|
|
624
|
-
const renderedPath = render(step.path, ctx);
|
|
625
|
-
outputs.push(resolveRecipePath(renderedPath, {
|
|
626
|
-
workspace: stepDeps.workdir,
|
|
627
|
-
write: true,
|
|
628
|
-
}));
|
|
629
|
-
}
|
|
945
|
+
}
|
|
946
|
+
catch (err) {
|
|
947
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
948
|
+
runError = runError ?? `recipe run aborted: ${msg}`;
|
|
949
|
+
}
|
|
950
|
+
// Evaluate expect block before persisting so failures are stored in the
|
|
951
|
+
// run log. Guarded: a throw here must not skip finalization and strand
|
|
952
|
+
// the run at "running".
|
|
953
|
+
let assertionFailures = [];
|
|
954
|
+
if (recipe.expect) {
|
|
955
|
+
try {
|
|
956
|
+
assertionFailures = evaluateExpect({ stepsRun, outputs, context: ctx, errorMessage: runError }, recipe.expect);
|
|
630
957
|
}
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
// payload mirrors chainedRunner's done event plus haltCategory.
|
|
635
|
-
const justPushed = stepResults
|
|
636
|
-
.slice()
|
|
637
|
-
.reverse()
|
|
638
|
-
.find((r) => r.id === stepIdForEmit);
|
|
639
|
-
if (justPushed) {
|
|
640
|
-
const haltReason = justPushed.haltReason;
|
|
641
|
-
emit("recipe_step_done", {
|
|
642
|
-
runSeq,
|
|
643
|
-
recipeName: recipe.name,
|
|
644
|
-
stepId: justPushed.id,
|
|
645
|
-
tool: justPushed.tool,
|
|
646
|
-
status: justPushed.status,
|
|
647
|
-
durationMs: justPushed.durationMs,
|
|
648
|
-
...(justPushed.error !== undefined && { error: justPushed.error }),
|
|
649
|
-
...(haltReason !== undefined && {
|
|
650
|
-
haltReason,
|
|
651
|
-
haltCategory: categoriseHaltReason(haltReason),
|
|
652
|
-
}),
|
|
653
|
-
ts: Date.now(),
|
|
654
|
-
});
|
|
958
|
+
catch (err) {
|
|
959
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
960
|
+
runError = runError ?? `expect evaluation failed: ${msg}`;
|
|
655
961
|
}
|
|
656
962
|
}
|
|
657
|
-
// Evaluate expect block before persisting so failures are stored in the run log
|
|
658
|
-
const assertionFailures = recipe.expect
|
|
659
|
-
? evaluateExpect({ stepsRun, outputs, context: ctx, errorMessage: runError }, recipe.expect)
|
|
660
|
-
: [];
|
|
661
963
|
// Write to RecipeRunLog so the dashboard Runs page shows this execution.
|
|
662
964
|
// Bridge path: completeRun on the running entry opened above (live-tail).
|
|
663
965
|
// CLI path: construct a local log + appendDirect (no live-tail).
|
|
@@ -686,6 +988,7 @@ export async function runYamlRecipe(recipe, deps = {}, seedContext = {}) {
|
|
|
686
988
|
outputTail,
|
|
687
989
|
...(runError !== undefined && { errorMessage: runError }),
|
|
688
990
|
...(assertionFailures.length > 0 ? { assertionFailures } : {}),
|
|
991
|
+
...(inboxOutputs.length > 0 ? { inboxOutputs } : {}),
|
|
689
992
|
});
|
|
690
993
|
emit("recipe_done", {
|
|
691
994
|
runSeq,
|
|
@@ -693,6 +996,10 @@ export async function runYamlRecipe(recipe, deps = {}, seedContext = {}) {
|
|
|
693
996
|
status: runError ? "error" : "done",
|
|
694
997
|
durationMs: doneAt - recipeStartedAt,
|
|
695
998
|
stepCount: finalStepResults.length,
|
|
999
|
+
// A `done` run can still carry step errors — the runner
|
|
1000
|
+
// continues past a non-fatal step failure. Surface it so
|
|
1001
|
+
// live consumers can show "completed with errors".
|
|
1002
|
+
hadStepErrors: finalStepResults.some((s) => s.status === "error"),
|
|
696
1003
|
...(runError !== undefined && { errorMessage: runError }),
|
|
697
1004
|
...(assertionFailures.length > 0 && {
|
|
698
1005
|
assertionFailureCount: assertionFailures.length,
|
|
@@ -718,6 +1025,7 @@ export async function runYamlRecipe(recipe, deps = {}, seedContext = {}) {
|
|
|
718
1025
|
errorMessage: runError,
|
|
719
1026
|
stepResults: finalStepResults,
|
|
720
1027
|
...(assertionFailures.length > 0 ? { assertionFailures } : {}),
|
|
1028
|
+
...(inboxOutputs.length > 0 ? { inboxOutputs } : {}),
|
|
721
1029
|
});
|
|
722
1030
|
}
|
|
723
1031
|
}
|
|
@@ -772,11 +1080,19 @@ export async function executeStep(step, ctx, deps) {
|
|
|
772
1080
|
// Check if tool is registered in the new registry
|
|
773
1081
|
if (hasTool(toolId)) {
|
|
774
1082
|
const tool = getTool(toolId);
|
|
775
|
-
// Build params with template rendering for string values
|
|
1083
|
+
// Build params with template rendering for string values.
|
|
1084
|
+
// `do` is left raw: it carries a nested sub-step template (used by
|
|
1085
|
+
// `fan_out`) whose `{{item.*}}` placeholders must be rendered per-iter
|
|
1086
|
+
// with the loop variable in scope, not pre-rendered against the outer
|
|
1087
|
+
// ctx (which would resolve them to empty strings).
|
|
776
1088
|
const params = {};
|
|
777
1089
|
for (const [key, value] of Object.entries(step)) {
|
|
778
1090
|
if (key === "tool" || key === "agent" || key === "into")
|
|
779
1091
|
continue;
|
|
1092
|
+
if (key === "do") {
|
|
1093
|
+
params[key] = value;
|
|
1094
|
+
continue;
|
|
1095
|
+
}
|
|
780
1096
|
params[key] = deepRender(value, ctx);
|
|
781
1097
|
}
|
|
782
1098
|
// Check if mock connector is available for this tool
|
|
@@ -1096,8 +1412,16 @@ export function resolveClaudeBinary() {
|
|
|
1096
1412
|
}
|
|
1097
1413
|
return ensureCmdShim("claude");
|
|
1098
1414
|
}
|
|
1099
|
-
function defaultClaudeCodeFn(prompt, _opts) {
|
|
1415
|
+
export function defaultClaudeCodeFn(prompt, _opts) {
|
|
1100
1416
|
const binary = resolveClaudeBinary();
|
|
1417
|
+
// Resolve a workspace cwd so the spawned `claude -p` doesn't inherit the
|
|
1418
|
+
// bridge LaunchAgent's `$HOME` (P2 from the 2026-05-20 research run).
|
|
1419
|
+
// When nothing resolves, surface a typed reason instead of silently
|
|
1420
|
+
// shelling out from the wrong directory.
|
|
1421
|
+
const workspace = resolveWorkspaceRoot();
|
|
1422
|
+
if (!workspace) {
|
|
1423
|
+
return Promise.resolve(`[agent step failed: recipe_no_workspace — no .git ancestor of "${process.cwd()}" and PATCHWORK_WORKSPACE not set. Set PATCHWORK_WORKSPACE in the bridge environment or add a 'workspace:' field to the recipe.]`);
|
|
1424
|
+
}
|
|
1101
1425
|
try {
|
|
1102
1426
|
const result = spawnSync(binary, [
|
|
1103
1427
|
"-p",
|
|
@@ -1106,6 +1430,7 @@ function defaultClaudeCodeFn(prompt, _opts) {
|
|
|
1106
1430
|
"You are a helpful assistant processing a recipe task. Use ONLY the data explicitly provided in the user message — treat it as ground truth. Do not call tools to look up git history, emails, or any other information; all necessary data is already included.",
|
|
1107
1431
|
"--no-session-persistence",
|
|
1108
1432
|
], {
|
|
1433
|
+
cwd: workspace.path,
|
|
1109
1434
|
encoding: "utf-8",
|
|
1110
1435
|
timeout: 120_000,
|
|
1111
1436
|
maxBuffer: 10 * 1024 * 1024,
|
|
@@ -1144,10 +1469,11 @@ function makeProviderDriverFn() {
|
|
|
1144
1469
|
const timeoutMs = 300_000;
|
|
1145
1470
|
const startupTimeoutMs = 30_000;
|
|
1146
1471
|
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
|
1472
|
+
const resolvedWorkspace = process.cwd();
|
|
1147
1473
|
try {
|
|
1148
1474
|
const result = await driver.run({
|
|
1149
1475
|
prompt,
|
|
1150
|
-
workspace:
|
|
1476
|
+
workspace: resolvedWorkspace,
|
|
1151
1477
|
timeoutMs,
|
|
1152
1478
|
startupTimeoutMs,
|
|
1153
1479
|
signal: controller.signal,
|