clinkx 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/clinkx-workflows/dist/artifacts.d.ts +65 -0
- package/clinkx-workflows/dist/artifacts.js +268 -0
- package/clinkx-workflows/dist/artifacts.js.map +1 -0
- package/clinkx-workflows/dist/backend.d.ts +33 -0
- package/clinkx-workflows/dist/backend.js +9 -0
- package/clinkx-workflows/dist/backend.js.map +1 -0
- package/clinkx-workflows/dist/child-env.d.ts +23 -0
- package/clinkx-workflows/dist/child-env.js +53 -0
- package/clinkx-workflows/dist/child-env.js.map +1 -0
- package/clinkx-workflows/dist/clink-client.d.ts +51 -0
- package/clinkx-workflows/dist/clink-client.js +216 -0
- package/clinkx-workflows/dist/clink-client.js.map +1 -0
- package/clinkx-workflows/dist/config.d.ts +126 -0
- package/clinkx-workflows/dist/config.js +226 -0
- package/clinkx-workflows/dist/config.js.map +1 -0
- package/clinkx-workflows/dist/definition-normalizer.d.ts +59 -0
- package/clinkx-workflows/dist/definition-normalizer.js +75 -0
- package/clinkx-workflows/dist/definition-normalizer.js.map +1 -0
- package/clinkx-workflows/dist/engine.d.ts +235 -0
- package/clinkx-workflows/dist/engine.js +1044 -0
- package/clinkx-workflows/dist/engine.js.map +1 -0
- package/clinkx-workflows/dist/errors.d.ts +74 -0
- package/clinkx-workflows/dist/errors.js +84 -0
- package/clinkx-workflows/dist/errors.js.map +1 -0
- package/clinkx-workflows/dist/fidelity.d.ts +112 -0
- package/clinkx-workflows/dist/fidelity.js +140 -0
- package/clinkx-workflows/dist/fidelity.js.map +1 -0
- package/clinkx-workflows/dist/fingerprint.d.ts +69 -0
- package/clinkx-workflows/dist/fingerprint.js +143 -0
- package/clinkx-workflows/dist/fingerprint.js.map +1 -0
- package/clinkx-workflows/dist/index.d.ts +16 -0
- package/clinkx-workflows/dist/index.js +42 -0
- package/clinkx-workflows/dist/index.js.map +1 -0
- package/clinkx-workflows/dist/loader.d.ts +64 -0
- package/clinkx-workflows/dist/loader.js +371 -0
- package/clinkx-workflows/dist/loader.js.map +1 -0
- package/clinkx-workflows/dist/logger.d.ts +16 -0
- package/clinkx-workflows/dist/logger.js +31 -0
- package/clinkx-workflows/dist/logger.js.map +1 -0
- package/clinkx-workflows/dist/path-validation.d.ts +23 -0
- package/clinkx-workflows/dist/path-validation.js +73 -0
- package/clinkx-workflows/dist/path-validation.js.map +1 -0
- package/clinkx-workflows/dist/prompt-budget.d.ts +31 -0
- package/clinkx-workflows/dist/prompt-budget.js +78 -0
- package/clinkx-workflows/dist/prompt-budget.js.map +1 -0
- package/clinkx-workflows/dist/queue.d.ts +16 -0
- package/clinkx-workflows/dist/queue.js +46 -0
- package/clinkx-workflows/dist/queue.js.map +1 -0
- package/clinkx-workflows/dist/ranking-reducer.d.ts +11 -0
- package/clinkx-workflows/dist/ranking-reducer.js +245 -0
- package/clinkx-workflows/dist/ranking-reducer.js.map +1 -0
- package/clinkx-workflows/dist/reducers/index.d.ts +8 -0
- package/clinkx-workflows/dist/reducers/index.js +12 -0
- package/clinkx-workflows/dist/reducers/index.js.map +1 -0
- package/clinkx-workflows/dist/run-id.d.ts +17 -0
- package/clinkx-workflows/dist/run-id.js +26 -0
- package/clinkx-workflows/dist/run-id.js.map +1 -0
- package/clinkx-workflows/dist/run-summary/cards/council-answer.d.ts +8 -0
- package/clinkx-workflows/dist/run-summary/cards/council-answer.js +75 -0
- package/clinkx-workflows/dist/run-summary/cards/council-answer.js.map +1 -0
- package/clinkx-workflows/dist/run-summary/cards/council-code-review.d.ts +13 -0
- package/clinkx-workflows/dist/run-summary/cards/council-code-review.js +90 -0
- package/clinkx-workflows/dist/run-summary/cards/council-code-review.js.map +1 -0
- package/clinkx-workflows/dist/run-summary/cards/council-debug.d.ts +9 -0
- package/clinkx-workflows/dist/run-summary/cards/council-debug.js +79 -0
- package/clinkx-workflows/dist/run-summary/cards/council-debug.js.map +1 -0
- package/clinkx-workflows/dist/run-summary/cards/council-default.d.ts +11 -0
- package/clinkx-workflows/dist/run-summary/cards/council-default.js +57 -0
- package/clinkx-workflows/dist/run-summary/cards/council-default.js.map +1 -0
- package/clinkx-workflows/dist/run-summary/cards/council-discover.d.ts +10 -0
- package/clinkx-workflows/dist/run-summary/cards/council-discover.js +79 -0
- package/clinkx-workflows/dist/run-summary/cards/council-discover.js.map +1 -0
- package/clinkx-workflows/dist/run-summary/cards/generic.d.ts +2 -0
- package/clinkx-workflows/dist/run-summary/cards/generic.js +4 -0
- package/clinkx-workflows/dist/run-summary/cards/generic.js.map +1 -0
- package/clinkx-workflows/dist/run-summary/cards/index.d.ts +6 -0
- package/clinkx-workflows/dist/run-summary/cards/index.js +17 -0
- package/clinkx-workflows/dist/run-summary/cards/index.js.map +1 -0
- package/clinkx-workflows/dist/run-summary/utils.d.ts +6 -0
- package/clinkx-workflows/dist/run-summary/utils.js +30 -0
- package/clinkx-workflows/dist/run-summary/utils.js.map +1 -0
- package/clinkx-workflows/dist/run-summary-derived.d.ts +19 -0
- package/clinkx-workflows/dist/run-summary-derived.js +100 -0
- package/clinkx-workflows/dist/run-summary-derived.js.map +1 -0
- package/clinkx-workflows/dist/run-summary.d.ts +70 -0
- package/clinkx-workflows/dist/run-summary.js +125 -0
- package/clinkx-workflows/dist/run-summary.js.map +1 -0
- package/clinkx-workflows/dist/schema.d.ts +609 -0
- package/clinkx-workflows/dist/schema.js +123 -0
- package/clinkx-workflows/dist/schema.js.map +1 -0
- package/clinkx-workflows/dist/server.d.ts +16 -0
- package/clinkx-workflows/dist/server.js +33 -0
- package/clinkx-workflows/dist/server.js.map +1 -0
- package/clinkx-workflows/dist/shutdown.d.ts +54 -0
- package/clinkx-workflows/dist/shutdown.js +120 -0
- package/clinkx-workflows/dist/shutdown.js.map +1 -0
- package/clinkx-workflows/dist/state-schema.d.ts +141 -0
- package/clinkx-workflows/dist/state-schema.js +21 -0
- package/clinkx-workflows/dist/state-schema.js.map +1 -0
- package/clinkx-workflows/dist/state.d.ts +37 -0
- package/clinkx-workflows/dist/state.js +838 -0
- package/clinkx-workflows/dist/state.js.map +1 -0
- package/clinkx-workflows/dist/template-loader.d.ts +30 -0
- package/clinkx-workflows/dist/template-loader.js +77 -0
- package/clinkx-workflows/dist/template-loader.js.map +1 -0
- package/clinkx-workflows/dist/template.d.ts +54 -0
- package/clinkx-workflows/dist/template.js +128 -0
- package/clinkx-workflows/dist/template.js.map +1 -0
- package/clinkx-workflows/dist/transport.d.ts +91 -0
- package/clinkx-workflows/dist/transport.js +249 -0
- package/clinkx-workflows/dist/transport.js.map +1 -0
- package/clinkx-workflows/dist/types.d.ts +137 -0
- package/clinkx-workflows/dist/types.js +11 -0
- package/clinkx-workflows/dist/types.js.map +1 -0
- package/clinkx-workflows/dist/validators/council.d.ts +1488 -0
- package/clinkx-workflows/dist/validators/council.js +509 -0
- package/clinkx-workflows/dist/validators/council.js.map +1 -0
- package/clinkx-workflows/dist/validators/index.d.ts +40 -0
- package/clinkx-workflows/dist/validators/index.js +43 -0
- package/clinkx-workflows/dist/validators/index.js.map +1 -0
- package/clinkx-workflows/dist/workflow-receipt.d.ts +4 -0
- package/clinkx-workflows/dist/workflow-receipt.js +177 -0
- package/clinkx-workflows/dist/workflow-receipt.js.map +1 -0
- package/clinkx-workflows/dist/workflow-tools.d.ts +77 -0
- package/clinkx-workflows/dist/workflow-tools.js +1131 -0
- package/clinkx-workflows/dist/workflow-tools.js.map +1 -0
- package/clinkx-workflows/dist/workflows/council-default.d.ts +123 -0
- package/clinkx-workflows/dist/workflows/council-default.js +141 -0
- package/clinkx-workflows/dist/workflows/council-default.js.map +1 -0
- package/clinkx-workflows/dist/workflows/index.d.ts +12 -0
- package/clinkx-workflows/dist/workflows/index.js +15 -0
- package/clinkx-workflows/dist/workflows/index.js.map +1 -0
- package/conf/adapters/codex.json +2 -2
- package/conf/adapters/hapi/codex.json +2 -2
- package/dist/config.d.ts +5 -0
- package/dist/config.js +17 -0
- package/dist/config.js.map +1 -1
- package/dist/parsers/extract.d.ts +2 -0
- package/dist/parsers/extract.js +29 -20
- package/dist/parsers/extract.js.map +1 -1
- package/dist/pipeline.d.ts +2 -4
- package/dist/pipeline.js +93 -8
- package/dist/pipeline.js.map +1 -1
- package/dist/result-contract.d.ts +6 -1
- package/dist/result-contract.js +10 -22
- package/dist/result-contract.js.map +1 -1
- package/dist/runner.js +43 -1
- package/dist/runner.js.map +1 -1
- package/package.json +12 -5
|
@@ -0,0 +1,1044 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Workflow execution engine for ordered linear stages.
|
|
3
|
+
*
|
|
4
|
+
* ## Timeout Ownership Model (3-8)
|
|
5
|
+
*
|
|
6
|
+
* Timeouts in clinkx-workflows operate at three distinct layers, each with
|
|
7
|
+
* clear ownership boundaries:
|
|
8
|
+
*
|
|
9
|
+
* ### Layer 1: Workflow-level deadline (engine-owned)
|
|
10
|
+
*
|
|
11
|
+
* The engine creates an AbortController with a deadline derived from
|
|
12
|
+
* `settings.timeout_seconds` (default: CLINKX_WORKFLOWS_TIMEOUT_SECONDS,
|
|
13
|
+
* 1800s). This is a hard wall clock ceiling for the entire workflow run.
|
|
14
|
+
* When exceeded, remaining stages are aborted and the run fails with
|
|
15
|
+
* a clear timeout error.
|
|
16
|
+
*
|
|
17
|
+
* Owner: WorkflowEngine.createDeadlineController()
|
|
18
|
+
* Config: CLINKX_WORKFLOWS_TIMEOUT_SECONDS (default: 1800)
|
|
19
|
+
* Scope: Entire workflow run
|
|
20
|
+
*
|
|
21
|
+
* ### Layer 2: Per-call parent-side timeout (engine-owned)
|
|
22
|
+
*
|
|
23
|
+
* Each tools/call RPC to the child has a parent-side timeout computed as:
|
|
24
|
+
* max(call.timeout_seconds * 1000 + CALL_TIMEOUT_BUFFER_MS, DEFAULT_PARENT_CALL_TIMEOUT_MS)
|
|
25
|
+
*
|
|
26
|
+
* This prevents individual calls from blocking indefinitely. The buffer
|
|
27
|
+
* (CALL_TIMEOUT_BUFFER_MS = 5s) accounts for child-side teardown latency.
|
|
28
|
+
* DEFAULT_PARENT_CALL_TIMEOUT_MS (600s = 10min) is the floor — even calls
|
|
29
|
+
* without explicit timeout_seconds get a parent-side deadline.
|
|
30
|
+
*
|
|
31
|
+
* Owner: WorkflowEngine.invokeCall()
|
|
32
|
+
* Config: Per-call timeout_seconds in workflow definition
|
|
33
|
+
* Scope: Single tools/call RPC round-trip
|
|
34
|
+
*
|
|
35
|
+
* ### Layer 3: Child-side timeout (adapter-owned, NOT engine-controlled)
|
|
36
|
+
*
|
|
37
|
+
* The ClinkX child's own timeout behavior is controlled by adapter JSON
|
|
38
|
+
* config (codex.json:9, claude.json:8 — both set timeout_seconds: 3600).
|
|
39
|
+
* The env vars CLINKX_TIMEOUT_SECONDS, CLINKX_IDLE_TIMEOUT_SECONDS, and
|
|
40
|
+
* CLINKX_IDLE_TIMEOUT_STARTUP_SECONDS are DEAD KNOBS with current shipped
|
|
41
|
+
* adapters — adapter JSON config wins (runner.ts:156-171).
|
|
42
|
+
*
|
|
43
|
+
* The workflow engine does NOT attempt to control child-side timeouts via
|
|
44
|
+
* env vars. Instead:
|
|
45
|
+
* - Per-call `timeout_seconds` in the workflow definition is passed as a
|
|
46
|
+
* hidden field in the ClinkInput payload, which the child uses for
|
|
47
|
+
* UPWARD EXTENSION ONLY (extends beyond adapter floor, never shortens).
|
|
48
|
+
* - If different child timeout/idle floors are needed, point the child at
|
|
49
|
+
* a workflow-specific adapter bundle via CLINKX_WORKFLOWS_CHILD_CONFIG_PATH.
|
|
50
|
+
*
|
|
51
|
+
* Owner: Adapter JSON config in the child's config path
|
|
52
|
+
* Config: CLINKX_WORKFLOWS_CHILD_CONFIG_PATH for custom adapter bundles
|
|
53
|
+
* Scope: Single subprocess spawn within the child
|
|
54
|
+
*
|
|
55
|
+
* ### Interaction between layers
|
|
56
|
+
*
|
|
57
|
+
* The workflow deadline (L1) takes precedence: if it fires, all in-flight
|
|
58
|
+
* calls are cancelled regardless of their per-call timeouts (L2). The
|
|
59
|
+
* per-call timeout (L2) prevents any single call from consuming the entire
|
|
60
|
+
* workflow budget. The child-side timeout (L3) is independently enforced
|
|
61
|
+
* by the child and may fire before L2 — in that case the child returns
|
|
62
|
+
* isError: true with termination_reason: "wall" or "idle", which the
|
|
63
|
+
* workflow engine treats as a transient failure eligible for retry.
|
|
64
|
+
*/
|
|
65
|
+
import { stat as fsStat } from "node:fs/promises";
|
|
66
|
+
import { isAbsolute, join, resolve } from "node:path";
|
|
67
|
+
import { callClink } from "./clink-client.js";
|
|
68
|
+
import { getArtifactsDir, getChildMaxConcurrent, getMaxParallel, getMaxPromptBytes, getStateDir, resolveWorkspaceRoot, } from "./config.js";
|
|
69
|
+
import { CallExecutionError, InvalidCallError, WorkflowCancellationError, WorkflowError, } from "./errors.js";
|
|
70
|
+
import { analyzeOutputFidelity, enforceOutputFidelity } from "./fidelity.js";
|
|
71
|
+
import { validateDynamicPaths } from "./path-validation.js";
|
|
72
|
+
import { estimatePromptBudget, enforcePromptBudget, createPathContributor } from "./prompt-budget.js";
|
|
73
|
+
import { DispatchQueue } from "./queue.js";
|
|
74
|
+
import { renderCallPrompt } from "./template.js";
|
|
75
|
+
import { ArtifactStore } from "./artifacts.js";
|
|
76
|
+
import { getReducer } from "./reducers/index.js";
|
|
77
|
+
import { logger } from "./logger.js";
|
|
78
|
+
/** Default parent-side timeout for a single tools/call RPC (10 minutes). */
|
|
79
|
+
const DEFAULT_PARENT_CALL_TIMEOUT_MS = 600_000;
|
|
80
|
+
/** Buffer added to per-call timeout to account for child-side teardown latency. */
|
|
81
|
+
const CALL_TIMEOUT_BUFFER_MS = 5_000;
|
|
82
|
+
export class WorkflowEngine {
|
|
83
|
+
definition;
|
|
84
|
+
session;
|
|
85
|
+
variables;
|
|
86
|
+
validatorRegistry;
|
|
87
|
+
runId;
|
|
88
|
+
runDir;
|
|
89
|
+
signal;
|
|
90
|
+
queueDepth;
|
|
91
|
+
maxPromptBytes;
|
|
92
|
+
promptReserveBytes;
|
|
93
|
+
allowedRoots;
|
|
94
|
+
hooks;
|
|
95
|
+
resumeCompletedCalls;
|
|
96
|
+
resumePreparedInputs;
|
|
97
|
+
resumeFailedCalls;
|
|
98
|
+
contextProducingCalls = new Set();
|
|
99
|
+
artifactStore;
|
|
100
|
+
constructor(options) {
|
|
101
|
+
this.definition = options.definition;
|
|
102
|
+
this.session = options.session;
|
|
103
|
+
this.variables = options.variables;
|
|
104
|
+
this.validatorRegistry = options.validatorRegistry;
|
|
105
|
+
this.runId = options.runId;
|
|
106
|
+
this.runDir = options.runDir;
|
|
107
|
+
this.signal = options.signal;
|
|
108
|
+
this.queueDepth = Math.min(options.maxParallel ?? getMaxParallel(), options.childMaxConcurrent ?? getChildMaxConcurrent());
|
|
109
|
+
this.maxPromptBytes = options.maxPromptBytes ?? getMaxPromptBytes();
|
|
110
|
+
this.promptReserveBytes = options.promptReserveBytes;
|
|
111
|
+
this.allowedRoots = options.allowedRoots ?? this.defaultAllowedRoots();
|
|
112
|
+
this.hooks = options.hooks ?? {};
|
|
113
|
+
this.resumeCompletedCalls = options.resume?.completedCalls ?? new Map();
|
|
114
|
+
this.resumePreparedInputs = options.resume?.preparedInputs ?? new Map();
|
|
115
|
+
this.resumeFailedCalls = options.resume?.failedCalls ?? new Map();
|
|
116
|
+
this.artifactStore = new ArtifactStore(options.runDir, options.validatorRegistry);
|
|
117
|
+
this.validateFinalStage();
|
|
118
|
+
this.validateAdvertisedCliNames();
|
|
119
|
+
this.contextProducingCalls = collectContextProducingCalls(this.definition);
|
|
120
|
+
this.seedResumedArtifacts();
|
|
121
|
+
}
|
|
122
|
+
async execute() {
|
|
123
|
+
const deadlineController = this.createDeadlineController();
|
|
124
|
+
const stages = [];
|
|
125
|
+
const startedAt = new Date().toISOString();
|
|
126
|
+
try {
|
|
127
|
+
await this.emitRunStateChange({
|
|
128
|
+
runId: this.runId,
|
|
129
|
+
workflowName: this.definition.name,
|
|
130
|
+
state: "running",
|
|
131
|
+
startedAt,
|
|
132
|
+
});
|
|
133
|
+
await this.emitProgress({
|
|
134
|
+
runId: this.runId,
|
|
135
|
+
workflowName: this.definition.name,
|
|
136
|
+
scope: "run",
|
|
137
|
+
state: "running",
|
|
138
|
+
message: `Workflow ${this.definition.name} is running`,
|
|
139
|
+
});
|
|
140
|
+
await this.validateInputPathsExist();
|
|
141
|
+
for (let stageIndex = 0; stageIndex < this.definition.stages.length; stageIndex += 1) {
|
|
142
|
+
this.throwIfAborted(deadlineController.signal);
|
|
143
|
+
const stage = this.definition.stages[stageIndex];
|
|
144
|
+
logger.info({ run_id: this.runId, stage_id: stage.id }, "executing workflow stage");
|
|
145
|
+
await this.emitProgress({
|
|
146
|
+
runId: this.runId,
|
|
147
|
+
workflowName: this.definition.name,
|
|
148
|
+
scope: "stage",
|
|
149
|
+
stageId: stage.id,
|
|
150
|
+
stageDescription: stage.description,
|
|
151
|
+
state: "running",
|
|
152
|
+
message: `Stage ${stage.id} is running`,
|
|
153
|
+
});
|
|
154
|
+
const executedStage = stage.parallel
|
|
155
|
+
? await this.executeParallelStage(stage, stageIndex, deadlineController.signal)
|
|
156
|
+
: await this.executeSequentialStage(stage, stageIndex, deadlineController.signal);
|
|
157
|
+
const aggregateArtifactPath = this.artifactStore.writeStageAggregate(stage, stageIndex);
|
|
158
|
+
stages.push({
|
|
159
|
+
...executedStage,
|
|
160
|
+
...(aggregateArtifactPath != null ? { aggregateArtifactPath } : {}),
|
|
161
|
+
});
|
|
162
|
+
await this.emitProgress({
|
|
163
|
+
runId: this.runId,
|
|
164
|
+
workflowName: this.definition.name,
|
|
165
|
+
scope: "stage",
|
|
166
|
+
stageId: stage.id,
|
|
167
|
+
stageDescription: stage.description,
|
|
168
|
+
state: executedStage.ok ? "succeeded" : "failed",
|
|
169
|
+
message: `Stage ${stage.id} ${executedStage.ok ? "succeeded" : "failed"}`,
|
|
170
|
+
});
|
|
171
|
+
if (!executedStage.ok && stage.on_failure === "abort") {
|
|
172
|
+
throw new WorkflowError(`Stage ${stage.id} failed and on_failure=abort`);
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
const finalStage = stages[stages.length - 1];
|
|
176
|
+
const finalCall = finalStage?.calls[0];
|
|
177
|
+
if (finalCall == null || !finalCall.ok || finalCall.outputText == null) {
|
|
178
|
+
throw new WorkflowError("Final stage did not produce a successful terminal call result");
|
|
179
|
+
}
|
|
180
|
+
const completedAt = new Date().toISOString();
|
|
181
|
+
await this.emitRunStateChange({
|
|
182
|
+
runId: this.runId,
|
|
183
|
+
workflowName: this.definition.name,
|
|
184
|
+
state: "succeeded",
|
|
185
|
+
startedAt,
|
|
186
|
+
completedAt,
|
|
187
|
+
});
|
|
188
|
+
await this.emitProgress({
|
|
189
|
+
runId: this.runId,
|
|
190
|
+
workflowName: this.definition.name,
|
|
191
|
+
scope: "run",
|
|
192
|
+
state: "succeeded",
|
|
193
|
+
message: `Workflow ${this.definition.name} succeeded`,
|
|
194
|
+
});
|
|
195
|
+
return {
|
|
196
|
+
outputText: finalCall.outputText,
|
|
197
|
+
stages,
|
|
198
|
+
artifactsDir: this.artifactStore.artifactsDir,
|
|
199
|
+
};
|
|
200
|
+
}
|
|
201
|
+
catch (error) {
|
|
202
|
+
const completedAt = new Date().toISOString();
|
|
203
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
204
|
+
if (error instanceof WorkflowCancellationError) {
|
|
205
|
+
await this.emitRunStateChange({
|
|
206
|
+
runId: this.runId,
|
|
207
|
+
workflowName: this.definition.name,
|
|
208
|
+
state: "cancelled",
|
|
209
|
+
startedAt,
|
|
210
|
+
completedAt,
|
|
211
|
+
error: message,
|
|
212
|
+
});
|
|
213
|
+
await this.emitProgress({
|
|
214
|
+
runId: this.runId,
|
|
215
|
+
workflowName: this.definition.name,
|
|
216
|
+
scope: "run",
|
|
217
|
+
state: "cancelled",
|
|
218
|
+
message: `Workflow ${this.definition.name} cancelled`,
|
|
219
|
+
});
|
|
220
|
+
}
|
|
221
|
+
else {
|
|
222
|
+
await this.emitRunStateChange({
|
|
223
|
+
runId: this.runId,
|
|
224
|
+
workflowName: this.definition.name,
|
|
225
|
+
state: "failed",
|
|
226
|
+
startedAt,
|
|
227
|
+
completedAt,
|
|
228
|
+
error: message,
|
|
229
|
+
failureClass: classifyFailureClass(error),
|
|
230
|
+
});
|
|
231
|
+
await this.emitProgress({
|
|
232
|
+
runId: this.runId,
|
|
233
|
+
workflowName: this.definition.name,
|
|
234
|
+
scope: "run",
|
|
235
|
+
state: "failed",
|
|
236
|
+
message: `Workflow ${this.definition.name} failed: ${message}`,
|
|
237
|
+
});
|
|
238
|
+
}
|
|
239
|
+
throw error;
|
|
240
|
+
}
|
|
241
|
+
finally {
|
|
242
|
+
deadlineController.dispose();
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
async executeParallelStage(stage, stageIndex, signal) {
|
|
246
|
+
const queue = new DispatchQueue(this.queueDepth);
|
|
247
|
+
const settled = await Promise.allSettled(stage.calls.map((call, callIndex) => queue.run(async () => this.executeSingleCall(stage, call, stageIndex, callIndex, signal))));
|
|
248
|
+
const calls = new Array(stage.calls.length);
|
|
249
|
+
for (let callIndex = 0; callIndex < settled.length; callIndex += 1) {
|
|
250
|
+
const result = settled[callIndex];
|
|
251
|
+
if (result.status === "fulfilled") {
|
|
252
|
+
calls[callIndex] = result.value;
|
|
253
|
+
continue;
|
|
254
|
+
}
|
|
255
|
+
calls[callIndex] = await this.materializeUnexpectedFailure(stage, stage.calls[callIndex], result.reason);
|
|
256
|
+
}
|
|
257
|
+
const ok = calls.every((call) => call?.ok === true);
|
|
258
|
+
return { stageId: stage.id, ok, calls };
|
|
259
|
+
}
|
|
260
|
+
async executeSequentialStage(stage, stageIndex, signal) {
|
|
261
|
+
const calls = [];
|
|
262
|
+
for (let callIndex = 0; callIndex < stage.calls.length; callIndex += 1) {
|
|
263
|
+
this.throwIfAborted(signal);
|
|
264
|
+
const executedCall = await this.executeSingleCall(stage, stage.calls[callIndex], stageIndex, callIndex, signal);
|
|
265
|
+
calls.push(executedCall);
|
|
266
|
+
if (!executedCall.ok && stage.on_failure === "abort") {
|
|
267
|
+
return { stageId: stage.id, ok: false, calls };
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
return {
|
|
271
|
+
stageId: stage.id,
|
|
272
|
+
ok: calls.every((call) => call.ok),
|
|
273
|
+
calls,
|
|
274
|
+
};
|
|
275
|
+
}
|
|
276
|
+
async executeSingleCall(stage, call, stageIndex, callIndex, signal) {
|
|
277
|
+
const startedAt = new Date().toISOString();
|
|
278
|
+
const startedAtMs = Date.now();
|
|
279
|
+
const contextProducing = this.contextProducingCalls.has(`${stage.id}.${call.id}`);
|
|
280
|
+
const callKey = `${stage.id}.${call.id}`;
|
|
281
|
+
const resumedCall = this.resumeCompletedCalls.get(callKey);
|
|
282
|
+
if (resumedCall != null) {
|
|
283
|
+
return resumedCall;
|
|
284
|
+
}
|
|
285
|
+
// Handle built-in reducers — no LLM call, no retry loop
|
|
286
|
+
if (call.reducer != null) {
|
|
287
|
+
const reducer = getReducer(call.reducer);
|
|
288
|
+
if (reducer == null) {
|
|
289
|
+
const message = `Unknown reducer: "${call.reducer}"`;
|
|
290
|
+
this.artifactStore.noteFailedCall(stage.id, call.id, message);
|
|
291
|
+
await this.emitCallStateChange({
|
|
292
|
+
runId: this.runId,
|
|
293
|
+
workflowName: this.definition.name,
|
|
294
|
+
stageId: stage.id,
|
|
295
|
+
callId: call.id,
|
|
296
|
+
state: "failed",
|
|
297
|
+
attempt: 1,
|
|
298
|
+
retryCount: 0,
|
|
299
|
+
startedAt,
|
|
300
|
+
completedAt: new Date().toISOString(),
|
|
301
|
+
durationMs: Date.now() - startedAtMs,
|
|
302
|
+
error: message,
|
|
303
|
+
failureClass: "permanent",
|
|
304
|
+
});
|
|
305
|
+
return { stageId: stage.id, callId: call.id, ok: false, error: message };
|
|
306
|
+
}
|
|
307
|
+
try {
|
|
308
|
+
const contextArtifacts = this.artifactStore.buildContext(this.definition, call.context_from);
|
|
309
|
+
const outputText = reducer(contextArtifacts.artifacts);
|
|
310
|
+
const artifact = this.maybeWriteArtifact(stage, call, stageIndex, callIndex, outputText);
|
|
311
|
+
const completedAt = new Date().toISOString();
|
|
312
|
+
const durationMs = Date.now() - startedAtMs;
|
|
313
|
+
await this.emitCallStateChange({
|
|
314
|
+
runId: this.runId,
|
|
315
|
+
workflowName: this.definition.name,
|
|
316
|
+
stageId: stage.id,
|
|
317
|
+
callId: call.id,
|
|
318
|
+
state: "succeeded",
|
|
319
|
+
attempt: 1,
|
|
320
|
+
retryCount: 0,
|
|
321
|
+
startedAt,
|
|
322
|
+
completedAt,
|
|
323
|
+
durationMs,
|
|
324
|
+
outputText,
|
|
325
|
+
artifact,
|
|
326
|
+
});
|
|
327
|
+
await this.emitProgress({
|
|
328
|
+
runId: this.runId,
|
|
329
|
+
workflowName: this.definition.name,
|
|
330
|
+
scope: "call",
|
|
331
|
+
stageId: stage.id,
|
|
332
|
+
stageDescription: stage.description,
|
|
333
|
+
callId: call.id,
|
|
334
|
+
state: "succeeded",
|
|
335
|
+
attempt: 1,
|
|
336
|
+
retryCount: 0,
|
|
337
|
+
message: `Call ${callKey} succeeded (reducer: ${call.reducer})`,
|
|
338
|
+
});
|
|
339
|
+
return { stageId: stage.id, callId: call.id, ok: true, outputText, artifact };
|
|
340
|
+
}
|
|
341
|
+
catch (error) {
|
|
342
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
343
|
+
this.artifactStore.noteFailedCall(stage.id, call.id, message);
|
|
344
|
+
await this.emitCallStateChange({
|
|
345
|
+
runId: this.runId,
|
|
346
|
+
workflowName: this.definition.name,
|
|
347
|
+
stageId: stage.id,
|
|
348
|
+
callId: call.id,
|
|
349
|
+
state: "failed",
|
|
350
|
+
attempt: 1,
|
|
351
|
+
retryCount: 0,
|
|
352
|
+
startedAt,
|
|
353
|
+
completedAt: new Date().toISOString(),
|
|
354
|
+
durationMs: Date.now() - startedAtMs,
|
|
355
|
+
error: message,
|
|
356
|
+
failureClass: "permanent",
|
|
357
|
+
});
|
|
358
|
+
return { stageId: stage.id, callId: call.id, ok: false, error: message };
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
// Build input ONCE before retry loop — materialized inputs are stable
|
|
362
|
+
let input;
|
|
363
|
+
try {
|
|
364
|
+
input = this.buildCallInput(stage, call, callKey, callIndex);
|
|
365
|
+
}
|
|
366
|
+
catch (error) {
|
|
367
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
368
|
+
this.artifactStore.noteFailedCall(stage.id, call.id, message);
|
|
369
|
+
logger.warn({ run_id: this.runId, stage_id: stage.id, call_id: call.id, err: message }, "workflow call input build failed");
|
|
370
|
+
const completedAt = new Date().toISOString();
|
|
371
|
+
const durationMs = Date.now() - startedAtMs;
|
|
372
|
+
await this.emitCallStateChange({
|
|
373
|
+
runId: this.runId,
|
|
374
|
+
workflowName: this.definition.name,
|
|
375
|
+
stageId: stage.id,
|
|
376
|
+
callId: call.id,
|
|
377
|
+
state: "failed",
|
|
378
|
+
attempt: 1,
|
|
379
|
+
retryCount: 0,
|
|
380
|
+
startedAt,
|
|
381
|
+
completedAt,
|
|
382
|
+
durationMs,
|
|
383
|
+
error: message,
|
|
384
|
+
failureClass: "permanent",
|
|
385
|
+
});
|
|
386
|
+
await this.emitProgress({
|
|
387
|
+
runId: this.runId,
|
|
388
|
+
workflowName: this.definition.name,
|
|
389
|
+
scope: "call",
|
|
390
|
+
stageId: stage.id,
|
|
391
|
+
stageDescription: stage.description,
|
|
392
|
+
callId: call.id,
|
|
393
|
+
state: "failed",
|
|
394
|
+
attempt: 1,
|
|
395
|
+
retryCount: 0,
|
|
396
|
+
message: `Call ${callKey} failed: ${message}`,
|
|
397
|
+
});
|
|
398
|
+
return {
|
|
399
|
+
stageId: stage.id,
|
|
400
|
+
callId: call.id,
|
|
401
|
+
ok: false,
|
|
402
|
+
error: message,
|
|
403
|
+
};
|
|
404
|
+
}
|
|
405
|
+
// Read retry config from stage
|
|
406
|
+
const maxRetries = stage.retry.max_retries;
|
|
407
|
+
const retryMode = stage.retry.mode;
|
|
408
|
+
const backoffSeconds = stage.retry.backoff_seconds;
|
|
409
|
+
const maxBackoffSeconds = stage.retry.max_backoff_seconds;
|
|
410
|
+
// Emit call prepared ONCE (input is stable across retries)
|
|
411
|
+
await this.emitCallPrepared({
|
|
412
|
+
runId: this.runId,
|
|
413
|
+
workflowName: this.definition.name,
|
|
414
|
+
stageId: stage.id,
|
|
415
|
+
callId: call.id,
|
|
416
|
+
attempt: 1,
|
|
417
|
+
retryCount: 0,
|
|
418
|
+
contextProducing,
|
|
419
|
+
input: structuredClone(input),
|
|
420
|
+
});
|
|
421
|
+
let lastError = undefined;
|
|
422
|
+
let lastFailureClass = "permanent";
|
|
423
|
+
let lastRetryCount = 0;
|
|
424
|
+
for (let retryCount = 0; retryCount <= maxRetries; retryCount += 1) {
|
|
425
|
+
const attempt = retryCount + 1;
|
|
426
|
+
lastRetryCount = retryCount;
|
|
427
|
+
this.throwIfAborted(signal);
|
|
428
|
+
await this.emitCallStateChange({
|
|
429
|
+
runId: this.runId,
|
|
430
|
+
workflowName: this.definition.name,
|
|
431
|
+
stageId: stage.id,
|
|
432
|
+
callId: call.id,
|
|
433
|
+
state: "running",
|
|
434
|
+
attempt,
|
|
435
|
+
retryCount,
|
|
436
|
+
startedAt,
|
|
437
|
+
});
|
|
438
|
+
await this.emitProgress({
|
|
439
|
+
runId: this.runId,
|
|
440
|
+
workflowName: this.definition.name,
|
|
441
|
+
scope: "call",
|
|
442
|
+
stageId: stage.id,
|
|
443
|
+
stageDescription: stage.description,
|
|
444
|
+
callId: call.id,
|
|
445
|
+
state: "running",
|
|
446
|
+
attempt,
|
|
447
|
+
retryCount,
|
|
448
|
+
message: retryCount === 0
|
|
449
|
+
? `Call ${callKey} is running`
|
|
450
|
+
: `Call ${callKey} is retrying (attempt ${String(attempt)})`,
|
|
451
|
+
});
|
|
452
|
+
try {
|
|
453
|
+
const result = await this.invokeCall(stage, call, input, signal);
|
|
454
|
+
const fidelity = analyzeOutputFidelity(result.outputText, result.debugMetadata);
|
|
455
|
+
enforceOutputFidelity(fidelity, {
|
|
456
|
+
stageId: stage.id,
|
|
457
|
+
callId: call.id,
|
|
458
|
+
contextProducing,
|
|
459
|
+
});
|
|
460
|
+
const artifact = this.maybeWriteArtifact(stage, call, stageIndex, callIndex, result.outputText);
|
|
461
|
+
const completedAt = new Date().toISOString();
|
|
462
|
+
const durationMs = Date.now() - startedAtMs;
|
|
463
|
+
await this.emitCallStateChange({
|
|
464
|
+
runId: this.runId,
|
|
465
|
+
workflowName: this.definition.name,
|
|
466
|
+
stageId: stage.id,
|
|
467
|
+
callId: call.id,
|
|
468
|
+
state: "succeeded",
|
|
469
|
+
attempt,
|
|
470
|
+
retryCount,
|
|
471
|
+
startedAt,
|
|
472
|
+
completedAt,
|
|
473
|
+
durationMs,
|
|
474
|
+
outputText: result.outputText,
|
|
475
|
+
fidelity,
|
|
476
|
+
artifact,
|
|
477
|
+
});
|
|
478
|
+
await this.emitProgress({
|
|
479
|
+
runId: this.runId,
|
|
480
|
+
workflowName: this.definition.name,
|
|
481
|
+
scope: "call",
|
|
482
|
+
stageId: stage.id,
|
|
483
|
+
stageDescription: stage.description,
|
|
484
|
+
callId: call.id,
|
|
485
|
+
state: "succeeded",
|
|
486
|
+
attempt,
|
|
487
|
+
retryCount,
|
|
488
|
+
message: `Call ${callKey} succeeded`,
|
|
489
|
+
});
|
|
490
|
+
return {
|
|
491
|
+
stageId: stage.id,
|
|
492
|
+
callId: call.id,
|
|
493
|
+
ok: true,
|
|
494
|
+
outputText: result.outputText,
|
|
495
|
+
artifact,
|
|
496
|
+
fidelity,
|
|
497
|
+
};
|
|
498
|
+
}
|
|
499
|
+
catch (error) {
|
|
500
|
+
// Cancellation is never retryable
|
|
501
|
+
if (error instanceof WorkflowCancellationError) {
|
|
502
|
+
const completedAt = new Date().toISOString();
|
|
503
|
+
const durationMs = Date.now() - startedAtMs;
|
|
504
|
+
await this.emitCallStateChange({
|
|
505
|
+
runId: this.runId,
|
|
506
|
+
workflowName: this.definition.name,
|
|
507
|
+
stageId: stage.id,
|
|
508
|
+
callId: call.id,
|
|
509
|
+
state: "cancelled",
|
|
510
|
+
attempt,
|
|
511
|
+
retryCount,
|
|
512
|
+
startedAt,
|
|
513
|
+
completedAt,
|
|
514
|
+
durationMs,
|
|
515
|
+
error: error.message,
|
|
516
|
+
});
|
|
517
|
+
await this.emitProgress({
|
|
518
|
+
runId: this.runId,
|
|
519
|
+
workflowName: this.definition.name,
|
|
520
|
+
scope: "call",
|
|
521
|
+
stageId: stage.id,
|
|
522
|
+
stageDescription: stage.description,
|
|
523
|
+
callId: call.id,
|
|
524
|
+
state: "cancelled",
|
|
525
|
+
attempt,
|
|
526
|
+
retryCount,
|
|
527
|
+
message: `Call ${callKey} cancelled`,
|
|
528
|
+
});
|
|
529
|
+
throw error;
|
|
530
|
+
}
|
|
531
|
+
const failureClass = classifyFailureClass(error);
|
|
532
|
+
lastError = error;
|
|
533
|
+
lastFailureClass = failureClass;
|
|
534
|
+
// Determine if this error is retryable
|
|
535
|
+
const canRetry = retryCount < maxRetries && isRetryable(error, failureClass, retryMode);
|
|
536
|
+
if (canRetry) {
|
|
537
|
+
// Compute backoff with jitter
|
|
538
|
+
const delayMs = computeRetryDelay(retryCount, backoffSeconds, maxBackoffSeconds);
|
|
539
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
540
|
+
logger.info({
|
|
541
|
+
run_id: this.runId,
|
|
542
|
+
stage_id: stage.id,
|
|
543
|
+
call_id: call.id,
|
|
544
|
+
attempt,
|
|
545
|
+
retry_count: retryCount,
|
|
546
|
+
delay_ms: delayMs,
|
|
547
|
+
failure_class: failureClass,
|
|
548
|
+
}, "scheduling retry");
|
|
549
|
+
await this.emitRetryScheduled({
|
|
550
|
+
runId: this.runId,
|
|
551
|
+
workflowName: this.definition.name,
|
|
552
|
+
stageId: stage.id,
|
|
553
|
+
callId: call.id,
|
|
554
|
+
attempt,
|
|
555
|
+
retryCount,
|
|
556
|
+
nextRetryCount: retryCount + 1,
|
|
557
|
+
delayMs,
|
|
558
|
+
error: message,
|
|
559
|
+
failureClass,
|
|
560
|
+
});
|
|
561
|
+
// Wait for backoff delay (abort-aware)
|
|
562
|
+
await abortableDelay(delayMs, signal);
|
|
563
|
+
continue;
|
|
564
|
+
}
|
|
565
|
+
// Not retryable or retries exhausted — fail the call
|
|
566
|
+
break;
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
// All retries exhausted or non-retryable error
|
|
570
|
+
const message = lastError instanceof Error ? lastError.message : String(lastError);
|
|
571
|
+
const finalRetryCount = lastRetryCount;
|
|
572
|
+
const lastChildDebug = lastError instanceof CallExecutionError ? lastError.childDebugMetadata : undefined;
|
|
573
|
+
this.artifactStore.noteFailedCall(stage.id, call.id, message);
|
|
574
|
+
logger.warn({ run_id: this.runId, stage_id: stage.id, call_id: call.id, err: message, retries: finalRetryCount }, "workflow call failed");
|
|
575
|
+
const completedAt = new Date().toISOString();
|
|
576
|
+
const durationMs = Date.now() - startedAtMs;
|
|
577
|
+
await this.emitCallStateChange({
|
|
578
|
+
runId: this.runId,
|
|
579
|
+
workflowName: this.definition.name,
|
|
580
|
+
stageId: stage.id,
|
|
581
|
+
callId: call.id,
|
|
582
|
+
state: "failed",
|
|
583
|
+
attempt: finalRetryCount + 1,
|
|
584
|
+
retryCount: finalRetryCount,
|
|
585
|
+
startedAt,
|
|
586
|
+
completedAt,
|
|
587
|
+
durationMs,
|
|
588
|
+
error: message,
|
|
589
|
+
failureClass: lastFailureClass,
|
|
590
|
+
childDebugMetadata: lastChildDebug,
|
|
591
|
+
});
|
|
592
|
+
await this.emitProgress({
|
|
593
|
+
runId: this.runId,
|
|
594
|
+
workflowName: this.definition.name,
|
|
595
|
+
scope: "call",
|
|
596
|
+
stageId: stage.id,
|
|
597
|
+
stageDescription: stage.description,
|
|
598
|
+
callId: call.id,
|
|
599
|
+
state: "failed",
|
|
600
|
+
attempt: finalRetryCount + 1,
|
|
601
|
+
retryCount: finalRetryCount,
|
|
602
|
+
message: `Call ${callKey} failed: ${message}`,
|
|
603
|
+
});
|
|
604
|
+
return {
|
|
605
|
+
stageId: stage.id,
|
|
606
|
+
callId: call.id,
|
|
607
|
+
ok: false,
|
|
608
|
+
error: message,
|
|
609
|
+
};
|
|
610
|
+
}
|
|
611
|
+
async invokeCall(stage, call, input, signal) {
|
|
612
|
+
const timeoutMs = call.timeout_seconds != null
|
|
613
|
+
? Math.max((call.timeout_seconds * 1000) + CALL_TIMEOUT_BUFFER_MS, DEFAULT_PARENT_CALL_TIMEOUT_MS)
|
|
614
|
+
: undefined;
|
|
615
|
+
return await callClink(this.session, input, signal, { stageId: stage.id, callId: call.id }, timeoutMs);
|
|
616
|
+
}
|
|
617
|
+
maybeWriteArtifact(stage, call, stageIndex, callIndex, outputText) {
|
|
618
|
+
const requiresValidation = call.validator_schema_id != null;
|
|
619
|
+
if (!requiresValidation) {
|
|
620
|
+
return undefined;
|
|
621
|
+
}
|
|
622
|
+
const validatorSchemaId = call.validator_schema_id;
|
|
623
|
+
if (validatorSchemaId == null) {
|
|
624
|
+
throw new WorkflowError(`Validated call ${stage.id}.${call.id} must declare validator_schema_id`);
|
|
625
|
+
}
|
|
626
|
+
const validated = this.artifactStore.validateJsonOutput(stage.id, call.id, validatorSchemaId, outputText);
|
|
627
|
+
return this.artifactStore.writeCallArtifact({
|
|
628
|
+
stage,
|
|
629
|
+
call,
|
|
630
|
+
stageIndex,
|
|
631
|
+
callIndex,
|
|
632
|
+
validated,
|
|
633
|
+
});
|
|
634
|
+
}
|
|
635
|
+
resolveDynamicPaths(variableName, context) {
|
|
636
|
+
if (variableName == null) {
|
|
637
|
+
return [];
|
|
638
|
+
}
|
|
639
|
+
const rawValue = this.variables[variableName];
|
|
640
|
+
if (!Array.isArray(rawValue)) {
|
|
641
|
+
throw new WorkflowError(`Variable "${variableName}" for ${context} must be a string[]`);
|
|
642
|
+
}
|
|
643
|
+
const workspaceRoot = resolveWorkspaceRoot();
|
|
644
|
+
const values = rawValue.map((value) => {
|
|
645
|
+
if (typeof value !== "string") {
|
|
646
|
+
throw new WorkflowError(`Variable "${variableName}" for ${context} must contain only strings`);
|
|
647
|
+
}
|
|
648
|
+
return isAbsolute(value) ? value : resolve(workspaceRoot, value);
|
|
649
|
+
});
|
|
650
|
+
validateDynamicPaths(values, this.allowedRoots, context);
|
|
651
|
+
return values;
|
|
652
|
+
}
|
|
653
|
+
createDeadlineController() {
|
|
654
|
+
const controller = new AbortController();
|
|
655
|
+
const timeout = setTimeout(() => {
|
|
656
|
+
controller.abort(new Error(`Workflow timeout exceeded after ${String(this.definition.settings.timeout_seconds)} seconds`));
|
|
657
|
+
}, this.definition.settings.timeout_seconds * 1000);
|
|
658
|
+
if (this.signal != null) {
|
|
659
|
+
this.signal.addEventListener("abort", () => controller.abort(new WorkflowCancellationError()), { once: true });
|
|
660
|
+
}
|
|
661
|
+
return {
|
|
662
|
+
signal: controller.signal,
|
|
663
|
+
dispose: () => clearTimeout(timeout),
|
|
664
|
+
};
|
|
665
|
+
}
|
|
666
|
+
throwIfAborted(signal) {
|
|
667
|
+
if (!signal.aborted) {
|
|
668
|
+
return;
|
|
669
|
+
}
|
|
670
|
+
const reason = signal.reason;
|
|
671
|
+
if (reason instanceof WorkflowCancellationError) {
|
|
672
|
+
throw reason;
|
|
673
|
+
}
|
|
674
|
+
if (reason instanceof Error && reason.message.includes("Workflow timeout exceeded")) {
|
|
675
|
+
throw new WorkflowError(reason.message);
|
|
676
|
+
}
|
|
677
|
+
throw new WorkflowCancellationError();
|
|
678
|
+
}
|
|
679
|
+
validateFinalStage() {
|
|
680
|
+
const finalStage = this.definition.stages[this.definition.stages.length - 1];
|
|
681
|
+
if (finalStage == null || finalStage.calls.length !== 1) {
|
|
682
|
+
throw new WorkflowError("Final stage must contain exactly one call");
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
validateAdvertisedCliNames() {
|
|
686
|
+
const knownCliNames = Array.isArray(this.session.cliNames) ? this.session.cliNames : [];
|
|
687
|
+
if (knownCliNames.length === 0) {
|
|
688
|
+
return;
|
|
689
|
+
}
|
|
690
|
+
const known = new Set(knownCliNames);
|
|
691
|
+
const unknownCalls = this.definition.stages.flatMap((stage) => stage.calls
|
|
692
|
+
.filter((call) => !known.has(call.cli_name))
|
|
693
|
+
.map((call) => `${stage.id}.${call.id} -> ${call.cli_name}`));
|
|
694
|
+
if (unknownCalls.length === 0) {
|
|
695
|
+
return;
|
|
696
|
+
}
|
|
697
|
+
throw new WorkflowError(`Workflow references cli_name values not advertised by the child: ${unknownCalls.join(", ")}. ` +
|
|
698
|
+
`Known cli_name values: ${knownCliNames.join(", ")}`);
|
|
699
|
+
}
|
|
700
|
+
defaultAllowedRoots() {
|
|
701
|
+
const workspaceRoot = resolveWorkspaceRoot();
|
|
702
|
+
const stateDir = getStateDir();
|
|
703
|
+
const configuredArtifactsDir = getArtifactsDir();
|
|
704
|
+
return [
|
|
705
|
+
workspaceRoot,
|
|
706
|
+
stateDir,
|
|
707
|
+
configuredArtifactsDir,
|
|
708
|
+
this.runDir,
|
|
709
|
+
join(this.runDir, "artifacts"),
|
|
710
|
+
];
|
|
711
|
+
}
|
|
712
|
+
/**
 * Assemble the input payload for one workflow call.
 *
 * Resume path: when a prepared input was persisted for this callKey,
 * return a deep copy of it verbatim (structuredClone) and skip all
 * re-resolution of prompts, context, and paths.
 *
 * Fresh path: render the prompt template, gather context artifacts
 * (optionally anonymized), merge static and variable-resolved file/image
 * paths, enforce the prompt byte budget, then build the payload with only
 * the optional fields that are actually present.
 *
 * @throws via resolveDynamicPaths (invalid variables / disallowed paths)
 *   and enforcePromptBudget (budget exceeded).
 */
buildCallInput(stage, call, callKey, callIndex) {
    // Resume: reuse the persisted input exactly as prepared. Deep copy so
    // downstream mutation cannot corrupt the persisted record.
    const resumedInput = this.resumePreparedInputs.get(callKey);
    if (resumedInput != null) {
        return structuredClone(resumedInput);
    }
    // Render the prompt template with workflow variables plus per-call
    // builtin identifiers (run/stage/call ids).
    const prompt = renderCallPrompt(call.prompt, this.definition.variables, this.variables, {
        workflow: { run_id: this.runId },
        stage: { id: stage.id },
        call: { id: call.id },
    });
    // Context from prior calls; anonymize_context swaps in the anonymized
    // variant (which also receives the call index).
    const contextArtifacts = call.anonymize_context === true
        ? this.artifactStore.buildAnonymizedContext(this.definition, call.context_from, callIndex)
        : this.artifactStore.buildContext(this.definition, call.context_from);
    // Static paths come straight from the definition; dynamic ones are
    // resolved from workflow variables (and validated against allowed roots).
    const staticFiles = call.absolute_file_paths ?? [];
    const dynamicFiles = this.resolveDynamicPaths(call.absolute_file_paths_from, `${stage.id}.${call.id}.absolute_file_paths_from`);
    const staticImages = call.images ?? [];
    const dynamicImages = this.resolveDynamicPaths(call.images_from, `${stage.id}.${call.id}.images_from`);
    const absoluteFilePaths = [
        ...contextArtifacts.absoluteFilePaths,
        ...staticFiles,
        ...dynamicFiles,
    ];
    const images = [...staticImages, ...dynamicImages];
    // Estimate total prompt size (prompt + context + file/image contributors)
    // against the configured budget before contacting the child.
    const estimate = estimatePromptBudget({
        promptText: prompt,
        contextText: contextArtifacts.context,
        fileContributors: [
            ...contextArtifacts.contributors,
            ...staticFiles.map((path) => createPathContributor(`absolute_file_paths:${path}`, path, "file")),
            ...dynamicFiles.map((path) => createPathContributor(`absolute_file_paths_from:${path}`, path, "file")),
        ],
        imageContributors: images.map((path) => createPathContributor(`image:${path}`, path, "image")),
        maxBytes: this.maxPromptBytes,
        reserveBytes: this.promptReserveBytes,
    });
    enforcePromptBudget(estimate);
    // Required fields first; optional fields are added only when present so
    // the payload stays minimal.
    const input = {
        cli_name: call.cli_name,
        role: call.role,
        prompt,
    };
    if (absoluteFilePaths.length > 0) {
        input["absolute_file_paths"] = absoluteFilePaths;
    }
    if (images.length > 0) {
        input["images"] = images;
    }
    if (contextArtifacts.context != null) {
        input["context"] = contextArtifacts.context;
    }
    if (call.timeout_seconds != null) {
        input["timeout_seconds"] = call.timeout_seconds;
    }
    if (call.max_response_chars != null) {
        input["max_response_chars"] = call.max_response_chars;
    }
    // Derive JSON enforcement from response_contract or validator_schema_id.
    // This bridges workflow-level structured-output intent to the child pipeline's
    // enforceJson gate, decoupling JSON enforcement from role names.
    if (call.response_contract === "json" || call.validator_schema_id != null) {
        input["expect_json"] = true;
    }
    return input;
}
|
|
776
|
+
/**
 * Convert an unexpected rejection from a parallel stage call into a failed
 * call result: record the failure in the artifact store, log it, and emit
 * state-change and progress events.
 *
 * Cancellation is the exception — it is rethrown, never materialized.
 */
async materializeUnexpectedFailure(stage, call, error) {
    if (error instanceof WorkflowCancellationError) {
        throw error;
    }
    const message = error instanceof Error ? error.message : String(error);
    const runId = this.runId;
    const workflowName = this.definition.name;
    const stageId = stage.id;
    const callId = call.id;
    this.artifactStore.noteFailedCall(stageId, callId, message);
    logger.error({ run_id: runId, stage_id: stageId, call_id: callId, err: message }, "parallel stage call rejected unexpectedly");
    await this.emitCallStateChange({
        runId,
        workflowName,
        stageId,
        callId,
        state: "failed",
        attempt: 1,
        retryCount: 0,
        completedAt: new Date().toISOString(),
        error: message,
        failureClass: classifyFailureClass(error),
        childDebugMetadata: error instanceof CallExecutionError ? error.childDebugMetadata : undefined,
    });
    await this.emitProgress({
        runId,
        workflowName,
        scope: "call",
        stageId,
        stageDescription: stage.description,
        callId,
        state: "failed",
        attempt: 1,
        retryCount: 0,
        message: `Call ${stageId}.${callId} failed unexpectedly: ${message}`,
    });
    return {
        stageId,
        callId,
        ok: false,
        error: message,
    };
}
|
|
815
|
+
async emitRunStateChange(event) {
|
|
816
|
+
await this.hooks.onRunStateChange?.(event);
|
|
817
|
+
}
|
|
818
|
+
async emitCallPrepared(event) {
|
|
819
|
+
await this.hooks.onCallPrepared?.(event);
|
|
820
|
+
}
|
|
821
|
+
async emitCallStateChange(event) {
|
|
822
|
+
await this.hooks.onCallStateChange?.(event);
|
|
823
|
+
}
|
|
824
|
+
async emitRetryScheduled(event) {
|
|
825
|
+
await this.hooks.onRetryScheduled?.(event);
|
|
826
|
+
}
|
|
827
|
+
async emitProgress(event) {
|
|
828
|
+
await this.hooks.onProgress?.(event);
|
|
829
|
+
}
|
|
830
|
+
seedResumedArtifacts() {
|
|
831
|
+
for (const executedCall of this.resumeCompletedCalls.values()) {
|
|
832
|
+
if (executedCall.artifact != null) {
|
|
833
|
+
this.artifactStore.seedArtifact(executedCall.artifact);
|
|
834
|
+
}
|
|
835
|
+
}
|
|
836
|
+
for (const failedCall of this.resumeFailedCalls.values()) {
|
|
837
|
+
this.artifactStore.seedFailedCall(failedCall.stageId, failedCall.callId, failedCall.error);
|
|
838
|
+
}
|
|
839
|
+
}
|
|
840
|
+
/**
 * Pre-flight validation: check that all user-supplied file/image paths exist
 * before any stage executes. Skips already-completed calls on resume.
 *
 * - ENOENT → collect as missing (fail-fast after checking all)
 * - EACCES/EIO → warn but don't fail (NFS/FUSE transient)
 * - context_from artifacts are NOT validated (engine-generated)
 *
 * @throws WorkflowError aggregating missing files, access errors, and
 *   variable-resolution errors into one sorted, sectioned message.
 */
async validateInputPathsExist() {
    // Deduplicated set of every path to stat; resolution failures are
    // collected (not thrown) so all calls are inspected before failing.
    const pathsToCheck = new Set();
    const resolutionErrors = [];
    for (const stage of this.definition.stages) {
        for (const call of stage.calls) {
            const callKey = `${stage.id}.${call.id}`;
            // Skip already-completed calls (resume edge case)
            if (this.resumeCompletedCalls.has(callKey)) {
                continue;
            }
            // For calls with persisted inputs (resumed but not completed),
            // validate the PERSISTED paths — buildCallInput() returns the
            // persisted input verbatim via structuredClone, ignoring current
            // variable resolution.
            const persistedInput = this.resumePreparedInputs.get(callKey);
            if (persistedInput != null) {
                const persistedFiles = persistedInput["absolute_file_paths"];
                if (Array.isArray(persistedFiles)) {
                    for (const p of persistedFiles) {
                        // Persisted payloads are untyped JSON — only stat strings.
                        if (typeof p === "string") {
                            pathsToCheck.add(p);
                        }
                    }
                }
                const persistedImages = persistedInput["images"];
                if (Array.isArray(persistedImages)) {
                    for (const p of persistedImages) {
                        if (typeof p === "string") {
                            pathsToCheck.add(p);
                        }
                    }
                }
                continue;
            }
            // Static file paths
            if (call.absolute_file_paths != null) {
                for (const p of call.absolute_file_paths) {
                    pathsToCheck.add(p);
                }
            }
            // Dynamic file paths (variable-resolved); resolution failures are
            // deferred so every call is still checked.
            try {
                const dynamicFiles = this.resolveDynamicPaths(call.absolute_file_paths_from, `${stage.id}.${call.id}.absolute_file_paths_from`);
                for (const p of dynamicFiles) {
                    pathsToCheck.add(p);
                }
            }
            catch (error) {
                resolutionErrors.push(error.message);
            }
            // Static image paths
            if (call.images != null) {
                for (const p of call.images) {
                    pathsToCheck.add(p);
                }
            }
            // Dynamic image paths (variable-resolved)
            try {
                const dynamicImages = this.resolveDynamicPaths(call.images_from, `${stage.id}.${call.id}.images_from`);
                for (const p of dynamicImages) {
                    pathsToCheck.add(p);
                }
            }
            catch (error) {
                resolutionErrors.push(error.message);
            }
        }
    }
    if (pathsToCheck.size === 0 && resolutionErrors.length === 0) {
        return;
    }
    const missing = [];
    const accessErrors = [];
    if (pathsToCheck.size > 0) {
        // Stat all paths concurrently; pushes from the callbacks are safe
        // because Node runs them on a single thread.
        await Promise.all([...pathsToCheck].map(async (filePath) => {
            try {
                await fsStat(filePath);
            }
            catch (error) {
                const code = error.code;
                if (code === "ENOENT") {
                    missing.push(filePath);
                }
                else if (code === "EACCES" || code === "EIO") {
                    // Possibly transient (NFS/FUSE) — warn and proceed.
                    logger.warn({ path: filePath, err: error.message }, "input path preflight: transient access error, proceeding");
                }
                else {
                    const errCode = error.code ?? "UNKNOWN";
                    accessErrors.push(`${filePath} (${errCode})`);
                }
            }
        }));
    }
    const hasErrors = missing.length > 0 || accessErrors.length > 0 || resolutionErrors.length > 0;
    if (!hasErrors) {
        return;
    }
    // Sort each section for deterministic, readable error output.
    const sections = [];
    if (missing.length > 0) {
        missing.sort();
        sections.push(`Missing files:\n - ${missing.join("\n - ")}`);
    }
    if (accessErrors.length > 0) {
        accessErrors.sort();
        sections.push(`Access errors:\n - ${accessErrors.join("\n - ")}`);
    }
    if (resolutionErrors.length > 0) {
        resolutionErrors.sort();
        sections.push(`Resolution errors:\n - ${resolutionErrors.join("\n - ")}`);
    }
    throw new WorkflowError(`Input validation failed before workflow execution:\n ${sections.join("\n ")}`);
}
|
|
960
|
+
}
|
|
961
|
+
/**
 * Collect every "stageId.callId" key referenced by any call's context_from.
 *
 * A bare stage id expands to all calls in that stage; a dotted reference is
 * added verbatim. References to unknown stages are silently skipped.
 *
 * @returns {Set<string>} the referenced call keys.
 */
function collectContextProducingCalls(definition) {
    const referenced = new Set();
    for (const stage of definition.stages) {
        for (const call of stage.calls) {
            for (const ref of call.context_from ?? []) {
                if (ref.includes(".")) {
                    // Fully qualified "stage.call" reference — take as-is.
                    referenced.add(ref);
                    continue;
                }
                // Bare stage reference: expand to every call in that stage.
                const referencedStage = definition.stages.find((candidate) => candidate.id === ref);
                if (referencedStage == null) {
                    continue;
                }
                for (const referencedCall of referencedStage.calls) {
                    referenced.add(`${referencedStage.id}.${referencedCall.id}`);
                }
            }
        }
    }
    return referenced;
}
|
|
987
|
+
/**
 * Classify an error for retry purposes.
 *
 * - InvalidCallError is always "permanent".
 * - CallExecutionError carries its own failureClass from the child.
 * - Everything else — including cancellation — is "permanent".
 *
 * (A separate WorkflowCancellationError branch that also returned
 * "permanent" was redundant with the fallthrough and has been collapsed;
 * behavior is unchanged.)
 */
function classifyFailureClass(error) {
    if (error instanceof InvalidCallError) {
        return "permanent";
    }
    if (error instanceof CallExecutionError) {
        return error.failureClass;
    }
    return "permanent";
}
|
|
999
|
+
/**
 * Determine whether an error is retryable based on the retry mode (5-1, 5-2).
 *
 * - InvalidCallError (-32602): ALWAYS permanent — never retry.
 * - "all": retry any other failure.
 * - "transport_only" (default): retry only when the failure was classified
 *   as transient (child crash, transport death, wall/idle timeout).
 */
function isRetryable(error, failureClass, retryMode) {
    // Child -32602 (InvalidParams) is ALWAYS permanent
    if (error instanceof InvalidCallError) {
        return false;
    }
    return retryMode === "all" || failureClass === "transient";
}
|
|
1018
|
+
/**
 * Compute retry delay with exponential backoff and jitter (5-1).
 *
 * delay = min(backoff_seconds * 2^retryCount, max_backoff_seconds)
 *         + random 0-25% jitter, returned in whole milliseconds.
 */
function computeRetryDelay(retryCount, backoffSeconds, maxBackoffSeconds) {
    const cappedSeconds = Math.min(backoffSeconds * 2 ** retryCount, maxBackoffSeconds);
    const jitterSeconds = cappedSeconds * 0.25 * Math.random();
    return Math.round((cappedSeconds + jitterSeconds) * 1000);
}
|
|
1028
|
+
/**
 * Delay that respects AbortSignal — resolves early (never rejects) on abort.
 *
 * Fix: the abort listener is now removed when the timer fires normally.
 * Previously each completed delay left a dangling {once:true} listener on
 * the signal, so repeated retries against a single long-lived signal (the
 * workflow deadline signal) accumulated listeners for the signal's lifetime.
 *
 * @param {number} ms - delay in milliseconds
 * @param {AbortSignal} signal - aborting short-circuits the wait
 * @returns {Promise<void>}
 */
function abortableDelay(ms, signal) {
    return new Promise((resolve) => {
        if (signal.aborted) {
            resolve();
            return;
        }
        const onAbort = () => {
            clearTimeout(timer);
            resolve();
        };
        const timer = setTimeout(() => {
            // Normal completion: detach the listener so it cannot leak.
            signal.removeEventListener("abort", onAbort);
            resolve();
        }, ms);
        signal.addEventListener("abort", onAbort, { once: true });
    });
}
|
|
1044
|
+
//# sourceMappingURL=engine.js.map
|