@claushaas/ergon-engine 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/LICENSE +1 -0
  2. package/README.md +13 -0
  3. package/dist/defaults.d.ts +11 -0
  4. package/dist/defaults.d.ts.map +1 -0
  5. package/dist/defaults.js +27 -0
  6. package/dist/defaults.js.map +1 -0
  7. package/dist/executors/agent.d.ts +12 -0
  8. package/dist/executors/agent.d.ts.map +1 -0
  9. package/dist/executors/agent.js +80 -0
  10. package/dist/executors/agent.js.map +1 -0
  11. package/dist/executors/artifact.d.ts +7 -0
  12. package/dist/executors/artifact.d.ts.map +1 -0
  13. package/dist/executors/artifact.js +148 -0
  14. package/dist/executors/artifact.js.map +1 -0
  15. package/dist/executors/condition.d.ts +7 -0
  16. package/dist/executors/condition.d.ts.map +1 -0
  17. package/dist/executors/condition.js +53 -0
  18. package/dist/executors/condition.js.map +1 -0
  19. package/dist/executors/exec.d.ts +32 -0
  20. package/dist/executors/exec.d.ts.map +1 -0
  21. package/dist/executors/exec.js +169 -0
  22. package/dist/executors/exec.js.map +1 -0
  23. package/dist/executors/index.d.ts +54 -0
  24. package/dist/executors/index.d.ts.map +1 -0
  25. package/dist/executors/index.js +49 -0
  26. package/dist/executors/index.js.map +1 -0
  27. package/dist/executors/manual.d.ts +7 -0
  28. package/dist/executors/manual.d.ts.map +1 -0
  29. package/dist/executors/manual.js +25 -0
  30. package/dist/executors/manual.js.map +1 -0
  31. package/dist/executors/notify.d.ts +48 -0
  32. package/dist/executors/notify.d.ts.map +1 -0
  33. package/dist/executors/notify.js +313 -0
  34. package/dist/executors/notify.js.map +1 -0
  35. package/dist/index.d.ts +12 -0
  36. package/dist/index.d.ts.map +1 -0
  37. package/dist/index.js +12 -0
  38. package/dist/index.js.map +1 -0
  39. package/dist/runner.d.ts +11 -0
  40. package/dist/runner.d.ts.map +1 -0
  41. package/dist/runner.js +1061 -0
  42. package/dist/runner.js.map +1 -0
  43. package/dist/templating/index.d.ts +43 -0
  44. package/dist/templating/index.d.ts.map +1 -0
  45. package/dist/templating/index.js +778 -0
  46. package/dist/templating/index.js.map +1 -0
  47. package/dist/worker.d.ts +26 -0
  48. package/dist/worker.d.ts.map +1 -0
  49. package/dist/worker.js +320 -0
  50. package/dist/worker.js.map +1 -0
  51. package/dist/workflowIdentity.d.ts +3 -0
  52. package/dist/workflowIdentity.d.ts.map +1 -0
  53. package/dist/workflowIdentity.js +12 -0
  54. package/dist/workflowIdentity.js.map +1 -0
  55. package/package.json +44 -0
package/dist/runner.js ADDED
@@ -0,0 +1,1061 @@
1
+ import { createHash } from 'node:crypto';
2
+ import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync, } from 'node:fs';
3
+ import path from 'node:path';
4
+ import { ERROR_CODES } from '@claushaas/ergon-shared';
5
+ import { appendEvent, appendEventInTransaction, artifactsDir, assertSafeSegment, createStepRun, getLatestEventForStepRun, getRun, getRunForClaim, getWaitingManualStepRun, getWorkflow, insertArtifact, listArtifacts, listStepRuns, markRunFailed, markRunSucceeded, markRunWaitingManual, stepAttemptDir, updateRunCursor, updateStepRunStatus, withRunClaim, } from '@claushaas/ergon-storage';
6
+ import { createExecutionContext, } from './executors/index.js';
7
+ import { interpolateTemplateString, loadAndValidateTemplateFromFile, renderStepRequestPayload, resolveTemplateReference, } from './templating/index.js';
8
+ import { assertWorkflowTemplateIdentity } from './workflowIdentity.js';
9
/** Matches a string made of exactly one `{{ reference }}` placeholder; group 1 captures the reference text. */
const EXACT_INTERPOLATION_PATTERN = /^{{\s*([^{}]+?)\s*}}$/;

/** Membership set built from the shared ERROR_CODES list. */
const ERROR_CODE_SET = new Set(ERROR_CODES);

/** Largest serialized payload (in UTF-8 bytes, 128 KiB) that safeJsonStringify returns verbatim. */
const MAX_SAFE_JSON_BYTES = 128 * 1024;

/** Interval, in milliseconds, at which a running step re-checks its run row for cancellation or claim loss. */
const EXECUTION_ABORT_POLL_MS = 50;

/** Placeholder written in place of any value whose key looks sensitive. */
const REDACTED_VALUE = '[REDACTED]';

/** Case-insensitive, unanchored match on key names whose values must not be persisted. */
const REDACTED_KEY_PATTERN = /(api[_-]?key|authorization|token|secret|password|prompt|target|env|messages?)/i;
15
/**
 * Parses JSON text, returning a fallback when there is nothing to parse.
 *
 * @param {string | null | undefined} value - JSON text; any falsy value selects the fallback.
 * @param {*} fallback - Returned as-is when `value` is falsy.
 * @returns {*} The parsed value or the fallback.
 * @throws {SyntaxError} When `value` is non-empty but not valid JSON.
 */
function parseJson(value, fallback) {
    return value ? JSON.parse(value) : fallback;
}
21
/**
 * Serializes a value to JSON without throwing and with a bounded output size.
 *
 * - `undefined`, and values JSON cannot represent at the top level (e.g. a
 *   function), yield `null`.
 * - A reference back to one of its own ancestors is replaced by the string
 *   "[Circular]". An object that is merely referenced more than once
 *   (a shared, non-circular reference) is serialized normally each time.
 * - Any serialization error (e.g. a BigInt) yields the JSON string
 *   `"[Unserializable]"`.
 * - Output larger than MAX_SAFE_JSON_BYTES is replaced by a small
 *   `{ note: 'truncated', size_bytes }` marker.
 *
 * @param {*} value - Value to serialize.
 * @returns {string | null} JSON text, or null when there is nothing to serialize.
 */
function safeJsonStringify(value) {
    if (value === undefined) {
        return null;
    }
    // Chain of objects from the root down to the holder currently being
    // serialized. Only this chain can form a cycle; tracking every object ever
    // visited would misreport shared references as circular.
    const ancestors = [];
    let serialized;
    try {
        // A regular function (not an arrow) is required: JSON.stringify invokes
        // the replacer with `this` bound to the object that holds `currentValue`.
        serialized = JSON.stringify(value, function replacer(_key, currentValue) {
            if (!currentValue || typeof currentValue !== 'object') {
                return currentValue;
            }
            // Unwind to the current holder: anything above it belongs to a
            // sibling subtree that has already been fully serialized.
            while (ancestors.length > 0 && ancestors[ancestors.length - 1] !== this) {
                ancestors.pop();
            }
            if (ancestors.includes(currentValue)) {
                return '[Circular]';
            }
            ancestors.push(currentValue);
            return currentValue;
        });
    }
    catch {
        return JSON.stringify('[Unserializable]');
    }
    if (serialized === undefined) {
        return null;
    }
    const bytes = Buffer.byteLength(serialized, 'utf8');
    if (bytes <= MAX_SAFE_JSON_BYTES) {
        return serialized;
    }
    return JSON.stringify({
        note: 'truncated',
        size_bytes: bytes,
    });
}
54
/**
 * Round-trips a value through safeJsonStringify so that what is stored is
 * plain JSON data.
 *
 * @param {*} value - Value to normalize.
 * @returns {*} The parsed JSON form of `value`; `undefined` when `value` is
 *   undefined; `null` when serialization produced nothing.
 */
function normalizeForStorage(value) {
    const json = safeJsonStringify(value);
    if (json) {
        return JSON.parse(json);
    }
    return value === undefined ? undefined : null;
}
61
/**
 * Returns a copy of `value` in which every entry stored under a
 * sensitive-looking key (see REDACTED_KEY_PATTERN) is replaced by
 * REDACTED_VALUE.
 *
 * Arrays and objects are copied recursively. Array elements are visited
 * without a key, so only the key that holds the array decides whether the
 * whole array is redacted. A reference back to an object or array that is
 * still being traversed is replaced by the string "[Circular]"; a value that
 * is only referenced more than once is copied each time.
 *
 * @param {*} value - Value to sanitize.
 * @param {string} [key] - Key under which `value` is stored, if any.
 * @param {WeakSet<object>} [seen] - Objects on the current traversal path
 *   (internal; callers normally omit it).
 * @returns {*} The redacted copy, or `value` itself for primitives.
 */
function redactForPersistence(value, key, seen = new WeakSet()) {
    if (value === undefined || value === null) {
        return value;
    }
    if (key && REDACTED_KEY_PATTERN.test(key)) {
        return REDACTED_VALUE;
    }
    if (typeof value !== 'object') {
        return value;
    }
    // Arrays are tracked too; otherwise a self-referencing array recurses
    // until the call stack overflows.
    if (seen.has(value)) {
        return '[Circular]';
    }
    seen.add(value);
    try {
        if (Array.isArray(value)) {
            return value.map((entry) => redactForPersistence(entry, undefined, seen));
        }
        const redacted = {};
        for (const [entryKey, entryValue] of Object.entries(value)) {
            redacted[entryKey] = redactForPersistence(entryValue, entryKey, seen);
        }
        return redacted;
    }
    finally {
        // Leaving this subtree: the value is no longer an ancestor, so a later
        // sibling that references it is a shared reference, not a cycle.
        seen.delete(value);
    }
}
84
/**
 * Prepares a value for persistence: sensitive keys are redacted first, then
 * the result is normalized for storage.
 *
 * @param {*} value - Value about to be persisted.
 * @returns {*} The redacted, normalized value.
 */
function persistableValue(value) {
    const redacted = redactForPersistence(value);
    return normalizeForStorage(redacted);
}
87
/**
 * Builds an Error whose `name` is "AbortError", for use as an abort reason.
 *
 * @param {string} message - Human-readable reason for the abort.
 * @returns {Error} The error instance.
 */
function createAbortError(message) {
    const abortError = new Error(message);
    abortError.name = 'AbortError';
    return abortError;
}
90
/**
 * Watches a running step and aborts it when the run disappears, is canceled,
 * is no longer claimed by this worker, or the step exceeds its timeout.
 *
 * The run row is polled every EXECUTION_ABORT_POLL_MS milliseconds. When
 * `step.timeout_ms` is a number, a one-shot timer enforces it. The first
 * trigger wins; later checks are no-ops once the signal is aborted.
 *
 * @param {*} db - Database handle passed through to getRun.
 * @param {string} runId - Run being executed.
 * @param {{ workerId: string, claimEpoch: number }} claim - Claim this worker holds.
 * @param {{ id: string, timeout_ms?: number }} step - Step being executed.
 * @returns {{ abortReason: () => ({ message: string, type: string } | null), cleanup: () => void, signal: AbortSignal }}
 *   `abortReason()` reports why the step was aborted (null if it was not);
 *   `cleanup()` stops both timers and must be called when the step finishes.
 */
function createStepAbortMonitor(db, runId, claim, step) {
    const controller = new AbortController();
    let abortReason = null;
    // Records the reason, then aborts with an AbortError carrying its message.
    const abortWith = (type, message) => {
        abortReason = { message, type };
        controller.abort(createAbortError(message));
    };
    const pollRun = () => {
        if (controller.signal.aborted) {
            return;
        }
        const currentRun = getRun(db, runId);
        if (!currentRun) {
            abortWith('claim_lost', `Workflow run "${runId}" disappeared while step "${step.id}" was running`);
            return;
        }
        if (currentRun.status === 'canceled') {
            abortWith('canceled', `Workflow run "${runId}" was canceled during step "${step.id}"`);
            return;
        }
        const stillOwned = currentRun.status === 'running' &&
            currentRun.claimed_by === claim.workerId &&
            currentRun.claim_epoch === claim.claimEpoch;
        if (!stillOwned) {
            abortWith('claim_lost', `Workflow run "${runId}" lost claim ownership during step "${step.id}"`);
        }
    };
    const intervalId = setInterval(pollRun, EXECUTION_ABORT_POLL_MS);
    let timeoutId;
    if (typeof step.timeout_ms === 'number') {
        timeoutId = setTimeout(() => {
            if (controller.signal.aborted) {
                return;
            }
            abortWith('timeout', `Step "${step.id}" exceeded timeout of ${step.timeout_ms}ms`);
        }, step.timeout_ms);
    }
    return {
        abortReason: () => abortReason,
        cleanup: () => {
            clearInterval(intervalId);
            if (timeoutId) {
                clearTimeout(timeoutId);
            }
        },
        signal: controller.signal,
    };
}
148
/**
 * Tells whether a value is one of the known error codes in ERROR_CODE_SET.
 *
 * @param {*} value - Candidate code.
 * @returns {boolean} True only for strings present in ERROR_CODE_SET.
 */
function isErrorCode(value) {
    if (typeof value !== 'string') {
        return false;
    }
    return ERROR_CODE_SET.has(value);
}
151
/**
 * Looks up the executor registered for a step's kind.
 *
 * @param {{ get: (kind: string) => * }} executors - Registry keyed by step kind.
 * @param {{ kind: string }} step - Step to execute.
 * @returns {*} Whatever the registry returns for `step.kind`.
 */
function getStepExecutor(executors, step) {
    const { kind } = step;
    return executors.get(kind);
}
154
/**
 * Assembles everything needed to run one attempt of a step: the attempt
 * number, the execution context, and the request snapshot.
 *
 * @param {string} runId - Run being executed.
 * @param {string} workerId - Worker executing the run.
 * @param {number} stepIndex - Index of the step in the template.
 * @param {object} step - Step definition.
 * @param {{ workflow: { id: string, version: * } }} template - Workflow template.
 * @param {object} inputs - Workflow inputs.
 * @param {object} artifacts - Artifacts available so far.
 * @param {AbortSignal} signal - Abort signal handed to the executor.
 * @param {Array<object>} stepRuns - Known step runs, used to number the attempt.
 * @returns {{ context: *, request: *, stepAttempt: number }}
 */
function prepareStepExecution(runId, workerId, stepIndex, step, template, inputs, artifacts, signal, stepRuns) {
    const stepAttempt = getStepAttempt(step.id, stepRuns);
    const run = {
        attempt: stepAttempt,
        runId,
        stepIndex,
        workerId,
        workflowId: template.workflow.id,
        workflowVersion: template.workflow.version,
    };
    const context = createExecutionContext({ artifacts, inputs, run, signal });
    return {
        context,
        request: buildRequestSnapshot(step, context),
        stepAttempt,
    };
}
176
/**
 * Computes the attempt number for the next run of a step: one more than the
 * highest attempt already recorded for it, or 1 when there is none.
 *
 * @param {string} stepId - Step identifier.
 * @param {Array<{ step_id: string, attempt: number }>} stepRuns - Known step runs.
 * @returns {number} Next attempt number.
 */
function getStepAttempt(stepId, stepRuns) {
    let highestAttempt = 0;
    for (const stepRun of stepRuns) {
        if (stepRun.step_id === stepId) {
            highestAttempt = Math.max(highestAttempt, stepRun.attempt);
        }
    }
    return highestAttempt + 1;
}
181
/**
 * Records the start of a step attempt under the run claim: creates the step
 * run, emits `step_scheduled`, moves the run cursor to this step, marks the
 * step run `running`, and emits `step_started`. Both events share one
 * timestamp.
 *
 * @param {*} db - Database handle.
 * @param {string} runId - Run being executed.
 * @param {{ workerId: string, claimEpoch: number }} claim - Claim held by this worker.
 * @param {object} step - Step definition.
 * @param {number} stepAttempt - Attempt number.
 * @param {number} stepIndex - Index of the step in the template.
 * @param {*} request - Request snapshot; persisted in redacted form.
 * @returns {*} Whatever withRunClaim returns — the created step run when the
 *   callback runs. Callers treat a falsy result as a lost claim.
 */
function startStepAttempt(db, runId, claim, step, stepAttempt, stepIndex, request) {
    // Fresh payload per event so the two appended events never share an object.
    const describeAttempt = () => ({
        attempt: stepAttempt,
        step_id: step.id,
        step_kind: step.kind,
    });
    return withRunClaim(db, runId, claim, () => {
        const now = new Date().toISOString();
        const persistableRequest = persistableValue(request);
        const stepRun = createStepRun(db, runId, step.id, stepAttempt, step.kind, {
            dependsOn: step.depends_on ?? [],
            request: persistableRequest,
        });
        const eventMeta = () => ({
            actor: `worker:${claim.workerId}`,
            stepRunId: stepRun.id,
            ts: now,
        });
        appendEventInTransaction(db, runId, 'step_scheduled', describeAttempt(), eventMeta());
        updateRunCursor(db, runId, claim.workerId, claim.claimEpoch, stepIndex, step.id);
        updateStepRunStatus(db, stepRun.id, 'running', {
            request: persistableRequest,
            startedAt: now,
        });
        appendEventInTransaction(db, runId, 'step_started', describeAttempt(), eventMeta());
        return stepRun;
    });
}
215
/**
 * Reduces a list of step runs to the latest attempt per step.
 *
 * For each step id the run with the highest `attempt` wins (the first one
 * seen wins ties). Its stored output is parsed from `output_json`.
 *
 * @param {Array<{ step_id: string, attempt: number, output_json: string | null, status: string }>} stepRuns
 * @returns {Map<string, { output: *, status: string }>} Latest state keyed by step id.
 */
function buildCompletedSteps(stepRuns) {
    const newestByStep = new Map();
    stepRuns.forEach((stepRun) => {
        const known = newestByStep.get(stepRun.step_id);
        const superseded = known && known.attempt >= stepRun.attempt;
        if (!superseded) {
            newestByStep.set(stepRun.step_id, {
                attempt: stepRun.attempt,
                output: parseJson(stepRun.output_json, null),
                status: stepRun.status,
            });
        }
    });
    // Drop the bookkeeping `attempt` field from the public result.
    const completed = new Map();
    for (const [stepId, { output, status }] of newestByStep) {
        completed.set(stepId, { output, status });
    }
    return completed;
}
236
/**
 * Resolves the on-disk location of a stored artifact, refusing paths that
 * would leave `rootDir`.
 *
 * @param {string} rootDir - Base directory for artifact storage.
 * @param {{ path: string }} artifactRow - Artifact record holding a relative path.
 * @returns {string} Absolute path of the artifact file.
 */
function getArtifactFilePath(rootDir, artifactRow) {
    const { path: storedPath } = artifactRow;
    return resolvePathWithinBase(rootDir, storedPath, 'artifact path');
}
239
/**
 * Resolves a relative path against a base directory and guarantees the
 * result is strictly inside that directory.
 *
 * Rejected: absolute inputs, paths that resolve to the base directory itself,
 * and paths whose normalized form climbs out of the base. Names that merely
 * begin with two dots (for example "..cache/file") stay inside the base and
 * are accepted.
 *
 * @param {string} baseDir - Directory the result must stay within.
 * @param {string} unsafePath - Untrusted relative path.
 * @param {string} label - Name of the value, used in error messages.
 * @returns {string} The absolute, resolved path.
 * @throws {Error} When the path is absolute or escapes the base directory.
 */
function resolvePathWithinBase(baseDir, unsafePath, label) {
    if (path.isAbsolute(unsafePath)) {
        throw new Error(`Unsafe ${label}: absolute paths are not allowed`);
    }
    const resolvedBase = path.resolve(baseDir);
    const resolvedPath = path.resolve(resolvedBase, unsafePath);
    const relative = path.relative(resolvedBase, resolvedPath);
    // path.relative returns a normalized path, so the target is outside the
    // base exactly when the first segment is "..". A plain startsWith('..')
    // would also reject legitimate names such as "..cache".
    const climbsOut = relative === '..' || relative.startsWith(`..${path.sep}`);
    // An absolute result can occur on Windows when the two paths are on
    // different drives.
    if (!relative || climbsOut || path.isAbsolute(relative)) {
        throw new Error(`Unsafe ${label}: path escapes base directory`);
    }
    return resolvedPath;
}
251
/**
 * Reloads artifact values from disk for a run that is being resumed.
 *
 * Only artifacts produced by step runs whose status is `succeeded` are
 * considered, and artifacts whose file is missing are skipped. Text
 * artifacts are returned as strings; every other type is parsed as JSON.
 * When two artifacts share a name, the later one in `runArtifacts` wins.
 *
 * @param {string} artifactBaseDir - Base directory artifact paths are relative to.
 * @param {Array<{ id: *, status: string }>} stepRuns - Step runs of the run.
 * @param {Array<{ name: string, type: string, path: string, step_run_id: * }>} runArtifacts - Artifact records.
 * @returns {Record<string, *>} Artifact values keyed by artifact name.
 * @throws {SyntaxError} When a non-text artifact file does not contain valid JSON.
 */
function restoreArtifacts(artifactBaseDir, stepRuns, runArtifacts) {
    const succeededStepRunIds = new Set();
    for (const stepRun of stepRuns) {
        if (stepRun.status === 'succeeded') {
            succeededStepRunIds.add(stepRun.id);
        }
    }
    const restored = {};
    for (const artifact of runArtifacts) {
        if (succeededStepRunIds.has(artifact.step_run_id)) {
            const artifactFile = getArtifactFilePath(artifactBaseDir, artifact);
            if (existsSync(artifactFile)) {
                const raw = readFileSync(artifactFile, 'utf8');
                restored[artifact.name] = artifact.type === 'text' ? raw : JSON.parse(raw);
            }
        }
    }
    return restored;
}
270
/**
 * Finds the index of the first step, at or after the run cursor, that still
 * needs to run. Steps already recorded as `skipped` or `succeeded` are
 * passed over.
 *
 * @param {{ steps: Array<{ id: string }> }} template - Workflow template.
 * @param {Map<string, { status: string }>} completedSteps - Latest state per step id.
 * @param {number} currentStepIndex - Run cursor; negative values are treated as 0.
 * @returns {number} Index of the next step to execute; equals
 *   `template.steps.length` when every remaining step is done.
 */
function resolveNextStepIndex(template, completedSteps, currentStepIndex) {
    let index = Math.max(0, currentStepIndex);
    for (; index < template.steps.length; index += 1) {
        const step = template.steps[index];
        if (!step) {
            break;
        }
        const status = completedSteps.get(step.id)?.status;
        const alreadyDone = status === 'skipped' || status === 'succeeded';
        if (!alreadyDone) {
            break;
        }
    }
    return index;
}
286
/**
 * Rebuilds the in-memory state needed to continue a run: restored artifacts,
 * the latest state of each step, and the index of the next step to execute.
 *
 * @param {string} artifactBaseDir - Base directory of stored artifacts.
 * @param {object} template - Workflow template.
 * @param {{ id: *, current_step_index: number }} run - Run row.
 * @param {*} db - Database handle.
 * @returns {{ artifacts: object, completedSteps: Map<string, object>, nextStepIndex: number }}
 */
function resolveRunState(artifactBaseDir, template, run, db) {
    const stepRuns = listStepRuns(db, run.id);
    const completedSteps = buildCompletedSteps(stepRuns);
    const artifacts = restoreArtifacts(artifactBaseDir, stepRuns, listArtifacts(db, run.id));
    const nextStepIndex = resolveNextStepIndex(template, completedSteps, run.current_step_index);
    return { artifacts, completedSteps, nextStepIndex };
}
295
/**
 * Builds the request snapshot recorded for a step attempt. Template strings
 * are rendered against the current artifacts and inputs.
 *
 * Per kind:
 * - agent: `model`, `provider`, plus the rendered request payload
 * - exec: rendered `command`, optional interpolated `cwd` and `env`
 * - condition: interpolated `expression`
 * - manual: rendered `message`
 * - notify: rendered payload plus interpolated `channel` and optional `target`
 * - artifact: `input` and `operation` as declared
 *
 * @param {object} step - Step definition.
 * @param {{ artifacts: object, inputs: object }} context - Execution context.
 * @returns {object | undefined} The snapshot, or undefined for an unknown kind.
 */
function buildRequestSnapshot(step, context) {
    // Each helper call gets its own scope object.
    const scope = () => ({
        artifacts: context.artifacts,
        inputs: context.inputs,
    });
    const interpolate = (text) => interpolateTemplateString(text, scope());
    const renderPayload = () => renderStepRequestPayload(step, scope());
    const { kind } = step;
    if (kind === 'agent') {
        return {
            model: step.model,
            provider: step.provider,
            ...renderPayload(),
        };
    }
    if (kind === 'exec') {
        return {
            command: renderPayload().command,
            cwd: step.cwd ? interpolate(step.cwd) : undefined,
            env: step.env
                ? Object.fromEntries(Object.entries(step.env).map(([name, value]) => [name, interpolate(value)]))
                : undefined,
        };
    }
    if (kind === 'condition') {
        return {
            expression: interpolate(step.expression),
        };
    }
    if (kind === 'manual') {
        return {
            message: renderPayload().message,
        };
    }
    if (kind === 'notify') {
        return {
            ...renderPayload(),
            channel: interpolate(step.channel),
            target: step.target ? interpolate(step.target) : undefined,
        };
    }
    if (kind === 'artifact') {
        return {
            input: step.input,
            operation: step.operation,
        };
    }
    // Unknown kind: nothing to snapshot.
    return undefined;
}
371
/**
 * Decides whether a step must be skipped because of its dependencies.
 *
 * Dependencies are checked in declaration order. The first dependency with
 * no recorded state ends the check with "do not skip". A dependency that
 * failed, was skipped, or is a succeeded condition whose output has
 * `passed === false` causes the step to be skipped.
 *
 * @param {{ depends_on?: string[] }} step - Step definition.
 * @param {Map<string, { status: string, output: * }>} completedSteps - Latest state per step id.
 * @returns {boolean} True when the step should be skipped.
 */
function shouldSkipStep(step, completedSteps) {
    for (const dependency of step.depends_on ?? []) {
        const state = completedSteps.get(dependency);
        if (!state) {
            return false;
        }
        const didNotRun = state.status === 'failed' || state.status === 'skipped';
        const conditionRejected = state.status === 'succeeded' &&
            isConditionOutput(state.output) &&
            state.output.passed === false;
        if (didNotRun || conditionRejected) {
            return true;
        }
    }
    return false;
}
392
/**
 * Tells whether a step output looks like a condition result, i.e. an object
 * carrying a boolean `passed` property.
 *
 * @param {*} value - Step output to inspect.
 * @returns {boolean} True when `value.passed` exists and is a boolean.
 */
function isConditionOutput(value) {
    const isObject = Boolean(value) && typeof value === 'object';
    return isObject && 'passed' in value && typeof value.passed === 'boolean';
}
399
/**
 * Derives the file name used to store an artifact: `<name>.txt` for text
 * artifacts, `<name>.json` for everything else. The name is validated with
 * assertSafeSegment first.
 *
 * @param {{ name: string, type: string }} artifact - Artifact to store.
 * @returns {string} File name including extension.
 */
function getArtifactFileName(artifact) {
    assertSafeSegment(artifact.name, 'artifact name');
    const extension = artifact.type === 'text' ? 'txt' : 'json';
    return `${artifact.name}.${extension}`;
}
408
/**
 * Produces the text written to an artifact file. A text artifact with a
 * string value is written verbatim; anything else is pretty-printed JSON
 * with a 2-space indent.
 *
 * @param {{ type: string, value: * }} artifact - Artifact to serialize.
 * @returns {string | undefined} File content; undefined when the value has no
 *   JSON representation (as JSON.stringify returns).
 */
function toArtifactFileContent(artifact) {
    const { type, value } = artifact;
    const isVerbatimText = type === 'text' && typeof value === 'string';
    return isVerbatimText ? value : JSON.stringify(value, null, 2);
}
414
/**
 * Converts an absolute file path into the form stored in the database:
 * relative to `rootDir`, with forward slashes regardless of platform.
 *
 * @param {string} rootDir - Storage root.
 * @param {string} filePath - File path to convert.
 * @returns {string} Slash-separated path relative to `rootDir`.
 */
function toStoragePath(rootDir, filePath) {
    const relativePath = path.relative(rootDir, filePath);
    const segments = relativePath.split(path.sep);
    return segments.join('/');
}
417
/**
 * Writes a step's artifacts to disk and returns the records needed to
 * register them later (see finalizeArtifacts). Nothing is written to the
 * database here.
 *
 * Staging is all-or-nothing on disk: if any artifact fails (unsafe name,
 * unserializable value, I/O error), the files already written by this call
 * are removed before the error is rethrown.
 *
 * @param {string} rootDir - Storage root; stored paths are relative to it.
 * @param {string} runId - Run that produced the artifacts.
 * @param {{ id: string }} step - Step that produced the artifacts.
 * @param {number} stepAttempt - Attempt number of the step.
 * @param {*} stepRunId - Identifier of the step run.
 * @param {Iterable<{ name: string, type: string, value: * }>} artifacts - Artifacts to write.
 * @returns {Array<{ filePath: string, name: string, record: object, value: * }>}
 *   One entry per artifact, in input order.
 * @throws Rethrows the first error raised while staging.
 */
function stageArtifacts(rootDir, runId, step, stepAttempt, stepRunId, artifacts) {
    const persisted = [];
    // File currently being written; it is not part of `persisted` yet but may
    // already exist on disk if the failure happens mid-write.
    let inFlightFile = null;
    try {
        for (const artifact of artifacts) {
            const artifactFile = path.join(stepAttemptDir(rootDir, runId, step.id, stepAttempt), getArtifactFileName(artifact));
            mkdirSync(path.dirname(artifactFile), { recursive: true });
            const content = toArtifactFileContent(artifact);
            inFlightFile = artifactFile;
            writeFileSync(artifactFile, content, 'utf8');
            // Hash and size describe the exact bytes written above.
            const buffer = Buffer.from(content, 'utf8');
            persisted.push({
                filePath: artifactFile,
                name: artifact.name,
                record: {
                    meta: {
                        attempt: stepAttempt,
                        step_id: step.id,
                    },
                    mime: null,
                    path: toStoragePath(rootDir, artifactFile),
                    runId,
                    sha256: createHash('sha256').update(buffer).digest('hex'),
                    sizeBytes: buffer.byteLength,
                    stepRunId,
                    type: artifact.type,
                },
                value: artifact.value,
            });
            inFlightFile = null;
        }
    }
    catch (error) {
        try {
            cleanupStagedArtifacts(persisted);
            if (inFlightFile) {
                rmSync(inFlightFile, { force: true });
            }
        }
        catch {
            // Best-effort rollback: the staging error below is the one the
            // caller needs to see.
        }
        throw error;
    }
    return persisted;
}
447
/**
 * Deletes the files of staged artifacts. Files that no longer exist are
 * ignored (`force: true`).
 *
 * @param {Iterable<{ filePath: string }>} artifacts - Staged artifact entries.
 * @returns {void}
 */
function cleanupStagedArtifacts(artifacts) {
    for (const { filePath } of artifacts) {
        rmSync(filePath, { force: true });
    }
}
452
/**
 * Registers staged artifacts in the database and returns their values keyed
 * by artifact name.
 *
 * @param {*} db - Database handle passed to insertArtifact.
 * @param {Iterable<{ name: string, record: object, value: * }>} artifacts - Entries from stageArtifacts.
 * @returns {Record<string, *>} Artifact values keyed by name (a later
 *   duplicate name overwrites an earlier one).
 */
function finalizeArtifacts(db, artifacts) {
    const stored = {};
    for (const { name, record, value } of artifacts) {
        const { meta, mime, path: storagePath, runId, sha256, sizeBytes, stepRunId, type } = record;
        insertArtifact(db, {
            meta,
            mime,
            name,
            path: storagePath,
            runId,
            sha256,
            sizeBytes,
            stepRunId,
            type,
        });
        stored[name] = value;
    }
    return stored;
}
470
/**
 * Appends the events emitted by a step, in order, attributing each to the
 * worker and the step run. A missing event list is treated as empty.
 *
 * @param {*} db - Database handle.
 * @param {string} runId - Run the events belong to.
 * @param {*} stepRunId - Step run the events belong to.
 * @param {string} workerId - Worker recorded as the actor.
 * @param {Array<{ type: string, payload: * }> | null | undefined} events - Events to append.
 * @returns {void}
 */
function appendStepEventsInTransaction(db, runId, stepRunId, workerId, events) {
    (events ?? []).forEach(({ type, payload }) => {
        appendEventInTransaction(db, runId, type, payload, {
            actor: `worker:${workerId}`,
            stepRunId,
        });
    });
}
478
/**
 * Records a step as skipped and advances the run cursor, all under the run
 * claim, then mirrors the result into the in-memory `options.stepRuns` list.
 *
 * @param {*} db - Database handle.
 * @param {{ workerId: string, claimEpoch: number }} claim - Claim held by this worker.
 * @param {{ output: object, runId: string, step: { id: string }, stepRun: { id: * }, stepRuns: Array<object> }} options
 *   Skip details; `stepRuns` is appended to on success.
 * @param {number} nextStepIndex - Cursor index to move to.
 * @param {string | null} nextStepId - Id of the step at that index, if any.
 * @returns {boolean} False when withRunClaim reported nothing (the caller
 *   treats this as a lost claim); true once the skip is recorded.
 */
function handleSkippedStep(db, claim, options, nextStepIndex, nextStepId) {
    const { runId, step, stepRun } = options;
    const recorded = withRunClaim(db, runId, claim, () => {
        updateStepRunStatus(db, stepRun.id, 'skipped', {
            finishedAt: new Date().toISOString(),
            output: persistableValue(options.output),
        });
        appendEventInTransaction(db, runId, 'step_skipped', {
            step_id: step.id,
            ...options.output,
        }, {
            actor: `worker:${claim.workerId}`,
            stepRunId: stepRun.id,
        });
        updateRunCursor(db, runId, claim.workerId, claim.claimEpoch, nextStepIndex, nextStepId);
        return true;
    });
    if (!recorded) {
        return false;
    }
    const skippedSnapshot = {
        ...stepRun,
        output_json: safeJsonStringify(persistableValue(options.output)),
        status: 'skipped',
    };
    options.stepRuns.push(skippedSnapshot);
    return true;
}
504
/**
 * Evaluates the workflow's declared outputs against the final inputs and
 * artifacts.
 *
 * Each output expression is handled by the first rule that matches:
 * 1. it starts (after trimming) with `artifacts.` or `inputs.` — resolved as
 *    a reference, passing the expression as written;
 * 2. it is exactly one `{{ reference }}` placeholder — the inner reference is
 *    resolved, so the referenced value is returned rather than a string
 *    rendering of it (presumably to keep its type — confirm against
 *    resolveTemplateReference);
 * 3. otherwise it is interpolated as a template string.
 *
 * @param {{ outputs?: Record<string, string> }} template - Workflow template.
 * @param {object} inputs - Workflow inputs.
 * @param {object} artifacts - Artifacts produced by the run.
 * @returns {Record<string, *>} Resolved outputs keyed by output name.
 */
function resolveWorkflowOutputs(template, inputs, artifacts) {
    const scope = () => ({ artifacts, inputs });
    const resolveOutput = (expression) => {
        if (/^(artifacts|inputs)\./.test(expression.trim())) {
            return resolveTemplateReference(expression, scope());
        }
        const reference = expression.match(EXACT_INTERPOLATION_PATTERN)?.[1];
        if (reference) {
            return resolveTemplateReference(reference, scope());
        }
        return interpolateTemplateString(expression, scope());
    };
    const resolved = {};
    for (const [key, expression] of Object.entries(template.outputs ?? {})) {
        resolved[key] = resolveOutput(expression);
    }
    return resolved;
}
524
/**
 * Chooses the error code recorded for a failed step.
 *
 * An error object that already carries a recognized `code` keeps it.
 * Otherwise the code is derived from the step kind; unknown kinds map to
 * `schema_invalid`.
 *
 * @param {{ kind: string }} step - Step that failed.
 * @param {*} error - Thrown value.
 * @returns {string} Error code.
 */
function getFailureCodeForStep(step, error) {
    const carriesCode = Boolean(error) && typeof error === 'object' && 'code' in error;
    if (carriesCode && isErrorCode(error.code)) {
        return error.code;
    }
    const defaultCodeByKind = new Map([
        ['agent', 'provider_error'],
        ['notify', 'provider_error'],
        ['artifact', 'artifact_failed'],
        ['condition', 'condition_failed'],
        ['exec', 'exec_failed'],
        ['manual', 'manual_rejected'],
    ]);
    return defaultCodeByKind.get(step.kind) ?? 'schema_invalid';
}
550
/**
 * Describes a step failure in the shape that gets persisted: an error code,
 * a detail object and a message.
 *
 * For Error instances the detail holds the error's name and stack and the
 * message is the error's own. For any other thrown value the detail wraps
 * the value and a generic message naming the step is used.
 *
 * @param {{ id: string, kind: string }} step - Step that failed.
 * @param {*} error - Thrown value.
 * @returns {{ code: string, detail: object, message: string }}
 */
function buildFailureMetadata(step, error) {
    const code = getFailureCodeForStep(step, error);
    if (error instanceof Error) {
        return {
            code,
            detail: { name: error.name, stack: error.stack },
            message: error.message,
        };
    }
    return {
        code,
        detail: { error },
        message: `Step "${step.id}" failed`,
    };
}
564
/**
 * Decides whether a failed step attempt may be retried.
 *
 * A step is retried only when it declares a `retry` policy, the attempt that
 * just failed is below `retry.max_attempts` (truncated to an integer, never
 * less than 1), and — when `retry.on` lists error codes — the failure code is
 * one of them.
 *
 * A missing or non-numeric `max_attempts` is treated as 1, i.e. no retry.
 * Without this guard the limit would be NaN, every comparison against it
 * would be false, and the step would be retried without bound.
 *
 * @param {{ retry?: { max_attempts?: number, on?: string[] } }} step - Step definition.
 * @param {number} attempt - Number of the attempt that just failed.
 * @param {string} failureCode - Error code of the failure.
 * @returns {boolean} True when another attempt should be made.
 */
function canRetryStep(step, attempt, failureCode) {
    const retry = step.retry;
    if (!retry) {
        return false;
    }
    const configuredAttempts = Math.trunc(retry.max_attempts);
    const maxAttempts = Number.isNaN(configuredAttempts)
        ? 1
        : Math.max(1, configuredAttempts);
    if (attempt >= maxAttempts) {
        return false;
    }
    if (!retry.on || retry.on.length === 0) {
        return true;
    }
    return retry.on.includes(failureCode);
}
578
/**
 * Records a failed attempt that will be retried: marks the step run
 * `failed`, then emits `step_failed` followed by `step_retry`, all under the
 * run claim. The run itself is not marked failed.
 *
 * @param {*} db - Database handle.
 * @param {string} runId - Run being executed.
 * @param {{ workerId: string, claimEpoch: number }} claim - Claim held by this worker.
 * @param {string} stepId - Step that failed.
 * @param {*} stepRunId - Step run of the failed attempt.
 * @param {number} nextAttempt - Attempt number that will follow.
 * @param {{ code: string, detail: *, message: string }} failure - Failure metadata.
 * @param {*} [output] - Step output to store with the failed attempt.
 * @returns {boolean} True when the failure was recorded; false when
 *   withRunClaim returned a falsy value.
 */
function markStepRetry(db, runId, claim, stepId, stepRunId, nextAttempt, failure, output) {
    const recorded = withRunClaim(db, runId, claim, () => {
        const eventMeta = () => ({
            actor: `worker:${claim.workerId}`,
            stepRunId,
        });
        updateStepRunStatus(db, stepRunId, 'failed', {
            errorCode: failure.code,
            errorDetail: persistableValue(failure.detail),
            errorMessage: failure.message,
            finishedAt: new Date().toISOString(),
            output: persistableValue(output),
        });
        appendEventInTransaction(db, runId, 'step_failed', {
            error_code: failure.code,
            error_message: failure.message,
            step_id: stepId,
        }, eventMeta());
        appendEventInTransaction(db, runId, 'step_retry', {
            error_code: failure.code,
            next_attempt: nextAttempt,
            step_id: stepId,
        }, eventMeta());
        return true;
    });
    return Boolean(recorded);
}
606
/**
 * Persists a retryable failure and appends a matching `failed` snapshot of
 * the step run to the in-memory `stepRuns` list, so the next attempt is
 * numbered correctly.
 *
 * @param {*} db - Database handle.
 * @param {string} runId - Run being executed.
 * @param {{ workerId: string, claimEpoch: number }} claim - Claim held by this worker.
 * @param {string} stepId - Step that failed.
 * @param {{ id: * }} stepRun - Step run of the failed attempt.
 * @param {Array<object>} stepRuns - In-memory step run list (mutated).
 * @param {number} nextAttempt - Attempt number that will follow.
 * @param {{ code: string, detail: *, message: string }} failure - Failure metadata.
 * @param {*} [output] - Step output to store with the failed attempt.
 * @returns {void}
 * @throws {Error} When the failure could not be recorded (reported as a lost claim).
 */
function recordRetryableFailure(db, runId, claim, stepId, stepRun, stepRuns, nextAttempt, failure, output) {
    if (!markStepRetry(db, runId, claim, stepId, stepRun.id, nextAttempt, failure, output)) {
        throw new Error(`Workflow run "${runId}" lost claim ownership before retrying step "${stepId}"`);
    }
    const failedSnapshot = {
        ...stepRun,
        error_code: failure.code,
        error_detail_json: safeJsonStringify(persistableValue(failure.detail)),
        error_message: failure.message,
        output_json: safeJsonStringify(persistableValue(output)),
        status: 'failed',
    };
    stepRuns.push(failedSnapshot);
}
620
/**
 * Records a terminal step failure and fails the whole run, then throws.
 *
 * Under the run claim it marks the step run `failed`, emits `step_failed`
 * and `workflow_failed`, and marks the run failed. This function never
 * returns normally.
 *
 * @param {*} db - Database handle.
 * @param {string} runId - Run being executed.
 * @param {{ workerId: string, claimEpoch: number }} claim - Claim held by this worker.
 * @param {{ id: string, kind: string }} step - Step that failed.
 * @param {*} stepRunId - Step run of the failed attempt.
 * @param {*} error - Thrown value that caused the failure.
 * @param {*} [output] - Step output to store with the failed attempt.
 * @returns {never}
 * @throws {Error} The original error (or a new Error carrying the failure
 *   message when the thrown value was not an Error); a lost-claim Error when
 *   the failure could not be recorded.
 */
function markStepFailed(db, runId, claim, step, stepRunId, error, output) {
    const failure = buildFailureMetadata(step, error);
    const actor = `worker:${claim.workerId}`;
    const describeFailure = () => ({
        error_code: failure.code,
        error_message: failure.message,
        step_id: step.id,
    });
    const recorded = withRunClaim(db, runId, claim, () => {
        updateStepRunStatus(db, stepRunId, 'failed', {
            errorCode: failure.code,
            errorDetail: persistableValue(failure.detail),
            errorMessage: failure.message,
            finishedAt: new Date().toISOString(),
            output: persistableValue(output),
        });
        appendEventInTransaction(db, runId, 'step_failed', describeFailure(), {
            actor,
            stepRunId,
        });
        // The workflow-level event is not tied to a step run.
        appendEventInTransaction(db, runId, 'workflow_failed', describeFailure(), {
            actor,
        });
        markRunFailed(db, runId, claim.workerId, claim.claimEpoch, {
            errorCode: failure.code,
            errorDetail: persistableValue(failure.detail),
            errorMessage: failure.message,
        });
        return true;
    });
    if (!recorded) {
        throw new Error(`Workflow run "${runId}" lost claim ownership before failing step "${step.id}"`);
    }
    if (error instanceof Error) {
        throw error;
    }
    throw new Error(failure.message);
}
657
/**
 * Checks whether a `workflow_canceled` event has already been recorded for a
 * run.
 *
 * @param {{ prepare: (sql: string) => { get: (...params: *[]) => * } }} db - Database handle.
 * @param {string} runId - Run to look up.
 * @returns {boolean} True when at least one such event exists.
 */
function hasWorkflowCanceledEvent(db, runId) {
    const statement = db.prepare("SELECT 1 FROM events WHERE run_id = ? AND type = 'workflow_canceled' LIMIT 1;");
    const row = statement.get(runId);
    return Boolean(row);
}
663
/**
 * Checks whether the run has been canceled and, if so, makes sure a
 * `workflow_canceled` event exists for it (one is appended only when none
 * was recorded yet).
 *
 * @param {*} db - Database handle.
 * @param {string} runId - Run to check.
 * @param {string} workerId - Worker recorded as the actor of the event.
 * @param {string} reason - Reason stored in the event payload.
 * @returns {object | null} The canceled run row, or null when the run is not canceled.
 * @throws {Error} When the run does not exist.
 */
function abortIfCanceled(db, runId, workerId, reason) {
    const currentRun = getRun(db, runId);
    if (!currentRun) {
        throw new Error(`Workflow run "${runId}" was not found`);
    }
    const isCanceled = currentRun.status === 'canceled';
    if (!isCanceled) {
        return null;
    }
    const alreadyRecorded = hasWorkflowCanceledEvent(db, runId);
    if (!alreadyRecorded) {
        appendEvent(db, runId, 'workflow_canceled', { reason }, { actor: `worker:${workerId}` });
    }
    return currentRun;
}
680
/**
 * Marks the step run that was in flight when the run was canceled as
 * `failed`, tagging the stored detail with `reason: 'canceled_during_step'`,
 * and emits a `step_failed` event unless one already exists for that step
 * run.
 *
 * Unlike the other status writers in this file, this one is not wrapped in
 * withRunClaim.
 *
 * @param {*} db - Database handle.
 * @param {string} runId - Canceled run.
 * @param {string} workerId - Worker recorded as the actor of the event.
 * @param {{ id: string, kind: string }} step - Step that was running.
 * @param {*} stepRunId - Step run to mark as failed.
 * @returns {void}
 */
function markCanceledStep(db, runId, workerId, step, stepRunId) {
    const cancellation = new Error(`Workflow run "${runId}" was canceled during step "${step.id}"`);
    const failure = buildFailureMetadata(step, cancellation);
    updateStepRunStatus(db, stepRunId, 'failed', {
        errorCode: failure.code,
        errorDetail: persistableValue({
            ...failure.detail,
            reason: 'canceled_during_step',
        }),
        errorMessage: failure.message,
        finishedAt: new Date().toISOString(),
    });
    const existingFailureEvent = getLatestEventForStepRun(db, runId, stepRunId, ['step_failed']);
    if (existingFailureEvent) {
        return;
    }
    appendEvent(db, runId, 'step_failed', {
        error_code: failure.code,
        error_message: failure.message,
        reason: 'canceled_during_step',
        step_id: step.id,
    }, {
        actor: `worker:${workerId}`,
        stepRunId,
    });
}
706
/**
 * Applies a patch to the in-memory snapshot of one step run. The entry is
 * replaced by a merged copy (the original object is not mutated); nothing
 * happens when no entry has the given id.
 *
 * @param {Array<{ id: * }>} stepRuns - In-memory step run list (mutated in place).
 * @param {*} stepRunId - Id of the step run to patch.
 * @param {object} patch - Fields to overwrite.
 * @returns {void}
 */
function replaceStepRunSnapshot(stepRuns, stepRunId, patch) {
    const position = stepRuns.findIndex((candidate) => candidate.id === stepRunId);
    if (position === -1) {
        return;
    }
    const current = stepRuns[position];
    stepRuns[position] = { ...current, ...patch };
}
716
/**
 * Completes a manual step that was waiting for approval and has since been
 * approved.
 *
 * Returns null (meaning "nothing to resume") when the step is not a manual
 * step, no step run is waiting for it, or no `manual_approved` event exists
 * for that step run. Otherwise, under the run claim, the waiting step run is
 * marked `succeeded` with the approval details, `step_succeeded` is emitted,
 * and the run cursor advances to the next step. The in-memory state
 * (`options.state.completedSteps`, `options.stepRuns`) is updated to match.
 * Finally the run is re-checked for cancellation.
 *
 * @param {object} options - Execution options; reads `db`, `runId`, `claim`,
 *   `step`, `stepIndex`, `template`, `state` and `stepRuns`.
 * @returns {{ canceledRun: object | null, shouldContinue: boolean } | null}
 * @throws {Error} When the completion could not be recorded (reported as a lost claim).
 */
function resumeApprovedManualStep(options) {
    const { db, runId, claim, step } = options;
    if (step.kind !== 'manual') {
        return null;
    }
    const waitingStepRun = getWaitingManualStepRun(db, runId, step.id);
    if (!waitingStepRun) {
        return null;
    }
    const approvalEvent = getLatestEventForStepRun(db, runId, waitingStepRun.id, ['manual_approved']);
    if (!approvalEvent) {
        return null;
    }
    const approvalOutput = {
        approved_at: approvalEvent.ts,
        approved_by: approvalEvent.actor,
        decision: 'approve',
    };
    const recorded = withRunClaim(db, runId, claim, () => {
        // The step is considered finished at the moment it was approved.
        updateStepRunStatus(db, waitingStepRun.id, 'succeeded', {
            finishedAt: approvalEvent.ts,
            output: persistableValue(approvalOutput),
        });
        appendEventInTransaction(db, runId, 'step_succeeded', {
            artifact_names: [],
            step_id: step.id,
        }, {
            actor: `worker:${claim.workerId}`,
            stepRunId: waitingStepRun.id,
        });
        const followingIndex = options.stepIndex + 1;
        const followingStepId = options.template.steps[followingIndex]?.id ?? null;
        updateRunCursor(db, runId, claim.workerId, claim.claimEpoch, followingIndex, followingStepId);
        return true;
    });
    if (!recorded) {
        throw new Error(`Workflow run "${options.runId}" lost claim ownership while resuming manual step "${options.step.id}"`);
    }
    options.state.completedSteps.set(step.id, {
        output: approvalOutput,
        status: 'succeeded',
    });
    replaceStepRunSnapshot(options.stepRuns, waitingStepRun.id, {
        finished_at: approvalEvent.ts,
        output_json: safeJsonStringify(persistableValue(approvalOutput)),
        status: 'succeeded',
    });
    const canceledRun = abortIfCanceled(db, runId, claim.workerId, 'canceled_during_step');
    return canceledRun
        ? { canceledRun, shouldContinue: false }
        : { canceledRun: null, shouldContinue: true };
}
772
/**
 * Translates the abort reason reported by the step abort monitor into an
 * outcome for the step loop.
 *
 * - no reason: null (the step was not aborted)
 * - `canceled`: the cancellation is recorded, the step run is marked as
 *   canceled, and a stop result carrying the current run row is returned
 * - `claim_lost`: throws, since this worker no longer owns the run
 * - any other type (e.g. `timeout`): null, leaving the caller's regular
 *   failure handling in charge
 *
 * @param {object} options - Execution options; reads `db`, `runId`, `claim` and `step`.
 * @param {*} stepRunId - Step run that was executing.
 * @param {{ type: string, message: string } | null} stepAbortReason - Reason reported by the monitor.
 * @returns {{ canceledRun: *, shouldContinue: false } | null}
 * @throws {Error} When the abort reason is `claim_lost`.
 */
function resolveStepAbort(options, stepRunId, stepAbortReason) {
    switch (stepAbortReason?.type) {
        case 'canceled': {
            const { db, runId, claim, step } = options;
            abortIfCanceled(db, runId, claim.workerId, 'canceled_during_step');
            markCanceledStep(db, runId, claim.workerId, step, stepRunId);
            return {
                canceledRun: getRun(db, runId),
                shouldContinue: false,
            };
        }
        case 'claim_lost':
            throw new Error(stepAbortReason.message);
        default:
            return null;
    }
}
789
+ async function executeStep(options) {
790
+ const canceledRun = abortIfCanceled(options.db, options.runId, options.claim.workerId, 'canceled_before_next_step');
791
+ if (canceledRun) {
792
+ return {
793
+ canceledRun,
794
+ shouldContinue: false,
795
+ };
796
+ }
797
+ const resumedManualStep = resumeApprovedManualStep(options);
798
+ if (resumedManualStep) {
799
+ return resumedManualStep;
800
+ }
801
+ if (shouldSkipStep(options.step, options.state.completedSteps)) {
802
+ const { request, stepAttempt } = prepareStepExecution(options.runId, options.claim.workerId, options.stepIndex, options.step, options.template, options.inputs, options.state.artifacts, new AbortController().signal, options.stepRuns);
803
+ const stepRun = startStepAttempt(options.db, options.runId, options.claim, options.step, stepAttempt, options.stepIndex, request);
804
+ if (!stepRun) {
805
+ throw new Error(`Workflow run "${options.runId}" lost claim ownership before starting step "${options.step.id}"`);
806
+ }
807
+ const skippedOutput = {
808
+ reason: 'dependency_not_satisfied',
809
+ skipped_by: options.step.depends_on ?? [],
810
+ };
811
+ options.state.completedSteps.set(options.step.id, {
812
+ output: skippedOutput,
813
+ status: 'skipped',
814
+ });
815
+ const skipped = handleSkippedStep(options.db, options.claim, {
816
+ output: skippedOutput,
817
+ runId: options.runId,
818
+ step: options.step,
819
+ stepRun,
820
+ stepRuns: options.stepRuns,
821
+ }, options.stepIndex + 1, options.template.steps[options.stepIndex + 1]?.id ?? null);
822
+ if (!skipped) {
823
+ throw new Error(`Workflow run "${options.runId}" lost claim ownership before skipping step "${options.step.id}"`);
824
+ }
825
+ return {
826
+ canceledRun: null,
827
+ shouldContinue: true,
828
+ };
829
+ }
830
+ while (true) {
831
+ const abortMonitor = createStepAbortMonitor(options.db, options.runId, options.claim, options.step);
832
+ const { context, request, stepAttempt } = prepareStepExecution(options.runId, options.claim.workerId, options.stepIndex, options.step, options.template, options.inputs, options.state.artifacts, abortMonitor.signal, options.stepRuns);
833
+ const stepRun = startStepAttempt(options.db, options.runId, options.claim, options.step, stepAttempt, options.stepIndex, request);
834
+ if (!stepRun) {
835
+ throw new Error(`Workflow run "${options.runId}" lost claim ownership before starting step "${options.step.id}"`);
836
+ }
837
+ let result;
838
+ try {
839
+ result = await getStepExecutor(options.executors, options.step).execute(options.step, context);
840
+ }
841
+ catch (error) {
842
+ abortMonitor.cleanup();
843
+ const abortedStep = resolveStepAbort(options, stepRun.id, abortMonitor.abortReason());
844
+ if (abortedStep) {
845
+ return abortedStep;
846
+ }
847
+ const failure = buildFailureMetadata(options.step, error);
848
+ if (canRetryStep(options.step, stepAttempt, failure.code)) {
849
+ recordRetryableFailure(options.db, options.runId, options.claim, options.step.id, stepRun, options.stepRuns, stepAttempt + 1, failure);
850
+ continue;
851
+ }
852
+ markStepFailed(options.db, options.runId, options.claim, options.step, stepRun.id, error);
853
+ }
854
+ abortMonitor.cleanup();
855
+ const abortedStep = resolveStepAbort(options, stepRun.id, abortMonitor.abortReason());
856
+ if (abortedStep) {
857
+ return abortedStep;
858
+ }
859
+ if (result.status === 'failed') {
860
+ const failure = buildFailureMetadata(options.step, new Error(`Step "${options.step.id}" returned status failed`));
861
+ if (canRetryStep(options.step, stepAttempt, failure.code)) {
862
+ recordRetryableFailure(options.db, options.runId, options.claim, options.step.id, stepRun, options.stepRuns, stepAttempt + 1, failure, persistableValue(result.outputs));
863
+ continue;
864
+ }
865
+ markStepFailed(options.db, options.runId, options.claim, options.step, stepRun.id, new Error(`Step "${options.step.id}" returned status failed`), persistableValue(result.outputs));
866
+ }
867
+ if (result.status === 'waiting_manual') {
868
+ const runCanceledDuringStep = abortIfCanceled(options.db, options.runId, options.claim.workerId, 'canceled_during_step');
869
+ if (runCanceledDuringStep) {
870
+ markCanceledStep(options.db, options.runId, options.claim.workerId, options.step, stepRun.id);
871
+ return {
872
+ canceledRun: runCanceledDuringStep,
873
+ shouldContinue: false,
874
+ };
875
+ }
876
+ const paused = withRunClaim(options.db, options.runId, options.claim, () => {
877
+ appendStepEventsInTransaction(options.db, options.runId, stepRun.id, options.claim.workerId, result.events);
878
+ updateStepRunStatus(options.db, stepRun.id, 'waiting_manual', {
879
+ finishedAt: new Date().toISOString(),
880
+ output: persistableValue(result.outputs),
881
+ });
882
+ markRunWaitingManual(options.db, options.runId, options.claim.workerId, options.claim.claimEpoch);
883
+ return true;
884
+ });
885
+ if (!paused) {
886
+ throw new Error(`Workflow run "${options.runId}" lost claim ownership before pausing manual step "${options.step.id}"`);
887
+ }
888
+ return {
889
+ canceledRun: getRun(options.db, options.runId),
890
+ shouldContinue: false,
891
+ };
892
+ }
893
+ if (result.status === 'skipped') {
894
+ const skippedOutput = result.outputs ?? {};
895
+ options.state.completedSteps.set(options.step.id, {
896
+ output: skippedOutput,
897
+ status: 'skipped',
898
+ });
899
+ const skipped = handleSkippedStep(options.db, options.claim, {
900
+ output: skippedOutput,
901
+ runId: options.runId,
902
+ step: options.step,
903
+ stepRun,
904
+ stepRuns: options.stepRuns,
905
+ }, options.stepIndex + 1, options.template.steps[options.stepIndex + 1]?.id ?? null);
906
+ if (!skipped) {
907
+ const canceledAfterLoss = abortIfCanceled(options.db, options.runId, options.claim.workerId, 'canceled_during_step');
908
+ if (canceledAfterLoss) {
909
+ return {
910
+ canceledRun: canceledAfterLoss,
911
+ shouldContinue: false,
912
+ };
913
+ }
914
+ throw new Error(`Workflow run "${options.runId}" lost claim ownership before finalizing skipped step "${options.step.id}"`);
915
+ }
916
+ const runCanceledDuringStep = abortIfCanceled(options.db, options.runId, options.claim.workerId, 'canceled_during_step');
917
+ if (runCanceledDuringStep) {
918
+ markCanceledStep(options.db, options.runId, options.claim.workerId, options.step, stepRun.id);
919
+ return {
920
+ canceledRun: runCanceledDuringStep,
921
+ shouldContinue: false,
922
+ };
923
+ }
924
+ return {
925
+ canceledRun: null,
926
+ shouldContinue: true,
927
+ };
928
+ }
929
+ if (!getRunForClaim(options.db, options.runId, options.claim)) {
930
+ const canceledAfterLoss = abortIfCanceled(options.db, options.runId, options.claim.workerId, 'canceled_during_step');
931
+ if (canceledAfterLoss) {
932
+ markCanceledStep(options.db, options.runId, options.claim.workerId, options.step, stepRun.id);
933
+ return {
934
+ canceledRun: canceledAfterLoss,
935
+ shouldContinue: false,
936
+ };
937
+ }
938
+ throw new Error(`Workflow run "${options.runId}" lost claim ownership before persisting step "${options.step.id}"`);
939
+ }
940
+ const stagedArtifacts = stageArtifacts(options.artifactBaseDir, options.runId, options.step, stepAttempt, stepRun.id, result.artifacts ?? []);
941
+ let storedArtifacts = {};
942
+ const completed = withRunClaim(options.db, options.runId, options.claim, () => {
943
+ storedArtifacts = finalizeArtifacts(options.db, stagedArtifacts);
944
+ appendStepEventsInTransaction(options.db, options.runId, stepRun.id, options.claim.workerId, result.events);
945
+ updateStepRunStatus(options.db, stepRun.id, 'succeeded', {
946
+ finishedAt: new Date().toISOString(),
947
+ output: persistableValue(result.outputs),
948
+ });
949
+ appendEventInTransaction(options.db, options.runId, 'step_succeeded', {
950
+ artifact_names: Object.keys(storedArtifacts),
951
+ step_id: options.step.id,
952
+ }, {
953
+ actor: `worker:${options.claim.workerId}`,
954
+ stepRunId: stepRun.id,
955
+ });
956
+ updateRunCursor(options.db, options.runId, options.claim.workerId, options.claim.claimEpoch, options.stepIndex + 1, options.template.steps[options.stepIndex + 1]?.id ?? null);
957
+ return true;
958
+ });
959
+ if (!completed) {
960
+ cleanupStagedArtifacts(stagedArtifacts);
961
+ const canceledAfterLoss = abortIfCanceled(options.db, options.runId, options.claim.workerId, 'canceled_during_step');
962
+ if (canceledAfterLoss) {
963
+ return {
964
+ canceledRun: canceledAfterLoss,
965
+ shouldContinue: false,
966
+ };
967
+ }
968
+ throw new Error(`Workflow run "${options.runId}" lost claim ownership before completing step "${options.step.id}"`);
969
+ }
970
+ Object.assign(options.state.artifacts, storedArtifacts);
971
+ options.state.completedSteps.set(options.step.id, {
972
+ output: result.outputs,
973
+ status: 'succeeded',
974
+ });
975
+ const runCanceledDuringStep = abortIfCanceled(options.db, options.runId, options.claim.workerId, 'canceled_during_step');
976
+ if (runCanceledDuringStep) {
977
+ markCanceledStep(options.db, options.runId, options.claim.workerId, options.step, stepRun.id);
978
+ return {
979
+ canceledRun: runCanceledDuringStep,
980
+ shouldContinue: false,
981
+ };
982
+ }
983
+ options.stepRuns.push({
984
+ ...stepRun,
985
+ output_json: safeJsonStringify(persistableValue(result.outputs)),
986
+ status: 'succeeded',
987
+ });
988
+ return {
989
+ canceledRun: null,
990
+ shouldContinue: true,
991
+ };
992
+ }
993
+ }
994
/**
 * Drive a claimed workflow run from its current cursor to completion.
 *
 * The run must exist, be in the `running` status, and be claimed by the
 * given worker at the given claim epoch. The registered workflow and its
 * on-disk template must still match the hash recorded on the run.
 * Steps are executed one at a time via `executeStep`, starting at the index
 * reported by `resolveRunState`.
 *
 * @param {string} runId - Identifier of the workflow run to execute.
 * @param {{workerId: string, claimEpoch: number}} claim - Claim the caller
 *   holds on the run (shape inferred from the fields read here).
 * @param {object} options - Execution options.
 * @param {object} options.db - Database handle forwarded to every helper.
 * @param {object} options.executors - Step executors forwarded to `executeStep`.
 * @param {string} [options.rootDir] - Base directory for resolving the
 *   workflow source path; defaults to `process.cwd()`.
 * @param {string} [options.artifactBaseDir] - Base directory for run
 *   artifacts; defaults to the resolved root directory.
 * @returns {Promise<object>} The run reported as canceled/paused by a step,
 *   or the run row re-read after it was marked succeeded.
 * @throws {Error} When the run or workflow is missing, the claim or hash does
 *   not match, or claim ownership is lost before completion.
 */
export async function executeRun(runId, claim, options) {
    const rootDir = path.resolve(options.rootDir ?? process.cwd());
    const artifactBaseDir = path.resolve(options.artifactBaseDir ?? rootDir);
    const db = options.db;

    const run = getRun(db, runId);
    if (!run) {
        throw new Error(`Workflow run "${runId}" was not found`);
    }

    // The caller must still hold the exact claim (worker + epoch) on a running run.
    const holdsClaim = run.status === 'running' &&
        run.claimed_by === claim.workerId &&
        run.claim_epoch === claim.claimEpoch;
    if (!holdsClaim) {
        throw new Error(`Workflow run "${runId}" is not claimed by worker "${claim.workerId}"`);
    }

    const workflow = getWorkflow(db, run.workflow_id, run.workflow_version);
    if (!workflow) {
        throw new Error(`Workflow "${run.workflow_id}"@${run.workflow_version} was not found`);
    }
    if (workflow.hash !== run.workflow_hash) {
        throw new Error(`Workflow run "${run.id}" cannot execute because its scheduled hash no longer matches the registered workflow`);
    }

    // Re-check the template on disk against the hash captured at scheduling time.
    const templatePath = resolvePathWithinBase(rootDir, workflow.source_path, 'workflow source_path');
    assertWorkflowTemplateIdentity(templatePath, run.workflow_hash, `Workflow run "${run.id}" cannot execute because the registered workflow source changed after scheduling`);
    const template = loadAndValidateTemplateFromFile(templatePath).template;

    const inputs = parseJson(run.inputs_json, {});
    const state = resolveRunState(artifactBaseDir, template, run, db);
    const stepRuns = listStepRuns(db, run.id);
    mkdirSync(artifactsDir(artifactBaseDir, run.id), { recursive: true });

    let stepIndex = state.nextStepIndex;
    while (stepIndex < template.steps.length) {
        const step = template.steps[stepIndex];
        if (!step) {
            break;
        }
        const stepOptions = {
            artifactBaseDir,
            claim,
            db,
            executors: options.executors,
            inputs,
            runId: run.id,
            state,
            step,
            stepIndex,
            stepRuns,
            template,
        };
        const outcome = await executeStep(stepOptions);
        // A step that hands back a run record ends execution with that record.
        if (outcome.canceledRun) {
            return outcome.canceledRun;
        }
        if (!outcome.shouldContinue) {
            break;
        }
        stepIndex += 1;
    }

    const workflowResult = resolveWorkflowOutputs(template, inputs, state.artifacts);
    const actor = `worker:${claim.workerId}`;
    const completedRun = withRunClaim(db, run.id, claim, () => {
        appendEventInTransaction(db, run.id, 'workflow_succeeded', { result: workflowResult }, { actor });
        return markRunSucceeded(db, run.id, claim.workerId, claim.claimEpoch, { result: workflowResult });
    });
    if (!completedRun) {
        throw new Error(`Workflow run "${run.id}" lost claim ownership before completion`);
    }
    return getRun(db, run.id);
}
1061
+ //# sourceMappingURL=runner.js.map