@cascade-flow/backend-interface 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -0
- package/dist/analytics-helpers.d.ts +51 -0
- package/dist/analytics-helpers.d.ts.map +1 -0
- package/dist/analytics.d.ts +338 -0
- package/dist/analytics.d.ts.map +1 -0
- package/dist/error-fingerprint.d.ts +43 -0
- package/dist/error-fingerprint.d.ts.map +1 -0
- package/dist/events.d.ts +818 -0
- package/dist/events.d.ts.map +1 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +13780 -0
- package/dist/index.js.map +85 -0
- package/dist/interface.d.ts +745 -0
- package/dist/interface.d.ts.map +1 -0
- package/dist/projection.d.ts +93 -0
- package/dist/projection.d.ts.map +1 -0
- package/dist/schema-conversion.d.ts +40 -0
- package/dist/schema-conversion.d.ts.map +1 -0
- package/dist/schemas.d.ts +271 -0
- package/dist/schemas.d.ts.map +1 -0
- package/dist/serialization.d.ts +37 -0
- package/dist/serialization.d.ts.map +1 -0
- package/dist/time-utils.d.ts +28 -0
- package/dist/time-utils.d.ts.map +1 -0
- package/package.json +52 -0
|
@@ -0,0 +1,745 @@
|
|
|
1
|
+
import type { StepRecord, StepError, LogEntry, RunSubmission, RunState, WorkflowMetadata, WorkflowRegistration, StepDefinition } from "./schemas.ts";
|
|
2
|
+
import type { Event, StepEvent, WorkflowEvent } from "./events.ts";
|
|
3
|
+
import type { AnalyticsOptions, ErrorAnalysis, RetryAnalysis, SchedulingLatency, StepDuration, WorkflowDuration, WorkerStability, Throughput, QueueDepth, QueueDepthByWorkflow, SuccessRate, AnalyticsSummary } from "./analytics.ts";
|
|
4
|
+
/**
|
|
5
|
+
* Metadata supplied when a step starts; passed as `metadata` to saveStepStart and claimScheduledStep
|
|
6
|
+
*/
|
|
7
|
+
export type StepStartMetadata = {
|
|
8
|
+
dependencies: string[]; // presumably the IDs of the steps this step depends on — TODO confirm
|
|
9
|
+
timestamp: number; // NOTE(review): unit is not stated here (ms vs µs) — confirm against events.ts
|
|
10
|
+
attemptNumber: number; // NOTE(review): whether attempts are 0- or 1-based is not shown here — confirm
|
|
11
|
+
};
|
|
12
|
+
/**
|
|
13
|
+
* Metadata supplied when a step completes successfully; passed as `metadata` to saveStepComplete
|
|
14
|
+
*/
|
|
15
|
+
export type StepCompleteMetadata = {
|
|
16
|
+
timestamp: number; // NOTE(review): unit is not stated here (ms vs µs) — confirm against events.ts
|
|
17
|
+
duration: number; // NOTE(review): unit is not stated here — confirm
|
|
18
|
+
logs?: LogEntry[]; // optional log entries accompanying the completion
|
|
19
|
+
attemptNumber: number; // NOTE(review): whether attempts are 0- or 1-based is not shown here — confirm
|
|
20
|
+
output: unknown; // NOTE(review): saveStepComplete also takes a separate `output` argument; which one is authoritative is not shown here
|
|
21
|
+
};
|
|
22
|
+
/**
|
|
23
|
+
* Workflow metadata extended with the workflow's step definitions and a step count
|
|
24
|
+
* @deprecated Use WorkflowRegistration from schemas.ts instead
|
|
25
|
+
*/
|
|
26
|
+
export type WorkflowDetails = WorkflowMetadata & {
|
|
27
|
+
steps: StepDefinition[]; // step definitions belonging to the workflow
|
|
28
|
+
stepCount: number; // presumably equals steps.length — TODO confirm
|
|
29
|
+
};
|
|
30
|
+
/**
|
|
31
|
+
* Abstract backend interface for persisting workflow execution state
|
|
32
|
+
*
|
|
33
|
+
* Implementations can store data in various backends (filesystem, database, cloud storage, etc.)
|
|
34
|
+
*
|
|
35
|
+
* This interface supports event sourcing: all state changes are recorded as
|
|
36
|
+
* immutable events, and current state is computed by replaying events.
|
|
37
|
+
*/
|
|
38
|
+
export declare abstract class Backend {
|
|
39
|
+
/**
|
|
40
|
+
* Initialize the backend (e.g., run migrations, create schema)
|
|
41
|
+
* Must be called before using the backend for any operations.
|
|
42
|
+
*
|
|
43
|
+
* For database backends, this runs schema migrations.
|
|
44
|
+
* For filesystem backends, this is typically a no-op.
|
|
45
|
+
*/
|
|
46
|
+
abstract initialize(): Promise<void>;
|
|
47
|
+
/**
|
|
48
|
+
* Initialize storage for a new workflow run
|
|
49
|
+
* Creates necessary directories, tables, or resources
|
|
50
|
+
*
|
|
51
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
52
|
+
* @param runId - Unique identifier for the workflow run
|
|
53
|
+
*/
|
|
54
|
+
abstract initializeRun(workflowSlug: string, runId: string): Promise<void>;
|
|
55
|
+
/**
|
|
56
|
+
* Record that a step has been scheduled for execution
|
|
57
|
+
*
|
|
58
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
59
|
+
* @param runId - Unique identifier for the workflow run
|
|
60
|
+
* @param stepId - Unique step identifier (directory name)
|
|
61
|
+
* @param metadata - Scheduling metadata
|
|
62
|
+
*/
|
|
63
|
+
abstract saveStepScheduled(workflowSlug: string, runId: string, stepId: string, metadata: {
|
|
64
|
+
availableAt: number;
|
|
65
|
+
reason: "initial" | "retry" | "dependency-satisfied";
|
|
66
|
+
attemptNumber: number;
|
|
67
|
+
retryDelayMs?: number;
|
|
68
|
+
}): Promise<void>;
|
|
69
|
+
/**
|
|
70
|
+
* Record that a step has started executing
|
|
71
|
+
*
|
|
72
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
73
|
+
* @param runId - Unique identifier for the workflow run
|
|
74
|
+
* @param stepId - Unique step identifier (directory name)
|
|
75
|
+
* @param workerId - Worker executing the step
|
|
76
|
+
* @param metadata - Metadata about the step start (dependencies, timestamp, attemptNumber)
|
|
77
|
+
*/
|
|
78
|
+
abstract saveStepStart(workflowSlug: string, runId: string, stepId: string, workerId: string, metadata: StepStartMetadata): Promise<void>;
|
|
79
|
+
/**
|
|
80
|
+
* Record that a step has completed successfully
|
|
81
|
+
*
|
|
82
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
83
|
+
* @param runId - Unique identifier for the workflow run
|
|
84
|
+
* @param stepId - Unique step identifier (directory name)
|
|
85
|
+
* @param output - The output produced by the step (will be serialized)
|
|
86
|
+
* @param metadata - Metadata about the completion (timestamp, duration, attemptNumber, output, optional logs)
|
|
87
|
+
* @param exportOutput - Whether this step's output should be included in final workflow output (defaults to false)
|
|
88
|
+
*/
|
|
89
|
+
abstract saveStepComplete(workflowSlug: string, runId: string, stepId: string, output: unknown, metadata: StepCompleteMetadata, exportOutput?: boolean): Promise<void>;
|
|
90
|
+
/**
|
|
91
|
+
* Record that a step has failed
|
|
92
|
+
*
|
|
93
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
94
|
+
* @param runId - Unique identifier for the workflow run
|
|
95
|
+
* @param stepId - Unique step identifier (directory name)
|
|
96
|
+
* @param error - The error that caused the failure
|
|
97
|
+
* @param metadata - Failure metadata
|
|
98
|
+
*/
|
|
99
|
+
abstract saveStepFailed(workflowSlug: string, runId: string, stepId: string, error: StepError, metadata: {
|
|
100
|
+
duration: number;
|
|
101
|
+
attemptNumber: number;
|
|
102
|
+
terminal: boolean;
|
|
103
|
+
nextRetryAt?: number;
|
|
104
|
+
failureReason: "exhausted-retries" | "worker-crash" | "timeout" | "cancelled" | "execution-error";
|
|
105
|
+
}): Promise<void>;
|
|
106
|
+
/**
|
|
107
|
+
* Atomically record a step failure and schedule its retry
|
|
108
|
+
*
|
|
109
|
+
* This combines saveStepFailed + saveStepScheduled in a single atomic operation
|
|
110
|
+
* to prevent the race condition where only the failure event gets persisted.
|
|
111
|
+
*
|
|
112
|
+
* Emits:
|
|
113
|
+
* 1. StepFailed (terminal: false)
|
|
114
|
+
* 2. StepRetrying (informational)
|
|
115
|
+
* 3. StepScheduled (for retry)
|
|
116
|
+
*
|
|
117
|
+
* All three events are written atomically - either all succeed or none do.
|
|
118
|
+
*
|
|
119
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
120
|
+
* @param runId - Unique identifier for the workflow run
|
|
121
|
+
* @param stepId - Unique step identifier (directory name)
|
|
122
|
+
* @param error - The error that caused the failure
|
|
123
|
+
* @param failureMetadata - Failure metadata (duration, attemptNumber, nextRetryAt, failureReason)
|
|
124
|
+
* @param scheduleMetadata - Retry scheduling metadata (availableAt, nextAttemptNumber, retryDelayMs, maxRetries)
|
|
125
|
+
*/
|
|
126
|
+
abstract saveStepFailedAndScheduleRetry(workflowSlug: string, runId: string, stepId: string, error: StepError, failureMetadata: {
|
|
127
|
+
duration: number;
|
|
128
|
+
attemptNumber: number;
|
|
129
|
+
nextRetryAt: number;
|
|
130
|
+
failureReason: "execution-error" | "timeout";
|
|
131
|
+
}, scheduleMetadata: {
|
|
132
|
+
availableAt: number;
|
|
133
|
+
nextAttemptNumber: number;
|
|
134
|
+
retryDelayMs: number;
|
|
135
|
+
maxRetries: number;
|
|
136
|
+
}): Promise<void>;
|
|
137
|
+
/**
|
|
138
|
+
* Record that a step has been skipped
|
|
139
|
+
*
|
|
140
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
141
|
+
* @param runId - Unique identifier for the workflow run
|
|
142
|
+
* @param stepId - Unique step identifier (directory name)
|
|
143
|
+
* @param metadata - Skip metadata
|
|
144
|
+
*/
|
|
145
|
+
abstract saveStepSkipped(workflowSlug: string, runId: string, stepId: string, metadata: {
|
|
146
|
+
skipType: "primary" | "cascade";
|
|
147
|
+
reason: string;
|
|
148
|
+
metadata?: Record<string, any>;
|
|
149
|
+
duration: number;
|
|
150
|
+
attemptNumber: number;
|
|
151
|
+
cascadedFrom?: string;
|
|
152
|
+
}): Promise<void>;
|
|
153
|
+
/**
|
|
154
|
+
* Record a heartbeat for a running step
|
|
155
|
+
*
|
|
156
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
157
|
+
* @param runId - Unique identifier for the workflow run
|
|
158
|
+
* @param stepId - Unique step identifier (directory name)
|
|
159
|
+
* @param workerId - Worker executing the step
|
|
160
|
+
* @param attemptNumber - Current attempt number
|
|
161
|
+
*/
|
|
162
|
+
abstract saveStepHeartbeat(workflowSlug: string, runId: string, stepId: string, workerId: string, attemptNumber: number): Promise<void>;
|
|
163
|
+
/**
|
|
164
|
+
* Record that a step has been reclaimed from a stale worker
|
|
165
|
+
*
|
|
166
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
167
|
+
* @param runId - Unique identifier for the workflow run
|
|
168
|
+
* @param stepId - Unique step identifier (directory name)
|
|
169
|
+
* @param metadata - Reclamation metadata
|
|
170
|
+
*/
|
|
171
|
+
abstract saveStepReclaimed(workflowSlug: string, runId: string, stepId: string, metadata: {
|
|
172
|
+
originalWorkerId: string;
|
|
173
|
+
reclaimedBy: string;
|
|
174
|
+
lastHeartbeat: number;
|
|
175
|
+
staleThreshold: number;
|
|
176
|
+
staleDuration: number;
|
|
177
|
+
attemptNumber: number;
|
|
178
|
+
}): Promise<void>;
|
|
179
|
+
/**
|
|
180
|
+
* Record that the workflow has completed successfully
|
|
181
|
+
*
|
|
182
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
183
|
+
* @param runId - Unique identifier for the workflow run
|
|
184
|
+
* @param output - The final workflow output (only exported steps)
|
|
185
|
+
* @param metadata - Metadata about the workflow completion (workflowAttemptNumber, timestamp, duration, totalSteps)
|
|
186
|
+
*/
|
|
187
|
+
abstract saveWorkflowComplete(workflowSlug: string, runId: string, output: unknown, metadata: {
|
|
188
|
+
workflowAttemptNumber: number;
|
|
189
|
+
timestamp: number;
|
|
190
|
+
duration: number;
|
|
191
|
+
totalSteps: number;
|
|
192
|
+
}): Promise<void>;
|
|
193
|
+
/**
|
|
194
|
+
* Save logs for a step
|
|
195
|
+
*
|
|
196
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
197
|
+
* @param runId - Unique identifier for the workflow run
|
|
198
|
+
* @param stepId - Unique step identifier (directory name)
|
|
199
|
+
* @param logs - Array of log entries
|
|
200
|
+
* @returns Promise that resolves once the logs have been saved (no value is returned)
|
|
201
|
+
*/
|
|
202
|
+
abstract saveStepLogs(workflowSlug: string, runId: string, stepId: string, logs: LogEntry[]): Promise<void>;
|
|
203
|
+
/**
|
|
204
|
+
* Load logs for a step
|
|
205
|
+
*
|
|
206
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
207
|
+
* @param runId - Unique identifier for the workflow run
|
|
208
|
+
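* @param stepId - Unique step identifier (directory name)
* @param attemptNumber - Optional attempt number (presumably selects which attempt's logs to load — confirm with implementations)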
|
|
209
|
+
* @returns Array of log entries or null if no logs exist
|
|
210
|
+
*/
|
|
211
|
+
abstract loadStepLogs(workflowSlug: string, runId: string, stepId: string, attemptNumber?: number): Promise<LogEntry[] | null>;
|
|
212
|
+
/**
|
|
213
|
+
* Load all step records for a given run
|
|
214
|
+
* Used for resuming interrupted workflows
|
|
215
|
+
*
|
|
216
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
217
|
+
* @param runId - Unique identifier for the workflow run
|
|
218
|
+
* @returns Array of validated step records
|
|
219
|
+
*/
|
|
220
|
+
abstract loadRun(workflowSlug: string, runId: string): Promise<StepRecord[]>;
|
|
221
|
+
/**
|
|
222
|
+
* Check if a run exists in the backend
|
|
223
|
+
*
|
|
224
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
225
|
+
* @param runId - Unique identifier for the workflow run
|
|
226
|
+
* @returns true if the run exists, false otherwise
|
|
227
|
+
*/
|
|
228
|
+
abstract runExists(workflowSlug: string, runId: string): Promise<boolean>;
|
|
229
|
+
/**
|
|
230
|
+
* Append an event to the event log
|
|
231
|
+
*
|
|
232
|
+
* Events are immutable and append-only. This is the core of event sourcing.
|
|
233
|
+
* Each event represents a state transition or action that occurred during execution.
|
|
234
|
+
* Events are routed to the appropriate directory based on their category (workflow vs step).
|
|
235
|
+
*
|
|
236
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
237
|
+
* @param runId - Unique identifier for the workflow run
|
|
238
|
+
* @param event - The event to append (either step or workflow event)
|
|
239
|
+
*/
|
|
240
|
+
abstract appendEvent(workflowSlug: string, runId: string, event: Event): Promise<void>;
|
|
241
|
+
/**
|
|
242
|
+
* Load events from the event log
|
|
243
|
+
*
|
|
244
|
+
* Returns events in chronological order (sorted by timestamp).
|
|
245
|
+
* Can filter by category and/or stepId.
|
|
246
|
+
*
|
|
247
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
248
|
+
* @param runId - Unique identifier for the workflow run
|
|
249
|
+
* @param options - Optional filters for category and stepId (unique identifier)
|
|
250
|
+
* @returns Array of events in chronological order
|
|
251
|
+
*/
|
|
252
|
+
abstract loadEvents(workflowSlug: string, runId: string, options: {
|
|
253
|
+
category: "step";
|
|
254
|
+
stepId?: string;
|
|
255
|
+
}): Promise<StepEvent[]>;
|
|
256
|
+
abstract loadEvents(workflowSlug: string, runId: string, options: {
|
|
257
|
+
category: "workflow";
|
|
258
|
+
}): Promise<WorkflowEvent[]>;
|
|
259
|
+
abstract loadEvents(workflowSlug: string, runId: string, options?: {
|
|
260
|
+
category?: "workflow" | "step";
|
|
261
|
+
stepId?: string;
|
|
262
|
+
}): Promise<Event[]>;
|
|
263
|
+
/**
|
|
264
|
+
* Get the file path for a step's output file
|
|
265
|
+
*
|
|
266
|
+
* Used for file-based step output storage. Returns the path where
|
|
267
|
+
* the subprocess executor should write the step's output.
|
|
268
|
+
*
|
|
269
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
270
|
+
* @param runId - Unique identifier for the workflow run
|
|
271
|
+
* @param stepId - Unique step identifier (directory name)
|
|
272
|
+
* @param attemptNumber - Attempt number (for retries)
|
|
273
|
+
* @returns Absolute path to the output file
|
|
274
|
+
*/
|
|
275
|
+
abstract getStepOutputPath(workflowSlug: string, runId: string, stepId: string, attemptNumber: number): string;
|
|
276
|
+
/**
|
|
277
|
+
* Record that a workflow has started
|
|
278
|
+
*
|
|
279
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
280
|
+
* @param runId - Unique identifier for the workflow run
|
|
281
|
+
* @param metadata - Metadata about the workflow start
|
|
282
|
+
*/
|
|
283
|
+
abstract saveWorkflowStart(workflowSlug: string, runId: string, metadata: {
|
|
284
|
+
workflowAttemptNumber: number;
|
|
285
|
+
hasInputSchema: boolean;
|
|
286
|
+
hasInput: boolean;
|
|
287
|
+
}): Promise<void>;
|
|
288
|
+
/**
|
|
289
|
+
* Record workflow input validation result (success or failure)
|
|
290
|
+
*
|
|
291
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
292
|
+
* @param runId - Unique identifier for the workflow run
|
|
293
|
+
* @param result - Validation result with schema info, success flag, and optional error
|
|
294
|
+
*/
|
|
295
|
+
abstract saveWorkflowInputValidation(workflowSlug: string, runId: string, result: {
|
|
296
|
+
workflowAttemptNumber: number;
|
|
297
|
+
hasSchema: boolean;
|
|
298
|
+
success: boolean;
|
|
299
|
+
error?: StepError;
|
|
300
|
+
validationErrors?: Array<{
|
|
301
|
+
path: string;
|
|
302
|
+
message: string;
|
|
303
|
+
}>;
|
|
304
|
+
}): Promise<void>;
|
|
305
|
+
/**
|
|
306
|
+
* Record that a workflow has failed
|
|
307
|
+
*
|
|
308
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
309
|
+
* @param runId - Unique identifier for the workflow run
|
|
310
|
+
* @param error - The error that caused the failure
|
|
311
|
+
* @param metadata - Metadata about the failure
|
|
312
|
+
* @param failureReason - Why the workflow failed (step-failed, worker-crash, etc.)
|
|
313
|
+
*/
|
|
314
|
+
abstract saveWorkflowFailed(workflowSlug: string, runId: string, error: StepError, metadata: {
|
|
315
|
+
workflowAttemptNumber: number;
|
|
316
|
+
duration: number;
|
|
317
|
+
completedSteps: number;
|
|
318
|
+
failedStep?: string;
|
|
319
|
+
}, failureReason: "step-failed" | "worker-crash" | "timeout" | "cancelled"): Promise<void>;
|
|
320
|
+
/**
|
|
321
|
+
* Record that a workflow has been resumed from a previous run
|
|
322
|
+
*
|
|
323
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
324
|
+
* @param runId - Unique identifier for the workflow run
|
|
325
|
+
* @param metadata - Metadata about the resume
|
|
326
|
+
*/
|
|
327
|
+
abstract saveWorkflowResumed(workflowSlug: string, runId: string, metadata: {
|
|
328
|
+
originalRunId: string;
|
|
329
|
+
resumedSteps: number;
|
|
330
|
+
pendingSteps: number;
|
|
331
|
+
}): Promise<void>;
|
|
332
|
+
/**
|
|
333
|
+
* Record that a workflow has been cancelled
|
|
334
|
+
*
|
|
335
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
336
|
+
* @param runId - Unique identifier for the workflow run
|
|
337
|
+
* @param metadata - Metadata about the cancellation
|
|
338
|
+
*/
|
|
339
|
+
abstract saveWorkflowCancelled(workflowSlug: string, runId: string, metadata: {
|
|
340
|
+
workflowAttemptNumber: number;
|
|
341
|
+
reason?: string;
|
|
342
|
+
duration: number;
|
|
343
|
+
completedSteps: number;
|
|
344
|
+
}): Promise<void>;
|
|
345
|
+
/**
|
|
346
|
+
* Record that a workflow retry has been initiated
|
|
347
|
+
*
|
|
348
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
349
|
+
* @param runId - Unique identifier for the workflow run
|
|
350
|
+
* @param metadata - Metadata about the retry
|
|
351
|
+
*/
|
|
352
|
+
abstract saveWorkflowRetryStarted(workflowSlug: string, runId: string, metadata: {
|
|
353
|
+
workflowAttemptNumber: number;
|
|
354
|
+
previousAttemptNumber: number;
|
|
355
|
+
retriedSteps: string[];
|
|
356
|
+
reason?: string;
|
|
357
|
+
}): Promise<void>;
|
|
358
|
+
/**
|
|
359
|
+
* Get all failed steps in a workflow run
|
|
360
|
+
*
|
|
361
|
+
* @param workflowSlug - Workflow identifier (directory name)
|
|
362
|
+
* @param runId - Unique identifier for the workflow run
|
|
363
|
+
* @returns Array of failed step information
|
|
364
|
+
*/
|
|
365
|
+
abstract getFailedSteps(workflowSlug: string, runId: string): Promise<Array<{
|
|
366
|
+
stepId: string;
|
|
367
|
+
error: StepError;
|
|
368
|
+
attemptNumber: number;
|
|
369
|
+
}>>;
|
|
370
|
+
/**
|
|
371
|
+
* Submit a run to the queue
|
|
372
|
+
*
|
|
373
|
+
* Handles idempotency: if a run with the same idempotencyKey already exists,
|
|
374
|
+
* returns the existing runId instead of creating a new one.
|
|
375
|
+
*
|
|
376
|
+
* @param submission - Run submission parameters
|
|
377
|
+
* @returns Object with runId and isNew flag (false if idempotency key matched)
|
|
378
|
+
*/
|
|
379
|
+
abstract submitRun(submission: RunSubmission): Promise<{
|
|
380
|
+
runId: string;
|
|
381
|
+
isNew: boolean;
|
|
382
|
+
}>;
|
|
383
|
+
/**
|
|
384
|
+
* List all runs matching filters
|
|
385
|
+
*
|
|
386
|
+
* EVENTS-AS-QUEUE IMPLEMENTATION:
|
|
387
|
+
* - Scan workflow directories under baseDir
|
|
388
|
+
* - For each run, load workflow events and project to RunState
|
|
389
|
+
* - Filter by status, workflow, tags
|
|
390
|
+
* - Sort by createdAt (descending)
|
|
391
|
+
* - Apply limit
|
|
392
|
+
*
|
|
393
|
+
* @param options - Optional filters and pagination
|
|
394
|
+
* @returns Array of run states sorted by createdAt (newest first)
|
|
395
|
+
*/
|
|
396
|
+
abstract listRuns(options?: {
|
|
397
|
+
workflowSlug?: string;
|
|
398
|
+
status?: RunState["status"][];
|
|
399
|
+
tags?: string[];
|
|
400
|
+
limit?: number;
|
|
401
|
+
}): Promise<RunState[]>;
|
|
402
|
+
/**
|
|
403
|
+
* Record that a run has been submitted to the queue
|
|
404
|
+
*
|
|
405
|
+
* Emits RunSubmitted event.
|
|
406
|
+
*
|
|
407
|
+
* @param workflowSlug - Workflow identifier
|
|
408
|
+
* @param runId - Unique identifier for the run
|
|
409
|
+
* @param metadata - Submission metadata
|
|
410
|
+
*/
|
|
411
|
+
abstract saveRunSubmitted(workflowSlug: string, runId: string, metadata: {
|
|
412
|
+
availableAt: number;
|
|
413
|
+
priority: number;
|
|
414
|
+
input?: string;
|
|
415
|
+
hasInputSchema: boolean;
|
|
416
|
+
timeout?: number;
|
|
417
|
+
idempotencyKey?: string;
|
|
418
|
+
metadata?: Record<string, unknown>;
|
|
419
|
+
tags?: string[];
|
|
420
|
+
}): Promise<void>;
|
|
421
|
+
/**
|
|
422
|
+
* Cancel a run
|
|
423
|
+
*
|
|
424
|
+
* Can cancel runs in pending, claimed, or running status.
|
|
425
|
+
* Emits WorkflowCancelled event.
|
|
426
|
+
*
|
|
427
|
+
* @param runId - The run to cancel
|
|
428
|
+
* @param reason - Optional cancellation reason
|
|
429
|
+
*/
|
|
430
|
+
abstract cancelRun(runId: string, reason?: string): Promise<void>;
|
|
431
|
+
/**
|
|
432
|
+
* Get the current state of a run
|
|
433
|
+
*
|
|
434
|
+
* @param runId - The run to query
|
|
435
|
+
* @returns The run state, or null if not found
|
|
436
|
+
*/
|
|
437
|
+
abstract getRun(runId: string): Promise<RunState | null>;
|
|
438
|
+
/**
|
|
439
|
+
* List all active workflows (workflows with incomplete runs)
|
|
440
|
+
*
|
|
441
|
+
* Used by the scheduler loop to find workflows that need scheduling attention.
|
|
442
|
+
* A workflow is "active" if it has at least one run that is not in a terminal state
|
|
443
|
+
* (completed, failed, or cancelled).
|
|
444
|
+
*
|
|
445
|
+
* IMPLEMENTATION NOTE:
|
|
446
|
+
* - Scan workflow directories
|
|
447
|
+
* - For each workflow, check if it has any runs in non-terminal states
|
|
448
|
+
* - Return list of workflow slugs
|
|
449
|
+
*
|
|
450
|
+
* @returns Array of workflow slugs with active runs
|
|
451
|
+
*/
|
|
452
|
+
abstract listActiveWorkflows(): Promise<string[]>;
|
|
453
|
+
/**
|
|
454
|
+
* List scheduled steps available for claiming
|
|
455
|
+
*
|
|
456
|
+
* STEP-LEVEL DISTRIBUTION:
|
|
457
|
+
* Scans all workflows for steps in "scheduled" status where availableAt <= now.
|
|
458
|
+
* Returns steps sorted by availableAt (earliest first).
|
|
459
|
+
*
|
|
460
|
+
* @param options - Optional filters
|
|
461
|
+
* @returns Array of step identifiers ready for execution
|
|
462
|
+
*/
|
|
463
|
+
abstract listScheduledSteps(options?: {
|
|
464
|
+
availableBefore?: number;
|
|
465
|
+
workflowSlug?: string;
|
|
466
|
+
limit?: number;
|
|
467
|
+
}): Promise<Array<{
|
|
468
|
+
workflowSlug: string;
|
|
469
|
+
runId: string;
|
|
470
|
+
stepId: string;
|
|
471
|
+
}>>;
|
|
472
|
+
/**
|
|
473
|
+
* Check if a step is claimable for execution
|
|
474
|
+
*
|
|
475
|
+
* STEP-LEVEL DISTRIBUTION:
|
|
476
|
+
* - Load step events and project to StepState
|
|
477
|
+
* - Check if status === "scheduled" && availableAt <= now
|
|
478
|
+
* - Returns true if step can be claimed, false otherwise
|
|
479
|
+
*
|
|
480
|
+
* Workers can use this as a pre-check before emitting StepStarted; the check alone is not atomic, so use claimScheduledStep to claim a step atomically.
|
|
481
|
+
*
|
|
482
|
+
* @param workflowSlug - Workflow identifier
|
|
483
|
+
* @param runId - Run identifier
|
|
484
|
+
* @param stepId - Unique step identifier (directory name)
|
|
485
|
+
* @returns true if step is claimable, false otherwise
|
|
486
|
+
*/
|
|
487
|
+
abstract isStepClaimable(workflowSlug: string, runId: string, stepId: string): Promise<boolean>;
|
|
488
|
+
/**
|
|
489
|
+
* Atomically claim a scheduled step for execution.
|
|
490
|
+
*
|
|
491
|
+
* Implementations must ensure only one worker can transition a step
|
|
492
|
+
* from scheduled → running for a given attempt. If the step is no longer
|
|
493
|
+
* claimable (e.g. already running, completed, or not yet available),
|
|
494
|
+
* the method returns null.
|
|
495
|
+
*
|
|
496
|
+
* @param workflowSlug - Workflow identifier
|
|
497
|
+
* @param runId - Run identifier
|
|
498
|
+
* @param stepId - Unique step identifier (directory name)
|
|
499
|
+
* @param workerId - Worker attempting the claim
|
|
500
|
+
* @param metadata - Step start metadata (dependencies, timestamp, attemptNumber)
|
|
501
|
+
* @returns Object containing the attempt number when claimed, or null if the step could not be claimed
|
|
502
|
+
*/
|
|
503
|
+
abstract claimScheduledStep(workflowSlug: string, runId: string, stepId: string, workerId: string, metadata: StepStartMetadata): Promise<{
|
|
504
|
+
attemptNumber: number;
|
|
505
|
+
} | null>;
|
|
506
|
+
/**
|
|
507
|
+
* Find steps with stale heartbeats and reclaim them
|
|
508
|
+
*
|
|
509
|
+
* STEP-LEVEL DISTRIBUTION:
|
|
510
|
+
* - Scan all workflows for steps in "running" status
|
|
511
|
+
* - Check lastHeartbeat timestamp
|
|
512
|
+
* - If stale (no heartbeat > threshold), emit StepReclaimed and re-schedule
|
|
513
|
+
*
|
|
514
|
+
* @param staleThreshold - Milliseconds since last heartbeat before considering stale
|
|
515
|
+
* @param reclaimedBy - Worker ID performing reclamation
|
|
516
|
+
* @returns Array of reclaimed step identifiers
|
|
517
|
+
*/
|
|
518
|
+
abstract reclaimStaleSteps(staleThreshold: number, reclaimedBy: string): Promise<Array<{
|
|
519
|
+
workflowSlug: string;
|
|
520
|
+
runId: string;
|
|
521
|
+
stepId: string;
|
|
522
|
+
}>>;
|
|
523
|
+
/**
|
|
524
|
+
* Register a workflow with the backend
|
|
525
|
+
*
|
|
526
|
+
* Stores workflow metadata, input schema (as JSON Schema), and step definitions.
|
|
527
|
+
* This is called during worker startup after discovering workflows from the filesystem.
|
|
528
|
+
*
|
|
529
|
+
* @param registration - Complete workflow registration data
|
|
530
|
+
*/
|
|
531
|
+
abstract registerWorkflow(registration: WorkflowRegistration): Promise<void>;
|
|
532
|
+
/**
|
|
533
|
+
* Get metadata for a specific workflow
|
|
534
|
+
*
|
|
535
|
+
* Returns workflow metadata including name, location, and input schema.
|
|
536
|
+
*
|
|
537
|
+
* @param slug - Workflow identifier
|
|
538
|
+
* @returns Workflow metadata, or null if not registered
|
|
539
|
+
*/
|
|
540
|
+
abstract getWorkflowMetadata(slug: string): Promise<WorkflowMetadata | null>;
|
|
541
|
+
/**
|
|
542
|
+
* List all registered workflows
|
|
543
|
+
*
|
|
544
|
+
* Returns metadata for all workflows that have been registered with the backend.
|
|
545
|
+
*
|
|
546
|
+
* @returns Array of workflow metadata
|
|
547
|
+
*/
|
|
548
|
+
abstract listWorkflowMetadata(): Promise<WorkflowMetadata[]>;
|
|
549
|
+
/**
|
|
550
|
+
* Get step definitions for a workflow
|
|
551
|
+
*
|
|
552
|
+
* Returns the step structure (names, dependencies, export flags) for a workflow.
|
|
553
|
+
*
|
|
554
|
+
* @param slug - Workflow identifier
|
|
555
|
+
* @returns Array of step definitions, or empty array if workflow not found
|
|
556
|
+
*/
|
|
557
|
+
abstract getWorkflowSteps(slug: string): Promise<StepDefinition[]>;
|
|
558
|
+
/**
|
|
559
|
+
* List all run IDs for a workflow
|
|
560
|
+
*
|
|
561
|
+
* Returns all run IDs that exist for a given workflow.
|
|
562
|
+
* Used by worker to discover existing runs.
|
|
563
|
+
*
|
|
564
|
+
* @param workflowSlug - Workflow identifier
|
|
565
|
+
* @returns Array of run IDs
|
|
566
|
+
*/
|
|
567
|
+
abstract listRunIds(workflowSlug: string): Promise<string[]>;
|
|
568
|
+
/**
|
|
569
|
+
* Close the backend and clean up resources
|
|
570
|
+
*
|
|
571
|
+
* This method should be called when the backend is no longer needed.
|
|
572
|
+
* For database backends, this closes connection pools.
|
|
573
|
+
* For filesystem backends, this is typically a no-op.
|
|
574
|
+
*
|
|
575
|
+
* After calling close(), the backend should not be used again.
|
|
576
|
+
*/
|
|
577
|
+
abstract close(): Promise<void>;
|
|
578
|
+
/**
|
|
579
|
+
* Get error analysis for workflows and steps
|
|
580
|
+
*
|
|
581
|
+
* Analyzes failure patterns, error types, and common error messages.
|
|
582
|
+
*
|
|
583
|
+
* @param options - Optional filters for time range, workflow, or step
|
|
584
|
+
* @returns Error analysis data
|
|
585
|
+
*/
|
|
586
|
+
abstract getErrorAnalysis(options?: AnalyticsOptions): Promise<ErrorAnalysis>;
|
|
587
|
+
/**
|
|
588
|
+
* Get paginated list of errors grouped by fingerprint
|
|
589
|
+
*
|
|
590
|
+
* Returns errors grouped by composable fingerprints with aggregated statistics.
|
|
591
|
+
*
|
|
592
|
+
* @param options - Filtering and pagination options
|
|
593
|
+
* @returns Paginated list of error groups and total count
|
|
594
|
+
*/
|
|
595
|
+
abstract getErrorsList(options?: {
|
|
596
|
+
timeRange?: {
|
|
597
|
+
start: number;
|
|
598
|
+
end: number;
|
|
599
|
+
};
|
|
600
|
+
workflowSlug?: string;
|
|
601
|
+
groupingStrategy?: 'exact' | 'normalized' | 'portable';
|
|
602
|
+
limit?: number;
|
|
603
|
+
offset?: number;
|
|
604
|
+
}): Promise<{
|
|
605
|
+
errors: Array<{
|
|
606
|
+
fingerprint: string;
|
|
607
|
+
errorMessage: string;
|
|
608
|
+
errorName: string;
|
|
609
|
+
sampleStack: string;
|
|
610
|
+
count: number;
|
|
611
|
+
affectedRuns: number;
|
|
612
|
+
firstSeen: number;
|
|
613
|
+
lastSeen: number;
|
|
614
|
+
}>;
|
|
615
|
+
total: number;
|
|
616
|
+
}>;
|
|
617
|
+
/**
|
|
618
|
+
* Get detailed information about a specific error by fingerprint
|
|
619
|
+
*
|
|
620
|
+
* Returns all occurrences of an error matching the given fingerprint.
|
|
621
|
+
*
|
|
622
|
+
* @param fingerprint - Composite fingerprint (nameHash:messageHash:stackHash)
|
|
623
|
+
* @param groupingStrategy - Which stack hash variant to use
|
|
624
|
+
* @param options - Filtering and pagination options
|
|
625
|
+
* @returns Error details with occurrences
|
|
626
|
+
*/
|
|
627
|
+
abstract getErrorDetail(fingerprint: string, groupingStrategy: 'exact' | 'normalized' | 'portable', options?: {
|
|
628
|
+
timeRange?: {
|
|
629
|
+
start: number;
|
|
630
|
+
end: number;
|
|
631
|
+
};
|
|
632
|
+
limit?: number;
|
|
633
|
+
offset?: number;
|
|
634
|
+
}): Promise<{
|
|
635
|
+
fingerprint: string;
|
|
636
|
+
errorMessage: string;
|
|
637
|
+
errorName: string;
|
|
638
|
+
sampleStack: string;
|
|
639
|
+
totalCount: number;
|
|
640
|
+
affectedRuns: number;
|
|
641
|
+
firstSeen: number;
|
|
642
|
+
lastSeen: number;
|
|
643
|
+
occurrences: Array<{
|
|
644
|
+
workflowSlug: string;
|
|
645
|
+
runId: string;
|
|
646
|
+
stepId: string;
|
|
647
|
+
attemptNumber: number;
|
|
648
|
+
timestampUs: number;
|
|
649
|
+
}>;
|
|
650
|
+
total: number;
|
|
651
|
+
}>;
|
|
652
|
+
/**
|
|
653
|
+
* Get retry analysis metrics
|
|
654
|
+
*
|
|
655
|
+
* Analyzes retry patterns, success rates after retries, and retry effectiveness.
|
|
656
|
+
*
|
|
657
|
+
* @param options - Optional filters for time range, workflow, or step
|
|
658
|
+
* @returns Retry analysis data
|
|
659
|
+
*/
|
|
660
|
+
abstract getRetryAnalysis(options?: AnalyticsOptions): Promise<RetryAnalysis>;
|
|
661
|
+
/**
|
|
662
|
+
* Get scheduling latency metrics
|
|
663
|
+
*
|
|
664
|
+
* Measures time between step being scheduled and actually starting execution.
|
|
665
|
+
* High latency indicates worker starvation or queue congestion.
|
|
666
|
+
*
|
|
667
|
+
* @param options - Optional filters for time range, workflow, or step
|
|
668
|
+
* @returns Scheduling latency statistics
|
|
669
|
+
*/
|
|
670
|
+
abstract getSchedulingLatency(options?: AnalyticsOptions): Promise<SchedulingLatency>;
|
|
671
|
+
/**
|
|
672
|
+
* Get step duration metrics
|
|
673
|
+
*
|
|
674
|
+
* Analyzes how long steps take to execute.
|
|
675
|
+
*
|
|
676
|
+
* @param options - Optional filters for time range, workflow, or step
|
|
677
|
+
* @returns Step duration statistics
|
|
678
|
+
*/
|
|
679
|
+
abstract getStepDuration(options?: AnalyticsOptions): Promise<StepDuration>;
|
|
680
|
+
/**
|
|
681
|
+
* Get workflow duration metrics
|
|
682
|
+
*
|
|
683
|
+
* Analyzes end-to-end workflow execution time (from submission to completion).
|
|
684
|
+
*
|
|
685
|
+
* @param options - Optional filters for time range or workflow
|
|
686
|
+
* @returns Workflow duration statistics
|
|
687
|
+
*/
|
|
688
|
+
abstract getWorkflowDuration(options?: AnalyticsOptions): Promise<WorkflowDuration>;
|
|
689
|
+
/**
|
|
690
|
+
* Get worker stability metrics
|
|
691
|
+
*
|
|
692
|
+
* Analyzes worker crashes, reclamations, and stale heartbeats.
|
|
693
|
+
*
|
|
694
|
+
* @param options - Optional filters for time range
|
|
695
|
+
* @returns Worker stability data
|
|
696
|
+
*/
|
|
697
|
+
abstract getWorkerStability(options?: AnalyticsOptions): Promise<WorkerStability>;
|
|
698
|
+
/**
|
|
699
|
+
* Get throughput metrics
|
|
700
|
+
*
|
|
701
|
+
* Analyzes how many runs/steps are being completed per unit time.
|
|
702
|
+
*
|
|
703
|
+
* @param options - Optional filters for time range or workflow
|
|
704
|
+
* @returns Throughput data
|
|
705
|
+
*/
|
|
706
|
+
abstract getThroughput(options?: AnalyticsOptions): Promise<Throughput>;
|
|
707
|
+
/**
|
|
708
|
+
* Get current queue depth
|
|
709
|
+
*
|
|
710
|
+
* Real-time snapshot of pending/running runs and steps.
|
|
711
|
+
*
|
|
712
|
+
* @param options - Optional filter for workflow
|
|
713
|
+
* @returns Queue depth data
|
|
714
|
+
*/
|
|
715
|
+
abstract getQueueDepth(options?: Pick<AnalyticsOptions, 'workflowSlug'>): Promise<QueueDepth>;
|
|
716
|
+
/**
|
|
717
|
+
* Get queue depth broken down by workflow
|
|
718
|
+
*
|
|
719
|
+
* Real-time snapshot showing per-workflow queue statistics.
|
|
720
|
+
* Useful for identifying which workflows have pending work.
|
|
721
|
+
*
|
|
722
|
+
* @returns Array of per-workflow queue depth data
|
|
723
|
+
*/
|
|
724
|
+
abstract getQueueDepthByWorkflow(): Promise<QueueDepthByWorkflow>;
|
|
725
|
+
/**
|
|
726
|
+
* Get success/failure rate metrics
|
|
727
|
+
*
|
|
728
|
+
* Analyzes overall health of workflows and steps.
|
|
729
|
+
*
|
|
730
|
+
* @param options - Optional filters for time range, workflow, or step
|
|
731
|
+
* @returns Success rate data
|
|
732
|
+
*/
|
|
733
|
+
abstract getSuccessRate(options?: AnalyticsOptions): Promise<SuccessRate>;
|
|
734
|
+
/**
|
|
735
|
+
* Get comprehensive analytics summary
|
|
736
|
+
*
|
|
737
|
+
* Combines all analytics metrics into a single response.
|
|
738
|
+
* Useful for dashboard views.
|
|
739
|
+
*
|
|
740
|
+
* @param options - Optional filters for time range or workflow
|
|
741
|
+
* @returns Complete analytics summary
|
|
742
|
+
*/
|
|
743
|
+
abstract getAnalyticsSummary(options?: AnalyticsOptions): Promise<AnalyticsSummary>;
|
|
744
|
+
}
|
|
745
|
+
//# sourceMappingURL=interface.d.ts.map
|