@cascade-flow/backend-interface 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,745 @@
1
+ import type { StepRecord, StepError, LogEntry, RunSubmission, RunState, WorkflowMetadata, WorkflowRegistration, StepDefinition } from "./schemas.ts";
2
+ import type { Event, StepEvent, WorkflowEvent } from "./events.ts";
3
+ import type { AnalyticsOptions, ErrorAnalysis, RetryAnalysis, SchedulingLatency, StepDuration, WorkflowDuration, WorkerStability, Throughput, QueueDepth, QueueDepthByWorkflow, SuccessRate, AnalyticsSummary } from "./analytics.ts";
4
+ /**
5
+ * Metadata for when a step starts
6
+ */
7
+ export type StepStartMetadata = {
8
+ dependencies: string[];
9
+ timestamp: number;
10
+ attemptNumber: number;
11
+ };
12
+ /**
13
+ * Metadata for when a step completes
14
+ */
15
+ export type StepCompleteMetadata = {
16
+ timestamp: number;
17
+ duration: number;
18
+ logs?: LogEntry[];
19
+ attemptNumber: number;
20
+ output: unknown;
21
+ };
22
+ /**
23
+ * Detailed information about a workflow including its steps
24
+ * @deprecated Use WorkflowRegistration from schemas.ts instead
25
+ */
26
+ export type WorkflowDetails = WorkflowMetadata & {
27
+ steps: StepDefinition[];
28
+ stepCount: number;
29
+ };
30
+ /**
31
+ * Abstract backend interface for persisting workflow execution state
32
+ *
33
+ * Implementations can store data in various backends (filesystem, database, cloud storage, etc.)
34
+ *
35
+ * This interface supports event sourcing: all state changes are recorded as
36
+ * immutable events, and current state is computed by replaying events.
37
+ */
38
+ export declare abstract class Backend {
39
+ /**
40
+ * Initialize the backend (e.g., run migrations, create schema)
41
+ * Must be called before using the backend for any operations.
42
+ *
43
+ * For database backends, this runs schema migrations.
44
+ * For filesystem backends, this is typically a no-op.
45
+ */
46
+ abstract initialize(): Promise<void>;
47
+ /**
48
+ * Initialize storage for a new workflow run
49
+ * Creates necessary directories, tables, or resources
50
+ *
51
+ * @param workflowSlug - Workflow identifier (directory name)
52
+ * @param runId - Unique identifier for the workflow run
53
+ */
54
+ abstract initializeRun(workflowSlug: string, runId: string): Promise<void>;
55
+ /**
56
+ * Record that a step has been scheduled for execution
57
+ *
58
+ * @param workflowSlug - Workflow identifier (directory name)
59
+ * @param runId - Unique identifier for the workflow run
60
+ * @param stepId - Unique step identifier (directory name)
61
+ * @param metadata - Scheduling metadata
62
+ */
63
+ abstract saveStepScheduled(workflowSlug: string, runId: string, stepId: string, metadata: {
64
+ availableAt: number;
65
+ reason: "initial" | "retry" | "dependency-satisfied";
66
+ attemptNumber: number;
67
+ retryDelayMs?: number;
68
+ }): Promise<void>;
69
+ /**
70
+ * Record that a step has started executing
71
+ *
72
+ * @param workflowSlug - Workflow identifier (directory name)
73
+ * @param runId - Unique identifier for the workflow run
74
+ * @param stepId - Unique step identifier (directory name)
75
+ * @param workerId - Worker executing the step
76
+ * @param metadata - Metadata about the step start (dependencies, timestamp, attemptNumber)
77
+ */
78
+ abstract saveStepStart(workflowSlug: string, runId: string, stepId: string, workerId: string, metadata: StepStartMetadata): Promise<void>;
79
+ /**
80
+ * Record that a step has completed successfully
81
+ *
82
+ * NOTE(review): the step output is accepted twice - as the `output` parameter and as
+ * `metadata.output` (a required field of StepCompleteMetadata). Which one is
+ * authoritative is not stated in this declaration - confirm against implementations.
+ *
+ * @param workflowSlug - Workflow identifier (directory name)
83
+ * @param runId - Unique identifier for the workflow run
84
+ * @param stepId - Unique step identifier (directory name)
85
+ * @param output - The output produced by the step (will be serialized)
86
+ * @param metadata - Metadata about the completion (timestamp, duration, attemptNumber, output, and optional logs)
87
+ * @param exportOutput - Whether this step's output should be included in final workflow output (defaults to false)
88
+ */
89
+ abstract saveStepComplete(workflowSlug: string, runId: string, stepId: string, output: unknown, metadata: StepCompleteMetadata, exportOutput?: boolean): Promise<void>;
90
+ /**
91
+ * Record that a step has failed
92
+ *
93
+ * @param workflowSlug - Workflow identifier (directory name)
94
+ * @param runId - Unique identifier for the workflow run
95
+ * @param stepId - Unique step identifier (directory name)
96
+ * @param error - The error that caused the failure
97
+ * @param metadata - Failure metadata (duration, attemptNumber, terminal flag, optional nextRetryAt, and failureReason)
98
+ */
99
+ abstract saveStepFailed(workflowSlug: string, runId: string, stepId: string, error: StepError, metadata: {
100
+ duration: number;
101
+ attemptNumber: number;
102
+ terminal: boolean;
103
+ nextRetryAt?: number;
104
+ failureReason: "exhausted-retries" | "worker-crash" | "timeout" | "cancelled" | "execution-error";
105
+ }): Promise<void>;
106
+ /**
107
+ * Atomically record a step failure and schedule its retry
108
+ *
109
+ * This combines saveStepFailed + saveStepScheduled in a single atomic operation
110
+ * to prevent the race condition where only the failure event gets persisted.
111
+ *
112
+ * Emits:
113
+ * 1. StepFailed (terminal: false)
114
+ * 2. StepRetrying (informational)
115
+ * 3. StepScheduled (for retry)
116
+ *
117
+ * All three events are written atomically - either all succeed or none do.
118
+ *
119
+ * @param workflowSlug - Workflow identifier (directory name)
120
+ * @param runId - Unique identifier for the workflow run
121
+ * @param stepId - Unique step identifier (directory name)
122
+ * @param error - The error that caused the failure
123
+ * @param failureMetadata - Failure metadata (duration, attemptNumber, nextRetryAt, failureReason); failureReason is limited to "execution-error" or "timeout" here
124
+ * @param scheduleMetadata - Retry scheduling metadata (availableAt, nextAttemptNumber, retryDelayMs, maxRetries)
125
+ */
126
+ abstract saveStepFailedAndScheduleRetry(workflowSlug: string, runId: string, stepId: string, error: StepError, failureMetadata: {
127
+ duration: number;
128
+ attemptNumber: number;
129
+ nextRetryAt: number;
130
+ failureReason: "execution-error" | "timeout";
131
+ }, scheduleMetadata: {
132
+ availableAt: number;
133
+ nextAttemptNumber: number;
134
+ retryDelayMs: number;
135
+ maxRetries: number;
136
+ }): Promise<void>;
137
+ /**
138
+ * Record that a step has been skipped
139
+ *
140
+ * @param workflowSlug - Workflow identifier (directory name)
141
+ * @param runId - Unique identifier for the workflow run
142
+ * @param stepId - Unique step identifier (directory name)
143
+ * @param metadata - Skip metadata: skipType ("primary" or "cascade"), reason, duration,
+ * attemptNumber, an optional cascadedFrom string, and an optional free-form metadata
+ * record. The record's values are typed as unknown, so they must be narrowed before use.
144
+ */
145
+ abstract saveStepSkipped(workflowSlug: string, runId: string, stepId: string, metadata: {
146
+ skipType: "primary" | "cascade";
147
+ reason: string;
148
+ metadata?: Record<string, unknown>;
149
+ duration: number;
150
+ attemptNumber: number;
151
+ cascadedFrom?: string;
152
+ }): Promise<void>;
153
+ /**
154
+ * Record a heartbeat for a running step
155
+ *
156
+ * @param workflowSlug - Workflow identifier (directory name)
157
+ * @param runId - Unique identifier for the workflow run
158
+ * @param stepId - Unique step identifier (directory name)
159
+ * @param workerId - Worker executing the step
160
+ * @param attemptNumber - Current attempt number
161
+ */
162
+ abstract saveStepHeartbeat(workflowSlug: string, runId: string, stepId: string, workerId: string, attemptNumber: number): Promise<void>;
163
+ /**
164
+ * Record that a step has been reclaimed from a stale worker
165
+ *
166
+ * @param workflowSlug - Workflow identifier (directory name)
167
+ * @param runId - Unique identifier for the workflow run
168
+ * @param stepId - Unique step identifier (directory name)
169
+ * @param metadata - Reclamation metadata
170
+ */
171
+ abstract saveStepReclaimed(workflowSlug: string, runId: string, stepId: string, metadata: {
172
+ originalWorkerId: string;
173
+ reclaimedBy: string;
174
+ lastHeartbeat: number;
175
+ staleThreshold: number;
176
+ staleDuration: number;
177
+ attemptNumber: number;
178
+ }): Promise<void>;
179
+ /**
180
+ * Record that the workflow has completed successfully
181
+ *
182
+ * @param workflowSlug - Workflow identifier (directory name)
183
+ * @param runId - Unique identifier for the workflow run
184
+ * @param output - The final workflow output (only exported steps)
185
+ * @param metadata - Metadata about the workflow completion (workflowAttemptNumber, timestamp, duration, totalSteps)
186
+ */
187
+ abstract saveWorkflowComplete(workflowSlug: string, runId: string, output: unknown, metadata: {
188
+ workflowAttemptNumber: number;
189
+ timestamp: number;
190
+ duration: number;
191
+ totalSteps: number;
192
+ }): Promise<void>;
193
+ /**
194
+ * Save logs for a step
195
+ *
196
+ * @param workflowSlug - Workflow identifier (directory name)
197
+ * @param runId - Unique identifier for the workflow run
198
+ * @param stepId - Unique step identifier (directory name)
199
+ * @param logs - Array of log entries
200
+ * @returns Promise that resolves with no value (the declared return type is Promise<void>; no path is returned)
201
+ */
202
+ abstract saveStepLogs(workflowSlug: string, runId: string, stepId: string, logs: LogEntry[]): Promise<void>;
203
+ /**
204
+ * Load logs for a step
205
+ *
206
+ * @param workflowSlug - Workflow identifier (directory name)
207
+ * @param runId - Unique identifier for the workflow run
208
+ * @param stepId - Unique step identifier (directory name)
+ * @param attemptNumber - Optional attempt number; presumably selects the logs of that
+ * attempt. Which attempt is used when omitted is not specified here - TODO confirm.
209
+ * @returns Promise resolving to an array of log entries, or null if no logs exist
210
+ */
211
+ abstract loadStepLogs(workflowSlug: string, runId: string, stepId: string, attemptNumber?: number): Promise<LogEntry[] | null>;
212
+ /**
213
+ * Load all step records for a given run
214
+ * Used for resuming interrupted workflows
215
+ *
216
+ * @param workflowSlug - Workflow identifier (directory name)
217
+ * @param runId - Unique identifier for the workflow run
218
+ * @returns Array of validated step records
219
+ */
220
+ abstract loadRun(workflowSlug: string, runId: string): Promise<StepRecord[]>;
221
+ /**
222
+ * Check if a run exists in the backend
223
+ *
224
+ * @param workflowSlug - Workflow identifier (directory name)
225
+ * @param runId - Unique identifier for the workflow run
226
+ * @returns true if the run exists, false otherwise
227
+ */
228
+ abstract runExists(workflowSlug: string, runId: string): Promise<boolean>;
229
+ /**
230
+ * Append an event to the event log
231
+ *
232
+ * Events are immutable and append-only. This is the core of event sourcing.
233
+ * Each event represents a state transition or action that occurred during execution.
234
+ * Events are routed to the appropriate directory based on their category (workflow vs step).
235
+ *
236
+ * @param workflowSlug - Workflow identifier (directory name)
237
+ * @param runId - Unique identifier for the workflow run
238
+ * @param event - The event to append (either step or workflow event)
239
+ */
240
+ abstract appendEvent(workflowSlug: string, runId: string, event: Event): Promise<void>;
241
+ /**
242
+ * Load events from the event log
243
+ *
244
+ * Returns events in chronological order (sorted by timestamp).
245
+ * Can filter by category and/or stepId.
246
+ *
247
+ * Declared as three overloads whose result type follows the requested category:
+ * - options.category === "step" (optionally with stepId) resolves to StepEvent[]
+ * - options.category === "workflow" resolves to WorkflowEvent[]
+ * - options omitted, or category not fixed to one literal, resolves to Event[]
+ *
+ * @param workflowSlug - Workflow identifier (directory name)
248
+ * @param runId - Unique identifier for the workflow run
249
+ * @param options - Optional filters for category and stepId (unique identifier)
250
+ * @returns Promise resolving to an array of events in chronological order
251
+ */
252
+ abstract loadEvents(workflowSlug: string, runId: string, options: {
253
+ category: "step";
254
+ stepId?: string;
255
+ }): Promise<StepEvent[]>;
256
+ abstract loadEvents(workflowSlug: string, runId: string, options: {
257
+ category: "workflow";
258
+ }): Promise<WorkflowEvent[]>;
259
+ abstract loadEvents(workflowSlug: string, runId: string, options?: {
260
+ category?: "workflow" | "step";
261
+ stepId?: string;
262
+ }): Promise<Event[]>;
263
+ /**
264
+ * Get the file path for a step's output file
265
+ *
266
+ * Used for file-based step output storage. Returns the path where
267
+ * the subprocess executor should write the step's output.
268
+ *
269
+ * @param workflowSlug - Workflow identifier (directory name)
270
+ * @param runId - Unique identifier for the workflow run
271
+ * @param stepId - Unique step identifier (directory name)
272
+ * @param attemptNumber - Attempt number (for retries)
273
+ * @returns Absolute path to the output file
274
+ */
275
+ abstract getStepOutputPath(workflowSlug: string, runId: string, stepId: string, attemptNumber: number): string;
276
+ /**
277
+ * Record that a workflow has started
278
+ *
279
+ * @param workflowSlug - Workflow identifier (directory name)
280
+ * @param runId - Unique identifier for the workflow run
281
+ * @param metadata - Metadata about the workflow start
282
+ */
283
+ abstract saveWorkflowStart(workflowSlug: string, runId: string, metadata: {
284
+ workflowAttemptNumber: number;
285
+ hasInputSchema: boolean;
286
+ hasInput: boolean;
287
+ }): Promise<void>;
288
+ /**
289
+ * Record workflow input validation result (success or failure)
290
+ *
291
+ * @param workflowSlug - Workflow identifier (directory name)
292
+ * @param runId - Unique identifier for the workflow run
293
+ * @param result - Validation result with schema info, success flag, and optional error
294
+ */
295
+ abstract saveWorkflowInputValidation(workflowSlug: string, runId: string, result: {
296
+ workflowAttemptNumber: number;
297
+ hasSchema: boolean;
298
+ success: boolean;
299
+ error?: StepError;
300
+ validationErrors?: Array<{
301
+ path: string;
302
+ message: string;
303
+ }>;
304
+ }): Promise<void>;
305
+ /**
306
+ * Record that a workflow has failed
307
+ *
308
+ * @param workflowSlug - Workflow identifier (directory name)
309
+ * @param runId - Unique identifier for the workflow run
310
+ * @param error - The error that caused the failure
311
+ * @param metadata - Metadata about the failure
312
+ * @param failureReason - Why the workflow failed (step-failed, worker-crash, etc.)
313
+ */
314
+ abstract saveWorkflowFailed(workflowSlug: string, runId: string, error: StepError, metadata: {
315
+ workflowAttemptNumber: number;
316
+ duration: number;
317
+ completedSteps: number;
318
+ failedStep?: string;
319
+ }, failureReason: "step-failed" | "worker-crash" | "timeout" | "cancelled"): Promise<void>;
320
+ /**
321
+ * Record that a workflow has been resumed from a previous run
322
+ *
323
+ * @param workflowSlug - Workflow identifier (directory name)
324
+ * @param runId - Unique identifier for the workflow run
325
+ * @param metadata - Metadata about the resume
326
+ */
327
+ abstract saveWorkflowResumed(workflowSlug: string, runId: string, metadata: {
328
+ originalRunId: string;
329
+ resumedSteps: number;
330
+ pendingSteps: number;
331
+ }): Promise<void>;
332
+ /**
333
+ * Record that a workflow has been cancelled
334
+ *
335
+ * @param workflowSlug - Workflow identifier (directory name)
336
+ * @param runId - Unique identifier for the workflow run
337
+ * @param metadata - Metadata about the cancellation
338
+ */
339
+ abstract saveWorkflowCancelled(workflowSlug: string, runId: string, metadata: {
340
+ workflowAttemptNumber: number;
341
+ reason?: string;
342
+ duration: number;
343
+ completedSteps: number;
344
+ }): Promise<void>;
345
+ /**
346
+ * Record that a workflow retry has been initiated
347
+ *
348
+ * @param workflowSlug - Workflow identifier (directory name)
349
+ * @param runId - Unique identifier for the workflow run
350
+ * @param metadata - Metadata about the retry
351
+ */
352
+ abstract saveWorkflowRetryStarted(workflowSlug: string, runId: string, metadata: {
353
+ workflowAttemptNumber: number;
354
+ previousAttemptNumber: number;
355
+ retriedSteps: string[];
356
+ reason?: string;
357
+ }): Promise<void>;
358
+ /**
359
+ * Get all failed steps in a workflow run
360
+ *
361
+ * @param workflowSlug - Workflow identifier (directory name)
362
+ * @param runId - Unique identifier for the workflow run
363
+ * @returns Array of failed step information
364
+ */
365
+ abstract getFailedSteps(workflowSlug: string, runId: string): Promise<Array<{
366
+ stepId: string;
367
+ error: StepError;
368
+ attemptNumber: number;
369
+ }>>;
370
+ /**
371
+ * Submit a run to the queue
372
+ *
373
+ * Handles idempotency: if a run with the same idempotencyKey already exists,
374
+ * returns the existing runId instead of creating a new one.
375
+ *
376
+ * @param submission - Run submission parameters
377
+ * @returns Object with runId and isNew flag (false if idempotency key matched)
378
+ */
379
+ abstract submitRun(submission: RunSubmission): Promise<{
380
+ runId: string;
381
+ isNew: boolean;
382
+ }>;
383
+ /**
384
+ * List all runs matching filters
385
+ *
386
+ * EVENTS-AS-QUEUE IMPLEMENTATION:
387
+ * - Scan workflow directories under baseDir
388
+ * - For each run, load workflow events and project to RunState
389
+ * - Filter by status, workflow, tags
390
+ * - Sort by createdAt (descending)
391
+ * - Apply limit
392
+ *
393
+ * NOTE(review): the steps above read like a filesystem-style implementation; `baseDir`
+ * is not part of this declaration. Whether `tags` must all match or any may match is
+ * not specified here - confirm.
+ *
+ * @param options - Optional filters (workflowSlug, status, tags) and a result limit; no offset or cursor is declared
394
+ * @returns Array of run states sorted by createdAt (newest first)
395
+ */
396
+ abstract listRuns(options?: {
397
+ workflowSlug?: string;
398
+ status?: RunState["status"][];
399
+ tags?: string[];
400
+ limit?: number;
401
+ }): Promise<RunState[]>;
402
+ /**
403
+ * Record that a run has been submitted to the queue
404
+ *
405
+ * Emits RunSubmitted event.
406
+ *
407
+ * @param workflowSlug - Workflow identifier
408
+ * @param runId - Unique identifier for the run
409
+ * @param metadata - Submission metadata
410
+ */
411
+ abstract saveRunSubmitted(workflowSlug: string, runId: string, metadata: {
412
+ availableAt: number;
413
+ priority: number;
414
+ input?: string;
415
+ hasInputSchema: boolean;
416
+ timeout?: number;
417
+ idempotencyKey?: string;
418
+ metadata?: Record<string, unknown>;
419
+ tags?: string[];
420
+ }): Promise<void>;
421
+ /**
422
+ * Cancel a run
423
+ *
424
+ * Can cancel runs in pending, claimed, or running status.
425
+ * Emits WorkflowCancelled event.
426
+ *
427
+ * @param runId - The run to cancel
428
+ * @param reason - Optional cancellation reason
429
+ */
430
+ abstract cancelRun(runId: string, reason?: string): Promise<void>;
431
+ /**
432
+ * Get the current state of a run
433
+ *
434
+ * @param runId - The run to query
435
+ * @returns The run state, or null if not found
436
+ */
437
+ abstract getRun(runId: string): Promise<RunState | null>;
438
+ /**
439
+ * List all active workflows (workflows with incomplete runs)
440
+ *
441
+ * Used by the scheduler loop to find workflows that need scheduling attention.
442
+ * A workflow is "active" if it has at least one run that is not in a terminal state
443
+ * (completed, failed, or cancelled).
444
+ *
445
+ * IMPLEMENTATION NOTE:
446
+ * - Scan workflow directories
447
+ * - For each workflow, check if it has any runs in non-terminal states
448
+ * - Return list of workflow slugs
449
+ *
450
+ * @returns Array of workflow slugs with active runs
451
+ */
452
+ abstract listActiveWorkflows(): Promise<string[]>;
453
+ /**
454
+ * List scheduled steps available for claiming
455
+ *
456
+ * STEP-LEVEL DISTRIBUTION:
457
+ * Scans all workflows for steps in "scheduled" status where availableAt <= now.
458
+ * Returns steps sorted by availableAt (earliest first).
459
+ *
460
+ * @param options - Optional filters: availableBefore (presumably the cutoff used in place
+ * of "now" - TODO confirm), workflowSlug (restrict to one workflow), and limit
+ * (maximum number of results)
461
+ * @returns Promise resolving to an array of { workflowSlug, runId, stepId } identifiers ready for execution
462
+ */
463
+ abstract listScheduledSteps(options?: {
464
+ availableBefore?: number;
465
+ workflowSlug?: string;
466
+ limit?: number;
467
+ }): Promise<Array<{
468
+ workflowSlug: string;
469
+ runId: string;
470
+ stepId: string;
471
+ }>>;
472
+ /**
473
+ * Check if a step is claimable for execution
474
+ *
475
+ * STEP-LEVEL DISTRIBUTION:
476
+ * - Load step events and project to StepState
477
+ * - Check if status === "scheduled" && availableAt <= now
478
+ * - Returns true if step can be claimed, false otherwise
479
+ *
480
+ * This is used by workers before emitting StepStarted to ensure atomicity.
481
+ *
482
+ * NOTE(review): this signature only reports a boolean and takes no workerId, so it
+ * cannot reserve the step for the caller. A check followed by a separate start is
+ * presumably not atomic on its own; claimScheduledStep is the method documented as
+ * the atomic claim - confirm intended usage.
+ *
+ * @param workflowSlug - Workflow identifier
483
+ * @param runId - Run identifier
484
+ * @param stepId - Unique step identifier (directory name)
485
+ * @returns Promise resolving to true if step is claimable, false otherwise
486
+ */
487
+ abstract isStepClaimable(workflowSlug: string, runId: string, stepId: string): Promise<boolean>;
488
+ /**
489
+ * Atomically claim a scheduled step for execution.
490
+ *
491
+ * Implementations must ensure only one worker can transition a step
492
+ * from scheduled → running for a given attempt. If the step is no longer
493
+ * claimable (e.g. already running, completed, or not yet available),
494
+ * the returned promise resolves to null.
495
+ *
496
+ * @param workflowSlug - Workflow identifier
497
+ * @param runId - Run identifier
498
+ * @param stepId - Unique step identifier (directory name)
499
+ * @param workerId - Worker attempting the claim
500
+ * @param metadata - Step start metadata (dependencies, timestamp, attemptNumber).
+ * NOTE(review): how metadata.attemptNumber relates to the attemptNumber in the
+ * result is not specified here - confirm.
501
+ * @returns Promise resolving to an object holding the attemptNumber when claimed, or null if step could not be claimed
502
+ */
503
+ abstract claimScheduledStep(workflowSlug: string, runId: string, stepId: string, workerId: string, metadata: StepStartMetadata): Promise<{
504
+ attemptNumber: number;
505
+ } | null>;
506
+ /**
507
+ * Find steps with stale heartbeats and reclaim them
508
+ *
509
+ * STEP-LEVEL DISTRIBUTION:
510
+ * - Scan all workflows for steps in "running" status
511
+ * - Check lastHeartbeat timestamp
512
+ * - If stale (no heartbeat > threshold), emit StepReclaimed and re-schedule
513
+ *
514
+ * @param staleThreshold - Milliseconds since last heartbeat before considering stale
515
+ * @param reclaimedBy - Worker ID performing reclamation
516
+ * @returns Array of reclaimed step identifiers
517
+ */
518
+ abstract reclaimStaleSteps(staleThreshold: number, reclaimedBy: string): Promise<Array<{
519
+ workflowSlug: string;
520
+ runId: string;
521
+ stepId: string;
522
+ }>>;
523
+ /**
524
+ * Register a workflow with the backend
525
+ *
526
+ * Stores workflow metadata, input schema (as JSON Schema), and step definitions.
527
+ * This is called during worker startup after discovering workflows from the filesystem.
528
+ *
529
+ * @param registration - Complete workflow registration data
530
+ */
531
+ abstract registerWorkflow(registration: WorkflowRegistration): Promise<void>;
532
+ /**
533
+ * Get metadata for a specific workflow
534
+ *
535
+ * Returns workflow metadata including name, location, and input schema.
536
+ *
537
+ * @param slug - Workflow identifier
538
+ * @returns Workflow metadata, or null if not registered
539
+ */
540
+ abstract getWorkflowMetadata(slug: string): Promise<WorkflowMetadata | null>;
541
+ /**
542
+ * List all registered workflows
543
+ *
544
+ * Returns metadata for all workflows that have been registered with the backend.
545
+ *
546
+ * @returns Array of workflow metadata
547
+ */
548
+ abstract listWorkflowMetadata(): Promise<WorkflowMetadata[]>;
549
+ /**
550
+ * Get step definitions for a workflow
551
+ *
552
+ * Returns the step structure (names, dependencies, export flags) for a workflow.
553
+ *
554
+ * @param slug - Workflow identifier
555
+ * @returns Array of step definitions, or empty array if workflow not found
556
+ */
557
+ abstract getWorkflowSteps(slug: string): Promise<StepDefinition[]>;
558
+ /**
559
+ * List all run IDs for a workflow
560
+ *
561
+ * Returns all run IDs that exist for a given workflow.
562
+ * Used by worker to discover existing runs.
563
+ *
564
+ * @param workflowSlug - Workflow identifier
565
+ * @returns Array of run IDs
566
+ */
567
+ abstract listRunIds(workflowSlug: string): Promise<string[]>;
568
+ /**
569
+ * Close the backend and clean up resources
570
+ *
571
+ * This method should be called when the backend is no longer needed.
572
+ * For database backends, this closes connection pools.
573
+ * For filesystem backends, this is typically a no-op.
574
+ *
575
+ * After calling close(), the backend should not be used again.
576
+ */
577
+ abstract close(): Promise<void>;
578
+ /**
579
+ * Get error analysis for workflows and steps
580
+ *
581
+ * Analyzes failure patterns, error types, and common error messages.
582
+ *
583
+ * @param options - Optional filters for time range, workflow, or step
584
+ * @returns Error analysis data
585
+ */
586
+ abstract getErrorAnalysis(options?: AnalyticsOptions): Promise<ErrorAnalysis>;
587
+ /**
588
+ * Get paginated list of errors grouped by fingerprint
589
+ *
590
+ * Returns errors grouped by composable fingerprints with aggregated statistics.
591
+ *
592
+ * @param options - Filtering and pagination options
593
+ * @returns Paginated list of error groups and total count
594
+ */
595
+ abstract getErrorsList(options?: {
596
+ timeRange?: {
597
+ start: number;
598
+ end: number;
599
+ };
600
+ workflowSlug?: string;
601
+ groupingStrategy?: 'exact' | 'normalized' | 'portable';
602
+ limit?: number;
603
+ offset?: number;
604
+ }): Promise<{
605
+ errors: Array<{
606
+ fingerprint: string;
607
+ errorMessage: string;
608
+ errorName: string;
609
+ sampleStack: string;
610
+ count: number;
611
+ affectedRuns: number;
612
+ firstSeen: number;
613
+ lastSeen: number;
614
+ }>;
615
+ total: number;
616
+ }>;
617
+ /**
618
+ * Get detailed information about a specific error by fingerprint
619
+ *
620
+ * Returns occurrences of an error matching the given fingerprint, together with
+ * aggregate fields (totalCount, affectedRuns, firstSeen, lastSeen). The occurrence
+ * list can be bounded with options.limit / options.offset.
621
+ *
622
+ * NOTE(review): the result carries both `totalCount` and `total`; presumably one is the
+ * overall occurrence count and the other the count used for pagination - confirm.
+ * `timestampUs` is named as if it holds microseconds; the unit of the other
+ * timestamps (firstSeen, lastSeen, timeRange) is not stated here - confirm.
+ *
+ * @param fingerprint - Composite fingerprint (nameHash:messageHash:stackHash)
623
+ * @param groupingStrategy - Which stack hash variant to use ('exact', 'normalized', or 'portable')
624
+ * @param options - Filtering and pagination options (timeRange, limit, offset)
625
+ * @returns Promise resolving to the error details with occurrences
626
+ */
627
+ abstract getErrorDetail(fingerprint: string, groupingStrategy: 'exact' | 'normalized' | 'portable', options?: {
628
+ timeRange?: {
629
+ start: number;
630
+ end: number;
631
+ };
632
+ limit?: number;
633
+ offset?: number;
634
+ }): Promise<{
635
+ fingerprint: string;
636
+ errorMessage: string;
637
+ errorName: string;
638
+ sampleStack: string;
639
+ totalCount: number;
640
+ affectedRuns: number;
641
+ firstSeen: number;
642
+ lastSeen: number;
643
+ occurrences: Array<{
644
+ workflowSlug: string;
645
+ runId: string;
646
+ stepId: string;
647
+ attemptNumber: number;
648
+ timestampUs: number;
649
+ }>;
650
+ total: number;
651
+ }>;
652
+ /**
653
+ * Get retry analysis metrics
654
+ *
655
+ * Analyzes retry patterns, success rates after retries, and retry effectiveness.
656
+ *
657
+ * @param options - Optional filters for time range, workflow, or step
658
+ * @returns Retry analysis data
659
+ */
660
+ abstract getRetryAnalysis(options?: AnalyticsOptions): Promise<RetryAnalysis>;
661
+ /**
662
+ * Get scheduling latency metrics
663
+ *
664
+ * Measures time between step being scheduled and actually starting execution.
665
+ * High latency indicates worker starvation or queue congestion.
666
+ *
667
+ * @param options - Optional filters for time range, workflow, or step
668
+ * @returns Scheduling latency statistics
669
+ */
670
+ abstract getSchedulingLatency(options?: AnalyticsOptions): Promise<SchedulingLatency>;
671
+ /**
672
+ * Get step duration metrics
673
+ *
674
+ * Analyzes how long steps take to execute.
675
+ *
676
+ * @param options - Optional filters for time range, workflow, or step
677
+ * @returns Step duration statistics
678
+ */
679
+ abstract getStepDuration(options?: AnalyticsOptions): Promise<StepDuration>;
680
+ /**
681
+ * Get workflow duration metrics
682
+ *
683
+ * Analyzes end-to-end workflow execution time (from submission to completion).
684
+ *
685
+ * @param options - Optional filters for time range or workflow
686
+ * @returns Workflow duration statistics
687
+ */
688
+ abstract getWorkflowDuration(options?: AnalyticsOptions): Promise<WorkflowDuration>;
689
+ /**
690
+ * Get worker stability metrics
691
+ *
692
+ * Analyzes worker crashes, reclamations, and stale heartbeats.
693
+ *
694
+ * @param options - Optional filters for time range
695
+ * @returns Worker stability data
696
+ */
697
+ abstract getWorkerStability(options?: AnalyticsOptions): Promise<WorkerStability>;
698
+ /**
699
+ * Get throughput metrics
700
+ *
701
+ * Analyzes how many runs/steps are being completed per unit time.
702
+ *
703
+ * @param options - Optional filters for time range or workflow
704
+ * @returns Throughput data
705
+ */
706
+ abstract getThroughput(options?: AnalyticsOptions): Promise<Throughput>;
707
+ /**
708
+ * Get current queue depth
709
+ *
710
+ * Real-time snapshot of pending/running runs and steps.
711
+ *
712
+ * @param options - Optional filter for workflow
713
+ * @returns Queue depth data
714
+ */
715
+ abstract getQueueDepth(options?: Pick<AnalyticsOptions, 'workflowSlug'>): Promise<QueueDepth>;
716
+ /**
717
+ * Get queue depth broken down by workflow
718
+ *
719
+ * Real-time snapshot showing per-workflow queue statistics.
720
+ * Useful for identifying which workflows have pending work.
721
+ *
722
+ * @returns Array of per-workflow queue depth data
723
+ */
724
+ abstract getQueueDepthByWorkflow(): Promise<QueueDepthByWorkflow>;
725
+ /**
726
+ * Get success/failure rate metrics
727
+ *
728
+ * Analyzes overall health of workflows and steps.
729
+ *
730
+ * @param options - Optional filters for time range, workflow, or step
731
+ * @returns Success rate data
732
+ */
733
+ abstract getSuccessRate(options?: AnalyticsOptions): Promise<SuccessRate>;
734
+ /**
735
+ * Get comprehensive analytics summary
736
+ *
737
+ * Combines all analytics metrics into a single response.
738
+ * Useful for dashboard views.
739
+ *
740
+ * @param options - Optional filters for time range or workflow
741
+ * @returns Complete analytics summary
742
+ */
743
+ abstract getAnalyticsSummary(options?: AnalyticsOptions): Promise<AnalyticsSummary>;
744
+ }
745
+ //# sourceMappingURL=interface.d.ts.map