@kylebegeman/pulse 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1006 -571
- package/dist/executor.d.ts +0 -4
- package/dist/executor.d.ts.map +1 -1
- package/dist/executor.js +11 -23
- package/dist/executor.js.map +1 -1
- package/dist/queue.js +6 -6
- package/dist/runs.d.ts +1 -12
- package/dist/runs.d.ts.map +1 -1
- package/dist/runs.js +3 -28
- package/dist/runs.js.map +1 -1
- package/dist/scheduler.js +4 -4
- package/dist/scheduler.js.map +1 -1
- package/dist/schema/migrate.d.ts.map +1 -1
- package/dist/schema/migrate.js +16 -51
- package/dist/schema/migrate.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,42 +1,74 @@
|
|
|
1
|
-
<
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
</
|
|
1
|
+
<p align="center">
|
|
2
|
+
<strong>pulse</strong>
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<p align="center">
|
|
6
|
+
Universal workflow engine — signal-driven automation for multi-tenant applications
|
|
7
|
+
</p>
|
|
8
|
+
|
|
9
|
+
<p align="center">
|
|
10
|
+
<a href="#quick-start">Quick Start</a> ·
|
|
11
|
+
<a href="#core-concepts">Core Concepts</a> ·
|
|
12
|
+
<a href="#api-reference">API Reference</a> ·
|
|
13
|
+
<a href="#database-schema">Database Schema</a> ·
|
|
14
|
+
<a href="#examples">Examples</a>
|
|
15
|
+
</p>
|
|
16
16
|
|
|
17
17
|
---
|
|
18
18
|
|
|
19
19
|
## Overview
|
|
20
20
|
|
|
21
|
-
Pulse is a TypeScript library
|
|
21
|
+
Pulse is a TypeScript library that powers event-driven workflow automation. Your application emits **signals** (triggers), and the engine matches them to registered **workflows**, scheduling and executing **steps** in sequence with full persistence, observability, and replay support.
|
|
22
22
|
|
|
23
23
|
```
|
|
24
|
-
Signal
|
|
24
|
+
Signal → Match → Schedule Steps → Evaluate Conditions → Execute Actions → Record Results
|
|
25
25
|
```
|
|
26
26
|
|
|
27
|
-
Built for multi-tenant SaaS.
|
|
27
|
+
Built for multi-tenant SaaS applications. Each signal, workflow, and run is scoped to a tenant. The engine runs in-process alongside your application and uses your existing PostgreSQL database and Redis instance.
|
|
28
28
|
|
|
29
|
-
|
|
29
|
+
---
|
|
30
30
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
-
|
|
34
|
-
-
|
|
35
|
-
-
|
|
36
|
-
-
|
|
37
|
-
-
|
|
38
|
-
-
|
|
39
|
-
-
|
|
31
|
+
## Table of Contents
|
|
32
|
+
|
|
33
|
+
- [Quick Start](#quick-start)
|
|
34
|
+
- [Installation](#installation)
|
|
35
|
+
- [Core Concepts](#core-concepts)
|
|
36
|
+
- [Signals (Triggers)](#signals-triggers)
|
|
37
|
+
- [Workflows](#workflows)
|
|
38
|
+
- [Steps](#steps)
|
|
39
|
+
- [Parallel Steps](#parallel-steps)
|
|
40
|
+
- [Cron-Triggered Workflows](#cron-triggered-workflows)
|
|
41
|
+
- [Handler Context](#handler-context)
|
|
42
|
+
- [Lifecycle Hooks](#lifecycle-hooks)
|
|
43
|
+
- [Replay](#replay)
|
|
44
|
+
- [Run Timeline](#run-timeline)
|
|
45
|
+
- [Cancel & Retry](#cancel--retry)
|
|
46
|
+
- [API Reference](#api-reference)
|
|
47
|
+
- [createEngine(config)](#createengineconfig)
|
|
48
|
+
- [Engine Methods](#engine-methods)
|
|
49
|
+
- [Registration](#registration)
|
|
50
|
+
- [Signals](#signals)
|
|
51
|
+
- [Lifecycle](#lifecycle)
|
|
52
|
+
- [Queries](#queries)
|
|
53
|
+
- [Timeline](#timeline)
|
|
54
|
+
- [Cancel & Retry](#cancel--retry-1)
|
|
55
|
+
- [Replay](#replay-1)
|
|
56
|
+
- [Workflow Management](#workflow-management)
|
|
57
|
+
- [Schema](#schema)
|
|
58
|
+
- [Type Reference](#type-reference)
|
|
59
|
+
- [Database Schema](#database-schema)
|
|
60
|
+
- [Runtime Guarantees](#runtime-guarantees)
|
|
61
|
+
- [Architecture](#architecture)
|
|
62
|
+
- [Examples](#examples)
|
|
63
|
+
- [Monitoring: Incident on Missed Heartbeat](#monitoring-incident-on-missed-heartbeat)
|
|
64
|
+
- [Billing: Trial Expiration Workflow](#billing-trial-expiration-workflow)
|
|
65
|
+
- [Chaining: Workflow That Triggers Another Workflow](#chaining-workflow-that-triggers-another-workflow)
|
|
66
|
+
- [Parallel: Multi-Channel Notification](#parallel-multi-channel-notification)
|
|
67
|
+
- [Cron: Daily Cleanup Job](#cron-daily-cleanup-job)
|
|
68
|
+
- [Observability: Run Timeline Dashboard](#observability-run-timeline-dashboard)
|
|
69
|
+
- [Error Recovery: Retry Failed Runs](#error-recovery-retry-failed-runs)
|
|
70
|
+
- [Requirements](#requirements)
|
|
71
|
+
- [License](#license)
|
|
40
72
|
|
|
41
73
|
---
|
|
42
74
|
|
|
@@ -47,34 +79,39 @@ import { createEngine } from '@kylebegeman/pulse'
|
|
|
47
79
|
import { Pool } from 'pg'
|
|
48
80
|
import Redis from 'ioredis'
|
|
49
81
|
|
|
82
|
+
// 1. Create the engine
|
|
50
83
|
const engine = createEngine({
|
|
51
84
|
db: new Pool({ connectionString: process.env.DATABASE_URL }),
|
|
52
85
|
redis: new Redis(process.env.REDIS_URL),
|
|
53
86
|
})
|
|
54
87
|
|
|
55
|
-
// Register a trigger
|
|
88
|
+
// 2. Register a trigger type with optional schema validation
|
|
56
89
|
engine.registerTrigger('heartbeat.missed', {
|
|
57
90
|
source: 'heartbeat',
|
|
58
91
|
resourceType: 'service',
|
|
59
92
|
})
|
|
60
93
|
|
|
94
|
+
// 3. Register an action handler
|
|
61
95
|
engine.registerAction('create_incident', async (ctx) => {
|
|
62
96
|
const incident = await createIncident({
|
|
63
97
|
service: ctx.trigger.resourceId,
|
|
64
98
|
tenant: ctx.tenantId,
|
|
99
|
+
reason: 'Heartbeat missed',
|
|
65
100
|
})
|
|
66
101
|
ctx.log('Incident created', { incidentId: incident.id })
|
|
67
102
|
return { success: true, data: { incidentId: incident.id } }
|
|
68
103
|
}, { replaySafe: true })
|
|
69
104
|
|
|
105
|
+
// 4. Register a condition
|
|
70
106
|
engine.registerCondition('is_still_failing', async (ctx) => {
|
|
71
107
|
const status = await checkServiceHealth(ctx.trigger.resourceId)
|
|
72
108
|
return status === 'unhealthy'
|
|
73
109
|
})
|
|
74
110
|
|
|
75
|
-
// Run migrations
|
|
111
|
+
// 5. Run migrations (creates pulse_* tables)
|
|
76
112
|
await engine.migrate()
|
|
77
113
|
|
|
114
|
+
// 6. Create a workflow
|
|
78
115
|
await engine.createWorkflow({
|
|
79
116
|
tenantId: 'workspace_1',
|
|
80
117
|
name: 'Incident on missed heartbeat',
|
|
@@ -88,9 +125,10 @@ await engine.createWorkflow({
|
|
|
88
125
|
isEnabled: true,
|
|
89
126
|
})
|
|
90
127
|
|
|
91
|
-
// Start
|
|
128
|
+
// 7. Start the engine (BullMQ workers begin processing)
|
|
92
129
|
await engine.start()
|
|
93
130
|
|
|
131
|
+
// 8. Emit signals from anywhere in your application
|
|
94
132
|
await engine.emit({
|
|
95
133
|
tenantId: 'workspace_1',
|
|
96
134
|
source: 'heartbeat',
|
|
@@ -100,7 +138,7 @@ await engine.emit({
|
|
|
100
138
|
payload: { lastSeenAt: new Date().toISOString() },
|
|
101
139
|
})
|
|
102
140
|
|
|
103
|
-
// Graceful shutdown
|
|
141
|
+
// 9. Graceful shutdown
|
|
104
142
|
await engine.stop()
|
|
105
143
|
```
|
|
106
144
|
|
|
@@ -121,7 +159,7 @@ npm install git+ssh://git@github.com:mrbagels/pulse.git
|
|
|
121
159
|
**Peer requirements:**
|
|
122
160
|
|
|
123
161
|
| Dependency | Purpose |
|
|
124
|
-
|
|
162
|
+
|---|---|
|
|
125
163
|
| `pg` | PostgreSQL client — engine uses your connection pool |
|
|
126
164
|
| `ioredis` | Redis client — used by BullMQ for job queuing |
|
|
127
165
|
|
|
@@ -135,17 +173,17 @@ A **signal** is a structured event emitted by your application. Every signal inc
|
|
|
135
173
|
|
|
136
174
|
```ts
|
|
137
175
|
await engine.emit({
|
|
138
|
-
tenantId: 'workspace_1',
|
|
139
|
-
source: 'heartbeat',
|
|
140
|
-
type: 'heartbeat.missed',
|
|
141
|
-
resourceType: 'service',
|
|
142
|
-
resourceId: 'api-server',
|
|
143
|
-
environment: 'production',
|
|
144
|
-
payload: { lastSeenAt: '...' }
|
|
176
|
+
tenantId: 'workspace_1', // Required — tenant scope
|
|
177
|
+
source: 'heartbeat', // Required — originating system
|
|
178
|
+
type: 'heartbeat.missed', // Required — event type
|
|
179
|
+
resourceType: 'service', // Optional — what kind of resource
|
|
180
|
+
resourceId: 'api-server', // Optional — which specific resource
|
|
181
|
+
environment: 'production', // Optional — environment scope
|
|
182
|
+
payload: { lastSeenAt: '...' } // Optional — arbitrary data
|
|
145
183
|
})
|
|
146
184
|
```
|
|
147
185
|
|
|
148
|
-
Triggers are **registered** to declare their source, expected resource type, and optional
|
|
186
|
+
Triggers are **registered** to declare their source, expected resource type, and optional payload schema (validated with Zod):
|
|
149
187
|
|
|
150
188
|
```ts
|
|
151
189
|
import { z } from 'zod'
|
|
@@ -159,7 +197,9 @@ engine.registerTrigger('heartbeat.missed', {
|
|
|
159
197
|
})
|
|
160
198
|
```
|
|
161
199
|
|
|
162
|
-
All emitted signals are persisted to the database for auditing and [replay](#replay).
|
|
200
|
+
All emitted signals are **persisted** to the database for auditing and [replay](#replay).
|
|
201
|
+
|
|
202
|
+
---
|
|
163
203
|
|
|
164
204
|
### Workflows
|
|
165
205
|
|
|
@@ -169,58 +209,64 @@ A **workflow** is a named sequence of steps that runs when a matching signal is
|
|
|
169
209
|
await engine.createWorkflow({
|
|
170
210
|
tenantId: 'workspace_1',
|
|
171
211
|
name: 'Incident on missed heartbeat',
|
|
172
|
-
triggerType: 'heartbeat.missed',
|
|
173
|
-
environmentFilter: 'production',
|
|
174
|
-
resourceTypeFilter: 'service',
|
|
212
|
+
triggerType: 'heartbeat.missed', // Matches signals of this type
|
|
213
|
+
environmentFilter: 'production', // Optional — only match this environment
|
|
214
|
+
resourceTypeFilter: 'service', // Optional — only match this resource type
|
|
175
215
|
steps: [
|
|
176
216
|
{ type: 'delay', name: 'wait_5m', delayMs: 300_000 },
|
|
177
217
|
{ type: 'condition', name: 'is_still_failing' },
|
|
178
218
|
{ type: 'action', name: 'create_incident' },
|
|
179
219
|
],
|
|
180
|
-
config: { severity: 'high' },
|
|
220
|
+
config: { severity: 'high' }, // Passed to handlers via ctx.config
|
|
181
221
|
isEnabled: true,
|
|
182
222
|
})
|
|
183
223
|
```
|
|
184
224
|
|
|
185
|
-
When a signal is emitted, the engine finds all enabled workflows matching the signal type (and optional filters), creates a **run** for each, and begins scheduling steps.
|
|
225
|
+
When a signal is emitted, the engine finds **all enabled workflows** matching the signal type (and optional environment/resource filters), creates a **run** for each, and begins scheduling steps.
|
|
226
|
+
|
|
227
|
+
Workflows can be enabled/disabled at runtime:
|
|
186
228
|
|
|
187
229
|
```ts
|
|
188
230
|
await engine.disableWorkflow('wfd_abc123')
|
|
189
231
|
await engine.enableWorkflow('wfd_abc123')
|
|
190
232
|
```
|
|
191
233
|
|
|
234
|
+
---
|
|
235
|
+
|
|
192
236
|
### Steps
|
|
193
237
|
|
|
194
|
-
Each workflow contains an ordered list of steps. Steps execute sequentially — each must complete before the next begins.
|
|
238
|
+
Each workflow contains an ordered list of steps. Steps execute sequentially — each step must complete before the next begins.
|
|
195
239
|
|
|
196
|
-
|
|
197
|
-
|:--|:--|
|
|
198
|
-
| `action` | Execute a registered handler function |
|
|
199
|
-
| `condition` | Evaluate a boolean to branch or complete early |
|
|
200
|
-
| `delay` | Pause execution for a duration (via BullMQ delayed jobs) |
|
|
201
|
-
| `parallel` | Execute multiple branches concurrently |
|
|
240
|
+
#### `action` — Execute logic
|
|
202
241
|
|
|
203
|
-
|
|
242
|
+
Calls a registered handler function. The handler receives a [`WorkflowContext`](#handler-context) and returns an `ActionResult`.
|
|
204
243
|
|
|
205
244
|
```ts
|
|
206
245
|
engine.registerAction('send_alert', async (ctx) => {
|
|
207
246
|
await sendSlackMessage(ctx.config.channel, `Alert for ${ctx.trigger.resourceId}`)
|
|
208
247
|
return { success: true, data: { sent: true } }
|
|
209
|
-
}, { replaySafe: false })
|
|
248
|
+
}, { replaySafe: false }) // Will be skipped during replay
|
|
210
249
|
```
|
|
211
250
|
|
|
212
|
-
|
|
251
|
+
```ts
|
|
252
|
+
// In workflow steps:
|
|
253
|
+
{ type: 'action', name: 'send_alert' }
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
**Retry policy:** Action steps can define a retry policy for automatic retries on failure:
|
|
213
257
|
|
|
214
258
|
```ts
|
|
215
259
|
{
|
|
216
260
|
type: 'action',
|
|
217
261
|
name: 'send_alert',
|
|
218
262
|
retryPolicy: { maxAttempts: 3, backoffMs: 5000 },
|
|
219
|
-
timeoutMs: 30_000,
|
|
263
|
+
timeoutMs: 30_000, // Optional step timeout
|
|
220
264
|
}
|
|
221
265
|
```
|
|
222
266
|
|
|
223
|
-
####
|
|
267
|
+
#### `condition` — Branch on logic
|
|
268
|
+
|
|
269
|
+
Evaluates a boolean. If the condition returns `false`, the workflow **completes early** by default (status: `completed`, not `failed`).
|
|
224
270
|
|
|
225
271
|
```ts
|
|
226
272
|
engine.registerCondition('monitor_still_failing', async (ctx) => {
|
|
@@ -229,90 +275,159 @@ engine.registerCondition('monitor_still_failing', async (ctx) => {
|
|
|
229
275
|
})
|
|
230
276
|
```
|
|
231
277
|
|
|
232
|
-
|
|
278
|
+
```ts
|
|
279
|
+
// In workflow steps:
|
|
280
|
+
{ type: 'condition', name: 'monitor_still_failing' }
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
**`onFalse` behavior:** Control what happens when a condition returns `false`:
|
|
233
284
|
|
|
234
285
|
```ts
|
|
235
|
-
|
|
236
|
-
{ type: 'condition', name: 'check', onFalse: '
|
|
237
|
-
|
|
286
|
+
// Default: complete the workflow early
|
|
287
|
+
{ type: 'condition', name: 'check', onFalse: 'complete' }
|
|
288
|
+
|
|
289
|
+
// Skip the next step and continue
|
|
290
|
+
{ type: 'condition', name: 'check', onFalse: 'skip' }
|
|
291
|
+
|
|
292
|
+
// Skip the next N steps
|
|
293
|
+
{ type: 'condition', name: 'check', onFalse: 3 }
|
|
238
294
|
```
|
|
239
295
|
|
|
240
|
-
####
|
|
296
|
+
#### `delay` — Wait before continuing
|
|
297
|
+
|
|
298
|
+
Pauses execution for a duration in milliseconds. Uses BullMQ delayed jobs for reliable scheduling. Maximum delay: 30 days.
|
|
241
299
|
|
|
242
300
|
```ts
|
|
301
|
+
// In workflow steps:
|
|
243
302
|
{ type: 'delay', name: 'wait_5_minutes', delayMs: 5 * 60 * 1000 }
|
|
244
303
|
```
|
|
245
304
|
|
|
246
|
-
|
|
305
|
+
#### `parallel` — Execute branches concurrently
|
|
306
|
+
|
|
307
|
+
Runs multiple branches of steps simultaneously. See [Parallel Steps](#parallel-steps) below.
|
|
308
|
+
|
|
309
|
+
---
|
|
247
310
|
|
|
248
|
-
|
|
311
|
+
### Parallel Steps
|
|
249
312
|
|
|
250
|
-
|
|
313
|
+
The `parallel` step type executes multiple branches concurrently. Each branch is an independent sequence of steps. All branches must complete before the workflow continues to the next top-level step.
|
|
251
314
|
|
|
252
315
|
```ts
|
|
253
|
-
{
|
|
254
|
-
|
|
255
|
-
name: '
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
316
|
+
await engine.createWorkflow({
|
|
317
|
+
tenantId: 'workspace_1',
|
|
318
|
+
name: 'Multi-channel notification',
|
|
319
|
+
triggerType: 'alert.triggered',
|
|
320
|
+
steps: [
|
|
321
|
+
{
|
|
322
|
+
type: 'parallel',
|
|
323
|
+
name: 'notify_all_channels',
|
|
324
|
+
branches: [
|
|
325
|
+
// Branch 0: Email notification
|
|
326
|
+
[
|
|
327
|
+
{ type: 'action', name: 'send_email' },
|
|
328
|
+
{ type: 'action', name: 'log_email_sent' },
|
|
329
|
+
],
|
|
330
|
+
// Branch 1: Slack notification
|
|
331
|
+
[
|
|
332
|
+
{ type: 'action', name: 'send_slack' },
|
|
333
|
+
],
|
|
334
|
+
// Branch 2: SMS notification
|
|
335
|
+
[
|
|
336
|
+
{ type: 'action', name: 'send_sms' },
|
|
337
|
+
],
|
|
338
|
+
],
|
|
339
|
+
},
|
|
340
|
+
// This step runs after ALL branches complete
|
|
341
|
+
{ type: 'action', name: 'mark_notified' },
|
|
260
342
|
],
|
|
261
|
-
}
|
|
343
|
+
config: {},
|
|
344
|
+
isEnabled: true,
|
|
345
|
+
})
|
|
262
346
|
```
|
|
263
347
|
|
|
264
|
-
|
|
265
|
-
-
|
|
266
|
-
-
|
|
267
|
-
-
|
|
348
|
+
**Behavior:**
|
|
349
|
+
- Each branch executes independently and concurrently via BullMQ jobs
|
|
350
|
+
- Branches can contain any step type (action, condition, delay)
|
|
351
|
+
- All branches share the run's `context` object
|
|
352
|
+
- The parallel step completes when **all** branches complete
|
|
353
|
+
- If **any** branch fails, the parallel step fails and the run fails
|
|
354
|
+
- A parallel step must have at least 2 branches
|
|
268
355
|
|
|
269
|
-
|
|
356
|
+
**Branch step tracking:** Each step within a branch is tracked with a `branchIndex` and `parentStepRunId`, linking it back to the parallel step that spawned it. This is visible in the step runs returned by `getRunSteps()`.
|
|
270
357
|
|
|
271
|
-
|
|
358
|
+
---
|
|
359
|
+
|
|
360
|
+
### Cron-Triggered Workflows
|
|
361
|
+
|
|
362
|
+
Workflows can be scheduled to run on a recurring basis using cron expressions. When a cron schedule fires, the engine automatically emits a trigger of the workflow's `triggerType`, which matches the workflow and creates a new run.
|
|
272
363
|
|
|
273
364
|
```ts
|
|
274
365
|
await engine.createWorkflow({
|
|
275
366
|
tenantId: 'workspace_1',
|
|
276
367
|
name: 'Daily cleanup',
|
|
277
368
|
triggerType: 'maintenance.cleanup',
|
|
278
|
-
steps: [
|
|
369
|
+
steps: [
|
|
370
|
+
{ type: 'action', name: 'cleanup_old_records' },
|
|
371
|
+
{ type: 'action', name: 'send_cleanup_report' },
|
|
372
|
+
],
|
|
279
373
|
config: {},
|
|
280
374
|
isEnabled: true,
|
|
281
|
-
cronExpression: '0 2 * * *',
|
|
375
|
+
cronExpression: '0 2 * * *', // Run daily at 2:00 AM
|
|
282
376
|
})
|
|
283
377
|
```
|
|
284
378
|
|
|
285
|
-
|
|
379
|
+
**Behavior:**
|
|
380
|
+
- Cron jobs are managed via BullMQ repeatable jobs on a dedicated cron queue
|
|
381
|
+
- When `engine.start()` is called, all enabled cron definitions are restored
|
|
382
|
+
- Enabling/disabling a workflow also starts/stops its cron job
|
|
383
|
+
- The cron fires a trigger with `source: 'cron'` and the workflow's `triggerType`
|
|
384
|
+
- Standard cron expression format: `minute hour dayOfMonth month dayOfWeek`
|
|
385
|
+
|
|
386
|
+
**Examples of cron expressions:**
|
|
387
|
+
- `* * * * *` — every minute
|
|
388
|
+
- `0 */6 * * *` — every 6 hours
|
|
389
|
+
- `0 9 * * 1-5` — weekdays at 9:00 AM
|
|
390
|
+
- `0 0 1 * *` — first day of every month at midnight
|
|
391
|
+
|
|
392
|
+
---
|
|
286
393
|
|
|
287
394
|
### Handler Context
|
|
288
395
|
|
|
289
|
-
Every action and condition handler receives a `WorkflowContext
|
|
396
|
+
Every action and condition handler receives a `WorkflowContext` object:
|
|
290
397
|
|
|
291
398
|
```ts
|
|
292
399
|
engine.registerAction('my_action', async (ctx) => {
|
|
293
|
-
ctx.tenantId
|
|
294
|
-
ctx.trigger
|
|
295
|
-
ctx.run
|
|
296
|
-
ctx.step
|
|
297
|
-
ctx.config
|
|
298
|
-
ctx.isReplay
|
|
299
|
-
ctx.emit(...)
|
|
300
|
-
ctx.log(...)
|
|
400
|
+
ctx.tenantId // string — current tenant
|
|
401
|
+
ctx.trigger // TriggerEnvelope — the signal that started this run
|
|
402
|
+
ctx.run // WorkflowRun — current run state
|
|
403
|
+
ctx.step // WorkflowStepRun — current step state
|
|
404
|
+
ctx.config // Record<string, unknown> — workflow-level config
|
|
405
|
+
ctx.isReplay // boolean — true if this is a replay execution
|
|
406
|
+
ctx.emit(...) // Emit another signal (enables workflow chaining)
|
|
407
|
+
ctx.log(...) // Write to execution log
|
|
301
408
|
|
|
302
409
|
return { success: true }
|
|
303
410
|
})
|
|
304
411
|
```
|
|
305
412
|
|
|
413
|
+
The `emit` function on the context lets actions trigger other workflows, enabling powerful **workflow chaining** patterns. See [the chaining example](#chaining-workflow-that-triggers-another-workflow).
|
|
414
|
+
|
|
415
|
+
---
|
|
416
|
+
|
|
306
417
|
### Lifecycle Hooks
|
|
307
418
|
|
|
419
|
+
The engine supports lifecycle hooks for observability and integration:
|
|
420
|
+
|
|
308
421
|
```ts
|
|
309
422
|
const engine = createEngine({
|
|
310
423
|
db: pool,
|
|
311
424
|
redis: redisClient,
|
|
312
425
|
onStepComplete: (event) => {
|
|
426
|
+
// Called after each step completes, fails, or is skipped
|
|
313
427
|
console.log(`Step ${event.step.stepName}: ${event.step.status}`)
|
|
314
428
|
},
|
|
315
429
|
onRunComplete: (event) => {
|
|
430
|
+
// Called when a run reaches completed, failed, or canceled status
|
|
316
431
|
console.log(`Run ${event.run.id}: ${event.status}`)
|
|
317
432
|
},
|
|
318
433
|
})
|
|
@@ -320,384 +435,500 @@ const engine = createEngine({
|
|
|
320
435
|
|
|
321
436
|
Hooks are fire-and-forget — errors in hooks do not affect workflow execution.
|
|
322
437
|
|
|
438
|
+
---
|
|
439
|
+
|
|
323
440
|
### Replay
|
|
324
441
|
|
|
325
|
-
|
|
442
|
+
All signals are persisted, enabling **replay** — re-processing a historical signal through the matching pipeline as if it were emitted again.
|
|
326
443
|
|
|
327
444
|
```ts
|
|
328
|
-
|
|
329
|
-
await engine.replay('trg_abc123'
|
|
445
|
+
// Full replay — creates new runs and executes all steps
|
|
446
|
+
await engine.replay('trg_abc123')
|
|
447
|
+
|
|
448
|
+
// Dry run — creates runs but skips all actions (logs only)
|
|
449
|
+
await engine.replay('trg_abc123', { dryRun: true })
|
|
330
450
|
```
|
|
331
451
|
|
|
332
|
-
|
|
452
|
+
**Replay safety:** Each action declares whether it is safe to re-execute via the `replaySafe` option:
|
|
333
453
|
|
|
334
454
|
```ts
|
|
455
|
+
// Safe to replay — idempotent or read-only
|
|
335
456
|
engine.registerAction('create_incident', handler, { replaySafe: true })
|
|
457
|
+
|
|
458
|
+
// Not safe to replay — side effects like emails, charges
|
|
336
459
|
engine.registerAction('send_email', handler, { replaySafe: false })
|
|
337
460
|
```
|
|
338
461
|
|
|
339
|
-
|
|
462
|
+
During replay, actions with `replaySafe: false` are **skipped** (status: `skipped`). This prevents duplicate emails, charges, or other non-idempotent side effects.
|
|
463
|
+
|
|
464
|
+
Runs created by replay are marked with `isReplay: true`, and handlers can check `ctx.isReplay` to adjust behavior.
|
|
465
|
+
|
|
466
|
+
---
|
|
467
|
+
|
|
468
|
+
### Run Timeline
|
|
340
469
|
|
|
341
|
-
|
|
470
|
+
The run timeline provides a chronological view of everything that happened during a workflow run — useful for debugging dashboards and audit trails.
|
|
342
471
|
|
|
343
472
|
```ts
|
|
344
|
-
const
|
|
473
|
+
const timeline = await engine.getRunTimeline('run_abc123')
|
|
474
|
+
|
|
475
|
+
for (const entry of timeline) {
|
|
476
|
+
console.log(`[${entry.timestamp}] ${entry.type}`, {
|
|
477
|
+
stepName: entry.stepName,
|
|
478
|
+
detail: entry.detail,
|
|
479
|
+
})
|
|
480
|
+
}
|
|
345
481
|
```
|
|
346
482
|
|
|
347
|
-
|
|
483
|
+
**Entry types:**
|
|
484
|
+
- `run_created` — run was created
|
|
485
|
+
- `step_scheduled` — step was queued for execution
|
|
486
|
+
- `step_started` — step began executing
|
|
487
|
+
- `step_completed` — step finished successfully
|
|
488
|
+
- `step_failed` — step failed with an error
|
|
489
|
+
- `step_skipped` — step was skipped (condition false, replay, etc.)
|
|
490
|
+
- `run_completed` — run finished successfully
|
|
491
|
+
- `run_failed` — run ended due to a step failure
|
|
492
|
+
- `run_canceled` — run was canceled (includes cancel reason in `detail`)
|
|
493
|
+
- `log` — execution log entry from `ctx.log()` (includes level, message, data in `detail`)
|
|
494
|
+
|
|
495
|
+
The timeline is assembled from a single SQL query across the runs, steps, and execution-log tables, ordered chronologically.
|
|
496
|
+
|
|
497
|
+
---
|
|
498
|
+
|
|
499
|
+
### Cancel & Retry
|
|
500
|
+
|
|
501
|
+
#### Canceling a Running Workflow
|
|
348
502
|
|
|
349
|
-
|
|
503
|
+
Cancel an in-progress run to stop further step execution:
|
|
350
504
|
|
|
351
505
|
```ts
|
|
352
|
-
const
|
|
506
|
+
const canceledRun = await engine.cancelRun('run_abc123', 'No longer needed')
|
|
353
507
|
```
|
|
354
508
|
|
|
355
|
-
|
|
509
|
+
**Behavior:**
|
|
510
|
+
- Sets the run status to `canceled` with a timestamp and optional reason
|
|
511
|
+
- Marks all pending/scheduled step runs as `skipped`
|
|
512
|
+
- Removes pending BullMQ jobs for that run
|
|
513
|
+
- The executor checks for canceled status before executing each step
|
|
514
|
+
- Fires the `onRunComplete` hook with `status: 'canceled'`
|
|
515
|
+
- Cannot cancel runs that are already `completed`, `failed`, or `canceled`
|
|
356
516
|
|
|
357
|
-
|
|
517
|
+
#### Retrying Failed Runs
|
|
518
|
+
|
|
519
|
+
Resume a failed run from the step that failed:
|
|
358
520
|
|
|
359
521
|
```ts
|
|
360
|
-
const
|
|
361
|
-
const tenantFailed = await engine.getFailedRuns('workspace_1')
|
|
522
|
+
const retriedRun = await engine.retryRun('run_abc123')
|
|
362
523
|
```
|
|
363
524
|
|
|
364
|
-
|
|
525
|
+
**Behavior:**
|
|
526
|
+
- Finds the first failed step in the run
|
|
527
|
+
- Resets that step to `pending` status (clears error, timestamps)
|
|
528
|
+
- Sets the run back to `running` status
|
|
529
|
+
- Re-enqueues the step in BullMQ with its original retry policy
|
|
530
|
+
- Cannot retry runs that aren't in `failed` status
|
|
531
|
+
|
|
532
|
+
#### Listing Failed Runs
|
|
365
533
|
|
|
366
|
-
|
|
534
|
+
Query failed runs for monitoring dashboards:
|
|
367
535
|
|
|
368
536
|
```ts
|
|
369
|
-
|
|
537
|
+
// All failed runs
|
|
538
|
+
const allFailed = await engine.getFailedRuns()
|
|
370
539
|
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
}
|
|
540
|
+
// Failed runs for a specific tenant
|
|
541
|
+
const tenantFailed = await engine.getFailedRuns('workspace_1')
|
|
374
542
|
```
|
|
375
543
|
|
|
376
|
-
Entry types: `run_created`, `step_scheduled`, `step_started`, `step_completed`, `step_failed`, `step_skipped`, `run_completed`, `run_failed`, `run_canceled`, `log`
|
|
377
|
-
|
|
378
544
|
---
|
|
379
545
|
|
|
380
546
|
## API Reference
|
|
381
547
|
|
|
382
548
|
### `createEngine(config)`
|
|
383
549
|
|
|
550
|
+
Creates and returns an engine instance.
|
|
551
|
+
|
|
384
552
|
```ts
|
|
385
553
|
import { createEngine } from '@kylebegeman/pulse'
|
|
386
554
|
|
|
387
555
|
const engine = createEngine({
|
|
388
|
-
db: pool,
|
|
389
|
-
redis: redisClient,
|
|
390
|
-
tablePrefix: 'pulse_',
|
|
391
|
-
queuePrefix: 'pulse',
|
|
392
|
-
concurrency: 5,
|
|
393
|
-
onStepComplete: (e) => {},
|
|
394
|
-
onRunComplete: (e) => {},
|
|
556
|
+
db: pool, // Required — pg Pool instance
|
|
557
|
+
redis: redisClient, // Required — ioredis instance
|
|
558
|
+
tablePrefix: 'pulse_', // Optional — table name prefix (default: 'pulse_')
|
|
559
|
+
queuePrefix: 'pulse', // Optional — BullMQ queue prefix (default: 'pulse')
|
|
560
|
+
concurrency: 5, // Optional — step worker concurrency (default: 5)
|
|
561
|
+
onStepComplete: (e) => {}, // Optional — lifecycle hook
|
|
562
|
+
onRunComplete: (e) => {}, // Optional — lifecycle hook
|
|
395
563
|
})
|
|
396
564
|
```
|
|
397
565
|
|
|
566
|
+
---
|
|
567
|
+
|
|
398
568
|
### Engine Methods
|
|
399
569
|
|
|
400
570
|
#### Registration
|
|
401
571
|
|
|
402
572
|
| Method | Description |
|
|
403
|
-
|
|
573
|
+
|---|---|
|
|
404
574
|
| `registerTrigger(type, registration)` | Register a signal type with source, optional resource type, and optional Zod payload schema |
|
|
405
|
-
| `registerAction(name, handler, options?)` | Register an action handler. Set `replaySafe` in options |
|
|
406
|
-
| `registerCondition(name, handler)` | Register a condition handler
|
|
575
|
+
| `registerAction(name, handler, options?)` | Register an action handler. Set `replaySafe: true/false` in options |
|
|
576
|
+
| `registerCondition(name, handler)` | Register a condition handler that returns a boolean |
|
|
407
577
|
|
|
408
578
|
#### Signals
|
|
409
579
|
|
|
410
580
|
| Method | Returns | Description |
|
|
411
|
-
|
|
412
|
-
| `emit(trigger)` | `TriggerEnvelope
|
|
581
|
+
|---|---|---|
|
|
582
|
+
| `emit(trigger)` | `Promise<TriggerEnvelope>` | Emit a signal. Persists the trigger, matches workflows, and creates runs |
|
|
413
583
|
|
|
414
584
|
#### Lifecycle
|
|
415
585
|
|
|
416
|
-
| Method | Description |
|
|
417
|
-
|
|
418
|
-
| `start()` | Start BullMQ workers and restore cron jobs |
|
|
419
|
-
| `stop()` | Graceful shutdown — waits for active jobs, stops cron |
|
|
586
|
+
| Method | Returns | Description |
|
|
587
|
+
|---|---|---|
|
|
588
|
+
| `start()` | `Promise<void>` | Start BullMQ workers and restore cron jobs. Call after registering handlers |
|
|
589
|
+
| `stop()` | `Promise<void>` | Graceful shutdown — waits for active jobs to complete, stops cron workers |
|
|
420
590
|
|
|
421
591
|
#### Queries
|
|
422
592
|
|
|
423
593
|
| Method | Returns | Description |
|
|
424
|
-
|
|
425
|
-
| `getRun(runId)` | `WorkflowRun \| null
|
|
426
|
-
| `getRunSteps(runId)` | `WorkflowStepRun[]
|
|
427
|
-
| `getRunsByTrigger(triggerId)` | `WorkflowRun[]
|
|
428
|
-
| `getTrigger(triggerId)` | `TriggerEnvelope \| null
|
|
594
|
+
|---|---|---|
|
|
595
|
+
| `getRun(runId)` | `Promise<WorkflowRun \| null>` | Get a workflow run by ID |
|
|
596
|
+
| `getRunSteps(runId)` | `Promise<WorkflowStepRun[]>` | Get all step runs for a workflow run (includes branch steps) |
|
|
597
|
+
| `getRunsByTrigger(triggerId)` | `Promise<WorkflowRun[]>` | Get all runs spawned by a trigger |
|
|
598
|
+
| `getTrigger(triggerId)` | `Promise<TriggerEnvelope \| null>` | Get a persisted trigger by ID |
|
|
429
599
|
|
|
430
600
|
#### Timeline
|
|
431
601
|
|
|
432
602
|
| Method | Returns | Description |
|
|
433
|
-
|
|
434
|
-
| `getRunTimeline(runId)` | `RunTimelineEntry[]
|
|
603
|
+
|---|---|---|
|
|
604
|
+
| `getRunTimeline(runId)` | `Promise<RunTimelineEntry[]>` | Chronological lifecycle view of a run — steps, logs, status changes |
|
|
435
605
|
|
|
436
606
|
#### Cancel & Retry
|
|
437
607
|
|
|
438
608
|
| Method | Returns | Description |
|
|
439
|
-
|
|
440
|
-
| `cancelRun(runId, reason?)` | `WorkflowRun
|
|
441
|
-
| `retryRun(runId)` | `WorkflowRun
|
|
442
|
-
| `getFailedRuns(tenantId?)` | `WorkflowRun[]
|
|
609
|
+
|---|---|---|
|
|
610
|
+
| `cancelRun(runId, reason?)` | `Promise<WorkflowRun>` | Cancel an in-progress run. Removes pending jobs |
|
|
611
|
+
| `retryRun(runId)` | `Promise<WorkflowRun>` | Retry a failed run from the failed step |
|
|
612
|
+
| `getFailedRuns(tenantId?)` | `Promise<WorkflowRun[]>` | List failed runs, optionally filtered by tenant |
|
|
443
613
|
|
|
444
614
|
#### Replay
|
|
445
615
|
|
|
446
616
|
| Method | Returns | Description |
|
|
447
|
-
|
|
448
|
-
| `replay(triggerId, options?)` | `WorkflowRun[]
|
|
617
|
+
|---|---|---|
|
|
618
|
+
| `replay(triggerId, options?)` | `Promise<WorkflowRun[]>` | Re-emit a historical trigger. Pass `{ dryRun: true }` to skip actions |
|
|
449
619
|
|
|
450
620
|
#### Workflow Management
|
|
451
621
|
|
|
452
622
|
| Method | Returns | Description |
|
|
453
|
-
|
|
454
|
-
| `createWorkflow(definition)` | `WorkflowDefinition
|
|
455
|
-
| `enableWorkflow(definitionId)` | `void
|
|
456
|
-
| `disableWorkflow(definitionId)` | `void
|
|
623
|
+
|---|---|---|
|
|
624
|
+
| `createWorkflow(definition)` | `Promise<WorkflowDefinition>` | Create a new workflow definition. Schedules cron if applicable |
|
|
625
|
+
| `enableWorkflow(definitionId)` | `Promise<void>` | Enable a workflow definition. Starts cron if defined |
|
|
626
|
+
| `disableWorkflow(definitionId)` | `Promise<void>` | Disable a workflow definition. Stops cron if defined |
|
|
457
627
|
|
|
458
628
|
#### Schema
|
|
459
629
|
|
|
460
|
-
| Method | Description |
|
|
461
|
-
|
|
462
|
-
| `migrate()` | Create `pulse_*` tables (idempotent) |
|
|
630
|
+
| Method | Returns | Description |
|
|
631
|
+
|---|---|---|
|
|
632
|
+
| `migrate()` | `Promise<void>` | Create `pulse_*` tables in your database (idempotent) |
|
|
463
633
|
|
|
464
634
|
---
|
|
465
635
|
|
|
466
|
-
##
|
|
467
|
-
|
|
468
|
-
### Monitoring: Incident on Missed Heartbeat
|
|
636
|
+
## Type Reference
|
|
469
637
|
|
|
470
|
-
|
|
638
|
+
<details>
|
|
639
|
+
<summary><code>TriggerInput</code> — Signal data you pass to <code>emit()</code></summary>
|
|
471
640
|
|
|
472
641
|
```ts
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
}
|
|
642
|
+
interface TriggerInput {
|
|
643
|
+
tenantId: string
|
|
644
|
+
source: string
|
|
645
|
+
type: string
|
|
646
|
+
resourceType?: string
|
|
647
|
+
resourceId?: string
|
|
648
|
+
environment?: string
|
|
649
|
+
payload?: Record<string, unknown>
|
|
650
|
+
}
|
|
651
|
+
```
|
|
482
652
|
|
|
483
|
-
|
|
484
|
-
const incident = await db.incidents.create({
|
|
485
|
-
tenantId: ctx.tenantId,
|
|
486
|
-
serviceId: ctx.trigger.resourceId,
|
|
487
|
-
severity: ctx.config.severity || 'medium',
|
|
488
|
-
})
|
|
489
|
-
ctx.log('Incident created', { incidentId: incident.id })
|
|
490
|
-
return { success: true, data: { incidentId: incident.id } }
|
|
491
|
-
}, { replaySafe: true })
|
|
653
|
+
</details>
|
|
492
654
|
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
return { success: true }
|
|
496
|
-
}, { replaySafe: false })
|
|
655
|
+
<details>
|
|
656
|
+
<summary><code>TriggerEnvelope</code> — Persisted signal with ID and timestamp</summary>
|
|
497
657
|
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
steps: [
|
|
504
|
-
{ type: 'delay', name: 'wait_5m', delayMs: 5 * 60 * 1000 },
|
|
505
|
-
{ type: 'condition', name: 'service_still_down' },
|
|
506
|
-
{ type: 'action', name: 'create_incident' },
|
|
507
|
-
{ type: 'action', name: 'notify_oncall' },
|
|
508
|
-
],
|
|
509
|
-
config: { severity: 'high' },
|
|
510
|
-
isEnabled: true,
|
|
511
|
-
})
|
|
658
|
+
```ts
|
|
659
|
+
interface TriggerEnvelope extends TriggerInput {
|
|
660
|
+
id: string // Auto-generated (e.g., 'trg_...')
|
|
661
|
+
createdAt: Date
|
|
662
|
+
}
|
|
512
663
|
```
|
|
513
664
|
|
|
514
|
-
|
|
665
|
+
</details>
|
|
515
666
|
|
|
516
|
-
|
|
667
|
+
<details>
|
|
668
|
+
<summary><code>TriggerRegistration</code> — Trigger type declaration</summary>
|
|
517
669
|
|
|
518
670
|
```ts
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
return { success: true }
|
|
526
|
-
}, { replaySafe: false })
|
|
671
|
+
interface TriggerRegistration {
|
|
672
|
+
source: string
|
|
673
|
+
resourceType?: string
|
|
674
|
+
payloadSchema?: ZodSchema // Optional Zod schema for payload validation
|
|
675
|
+
}
|
|
676
|
+
```
|
|
527
677
|
|
|
528
|
-
|
|
529
|
-
const sub = await billing.getSubscription(ctx.tenantId)
|
|
530
|
-
return sub.plan === 'trial'
|
|
531
|
-
})
|
|
678
|
+
</details>
|
|
532
679
|
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
ctx.log('Downgraded to free plan')
|
|
536
|
-
return { success: true }
|
|
537
|
-
}, { replaySafe: true })
|
|
680
|
+
<details>
|
|
681
|
+
<summary><code>WorkflowDefinition</code> — Stored workflow template</summary>
|
|
538
682
|
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
]
|
|
549
|
-
config:
|
|
550
|
-
isEnabled:
|
|
551
|
-
|
|
683
|
+
```ts
|
|
684
|
+
interface WorkflowDefinition {
|
|
685
|
+
id: string
|
|
686
|
+
tenantId: string
|
|
687
|
+
name: string
|
|
688
|
+
description?: string
|
|
689
|
+
triggerType: string
|
|
690
|
+
environmentFilter?: string
|
|
691
|
+
resourceTypeFilter?: string
|
|
692
|
+
steps: WorkflowStep[]
|
|
693
|
+
config: Record<string, unknown>
|
|
694
|
+
isEnabled: boolean
|
|
695
|
+
cronExpression?: string // Cron schedule (e.g., '0 2 * * *')
|
|
696
|
+
createdAt: Date
|
|
697
|
+
updatedAt: Date
|
|
698
|
+
}
|
|
552
699
|
```
|
|
553
700
|
|
|
554
|
-
|
|
701
|
+
</details>
|
|
555
702
|
|
|
556
|
-
|
|
703
|
+
<details>
|
|
704
|
+
<summary><code>WorkflowStep</code> — Step definition within a workflow</summary>
|
|
557
705
|
|
|
558
706
|
```ts
|
|
559
|
-
|
|
560
|
-
const result = await autoRemediate(ctx.trigger.resourceId)
|
|
561
|
-
|
|
562
|
-
if (result.fixed) {
|
|
563
|
-
await ctx.emit({
|
|
564
|
-
tenantId: ctx.tenantId,
|
|
565
|
-
source: 'auto-remediation',
|
|
566
|
-
type: 'service.recovered',
|
|
567
|
-
resourceType: 'service',
|
|
568
|
-
resourceId: ctx.trigger.resourceId,
|
|
569
|
-
payload: { fix: result.action },
|
|
570
|
-
})
|
|
571
|
-
}
|
|
572
|
-
|
|
573
|
-
return { success: true, data: result }
|
|
574
|
-
}, { replaySafe: true })
|
|
707
|
+
type StepType = 'action' | 'condition' | 'delay' | 'parallel'
|
|
575
708
|
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
709
|
+
interface WorkflowStep {
|
|
710
|
+
type: StepType
|
|
711
|
+
name: string
|
|
712
|
+
config?: Record<string, unknown>
|
|
713
|
+
delayMs?: number // Only for 'delay' steps
|
|
714
|
+
timeoutMs?: number // Step execution timeout
|
|
715
|
+
retryPolicy?: RetryPolicy // Retry on failure
|
|
716
|
+
onFalse?: 'complete' | 'skip' | number // Condition false behavior
|
|
717
|
+
branches?: WorkflowStep[][] // Only for 'parallel' steps
|
|
718
|
+
}
|
|
585
719
|
```
|
|
586
720
|
|
|
587
|
-
|
|
721
|
+
</details>
|
|
588
722
|
|
|
589
|
-
|
|
723
|
+
<details>
|
|
724
|
+
<summary><code>WorkflowRun</code> — Execution record for a workflow</summary>
|
|
590
725
|
|
|
591
726
|
```ts
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
},
|
|
613
|
-
isEnabled: true,
|
|
614
|
-
})
|
|
727
|
+
type WorkflowStatus = 'pending' | 'running' | 'waiting' | 'completed' | 'failed' | 'canceled'
|
|
728
|
+
|
|
729
|
+
interface WorkflowRun {
|
|
730
|
+
id: string
|
|
731
|
+
definitionId: string
|
|
732
|
+
tenantId: string
|
|
733
|
+
triggerId: string
|
|
734
|
+
status: WorkflowStatus
|
|
735
|
+
context: Record<string, unknown>
|
|
736
|
+
currentStepIndex: number
|
|
737
|
+
isReplay: boolean
|
|
738
|
+
definitionSnapshot: DefinitionSnapshot
|
|
739
|
+
startedAt?: Date
|
|
740
|
+
completedAt?: Date
|
|
741
|
+
failedAt?: Date
|
|
742
|
+
canceledAt?: Date // When the run was canceled
|
|
743
|
+
cancelReason?: string // Why the run was canceled
|
|
744
|
+
createdAt: Date
|
|
745
|
+
updatedAt: Date
|
|
746
|
+
}
|
|
615
747
|
```
|
|
616
748
|
|
|
617
|
-
|
|
749
|
+
</details>
|
|
750
|
+
|
|
751
|
+
<details>
|
|
752
|
+
<summary><code>WorkflowStepRun</code> — Execution record for a single step</summary>
|
|
618
753
|
|
|
619
754
|
```ts
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
755
|
+
type StepStatus = 'pending' | 'scheduled' | 'running' | 'completed' | 'failed' | 'skipped'
|
|
756
|
+
|
|
757
|
+
interface WorkflowStepRun {
|
|
758
|
+
id: string
|
|
759
|
+
runId: string
|
|
760
|
+
tenantId: string
|
|
761
|
+
stepIndex: number
|
|
762
|
+
stepType: StepType
|
|
763
|
+
stepName: string
|
|
764
|
+
status: StepStatus
|
|
765
|
+
scheduledFor?: Date
|
|
766
|
+
startedAt?: Date
|
|
767
|
+
completedAt?: Date
|
|
768
|
+
result?: Record<string, unknown>
|
|
769
|
+
errorMessage?: string
|
|
770
|
+
branchIndex?: number // Branch index for parallel step branches
|
|
771
|
+
parentStepRunId?: string // Parent step run ID for branch steps
|
|
772
|
+
createdAt: Date
|
|
773
|
+
}
|
|
632
774
|
```
|
|
633
775
|
|
|
634
|
-
|
|
776
|
+
</details>
|
|
777
|
+
|
|
778
|
+
<details>
|
|
779
|
+
<summary><code>RunTimelineEntry</code> — Chronological run event</summary>
|
|
635
780
|
|
|
636
781
|
```ts
|
|
637
|
-
|
|
782
|
+
interface RunTimelineEntry {
|
|
783
|
+
timestamp: Date
|
|
784
|
+
type:
|
|
785
|
+
| 'run_created' | 'run_completed' | 'run_failed' | 'run_canceled'
|
|
786
|
+
| 'step_scheduled' | 'step_started' | 'step_completed'
|
|
787
|
+
| 'step_failed' | 'step_skipped' | 'log'
|
|
788
|
+
stepIndex?: number
|
|
789
|
+
stepName?: string
|
|
790
|
+
stepType?: string
|
|
791
|
+
detail?: Record<string, unknown>
|
|
792
|
+
}
|
|
793
|
+
```
|
|
638
794
|
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
795
|
+
</details>
|
|
796
|
+
|
|
797
|
+
<details>
|
|
798
|
+
<summary><code>DefinitionSnapshot</code> — Frozen copy of workflow at run creation</summary>
|
|
799
|
+
|
|
800
|
+
```ts
|
|
801
|
+
interface DefinitionSnapshot {
|
|
802
|
+
steps: WorkflowStep[]
|
|
803
|
+
config: Record<string, unknown>
|
|
804
|
+
triggerType: string
|
|
642
805
|
}
|
|
806
|
+
```
|
|
643
807
|
|
|
644
|
-
|
|
808
|
+
</details>
|
|
809
|
+
|
|
810
|
+
<details>
|
|
811
|
+
<summary><code>RetryPolicy</code> — Retry configuration for action steps</summary>
|
|
812
|
+
|
|
813
|
+
```ts
|
|
814
|
+
interface RetryPolicy {
|
|
815
|
+
maxAttempts: number // 1-10
|
|
816
|
+
backoffMs: number // 100-300000 ms
|
|
817
|
+
}
|
|
645
818
|
```
|
|
646
819
|
|
|
647
|
-
|
|
820
|
+
</details>
|
|
648
821
|
|
|
649
|
-
|
|
822
|
+
<details>
|
|
823
|
+
<summary><code>WorkflowContext</code> — Passed to action and condition handlers</summary>
|
|
650
824
|
|
|
651
|
-
|
|
825
|
+
```ts
|
|
826
|
+
interface WorkflowContext {
|
|
827
|
+
tenantId: string
|
|
828
|
+
trigger: TriggerEnvelope
|
|
829
|
+
run: WorkflowRun
|
|
830
|
+
step: WorkflowStepRun
|
|
831
|
+
config: Record<string, unknown>
|
|
832
|
+
isReplay: boolean
|
|
833
|
+
emit: (trigger: TriggerInput) => Promise<TriggerEnvelope>
|
|
834
|
+
log: (message: string, data?: Record<string, unknown>) => void
|
|
835
|
+
}
|
|
836
|
+
```
|
|
837
|
+
|
|
838
|
+
</details>
|
|
652
839
|
|
|
653
|
-
|
|
840
|
+
<details>
|
|
841
|
+
<summary><code>ActionResult</code> — Return value from action handlers</summary>
|
|
654
842
|
|
|
655
843
|
```ts
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
844
|
+
interface ActionResult {
|
|
845
|
+
success: boolean
|
|
846
|
+
data?: Record<string, unknown>
|
|
847
|
+
error?: string
|
|
848
|
+
}
|
|
660
849
|
```
|
|
661
850
|
|
|
662
|
-
|
|
851
|
+
</details>
|
|
663
852
|
|
|
664
|
-
|
|
853
|
+
<details>
|
|
854
|
+
<summary><code>ActionOptions</code> — Options for action registration</summary>
|
|
665
855
|
|
|
666
|
-
```
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
856
|
+
```ts
|
|
857
|
+
interface ActionOptions {
|
|
858
|
+
replaySafe: boolean // Whether this action can be re-executed during replay
|
|
859
|
+
}
|
|
860
|
+
```
|
|
861
|
+
|
|
862
|
+
</details>
|
|
863
|
+
|
|
864
|
+
<details>
|
|
865
|
+
<summary><code>EngineConfig</code> — Configuration for <code>createEngine()</code></summary>
|
|
866
|
+
|
|
867
|
+
```ts
|
|
868
|
+
interface EngineConfig {
|
|
869
|
+
db: Pool // pg Pool instance
|
|
870
|
+
redis: unknown // ioredis instance
|
|
871
|
+
tablePrefix?: string // Default: 'pulse_'
|
|
872
|
+
queuePrefix?: string // Default: 'pulse'
|
|
873
|
+
concurrency?: number // Step worker concurrency (default: 5)
|
|
874
|
+
onStepComplete?: LifecycleHook<StepCompleteEvent>
|
|
875
|
+
onRunComplete?: LifecycleHook<RunCompleteEvent>
|
|
876
|
+
}
|
|
670
877
|
```
|
|
671
878
|
|
|
672
|
-
|
|
879
|
+
</details>
|
|
673
880
|
|
|
674
|
-
|
|
881
|
+
<details>
|
|
882
|
+
<summary><code>StepCompleteEvent</code> / <code>RunCompleteEvent</code> — Lifecycle hook payloads</summary>
|
|
675
883
|
|
|
676
|
-
|
|
884
|
+
```ts
|
|
885
|
+
interface StepCompleteEvent {
|
|
886
|
+
run: WorkflowRun
|
|
887
|
+
step: WorkflowStepRun
|
|
888
|
+
}
|
|
677
889
|
|
|
890
|
+
interface RunCompleteEvent {
|
|
891
|
+
run: WorkflowRun
|
|
892
|
+
status: 'completed' | 'failed' | 'canceled'
|
|
893
|
+
}
|
|
678
894
|
```
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
895
|
+
|
|
896
|
+
</details>
|
|
897
|
+
|
|
898
|
+
<details>
|
|
899
|
+
<summary><code>ReplayOptions</code> — Options for <code>replay()</code></summary>
|
|
900
|
+
|
|
901
|
+
```ts
|
|
902
|
+
interface ReplayOptions {
|
|
903
|
+
dryRun?: boolean // If true, skip all actions (log only)
|
|
904
|
+
}
|
|
683
905
|
```
|
|
684
906
|
|
|
685
|
-
|
|
907
|
+
</details>
|
|
686
908
|
|
|
687
|
-
|
|
909
|
+
<details>
|
|
910
|
+
<summary><code>ExecutionLog</code> — Structured log entry</summary>
|
|
688
911
|
|
|
689
912
|
```ts
|
|
690
|
-
|
|
691
|
-
|
|
913
|
+
interface ExecutionLog {
|
|
914
|
+
id: string
|
|
915
|
+
runId: string
|
|
916
|
+
stepRunId?: string
|
|
917
|
+
tenantId: string
|
|
918
|
+
level: 'info' | 'warn' | 'error'
|
|
919
|
+
message: string
|
|
920
|
+
data?: Record<string, unknown>
|
|
921
|
+
createdAt: Date
|
|
922
|
+
}
|
|
692
923
|
```
|
|
693
924
|
|
|
694
|
-
|
|
925
|
+
</details>
|
|
695
926
|
|
|
696
927
|
---
|
|
697
928
|
|
|
698
929
|
## Database Schema
|
|
699
930
|
|
|
700
|
-
The engine creates tables with the prefix `pulse_` (configurable). All tables use `TEXT` primary keys with auto-generated IDs. Migrations are idempotent — safe to call on every startup.
|
|
931
|
+
The engine creates tables in your PostgreSQL database with the prefix `pulse_` (configurable). All tables use `TEXT` primary keys with auto-generated IDs. Migrations are idempotent — safe to call on every startup.
|
|
701
932
|
|
|
702
933
|
```ts
|
|
703
934
|
await engine.migrate()
|
|
@@ -705,9 +936,11 @@ await engine.migrate()
|
|
|
705
936
|
|
|
706
937
|
### `pulse_triggers`
|
|
707
938
|
|
|
939
|
+
Persisted signal records for auditing and replay.
|
|
940
|
+
|
|
708
941
|
| Column | Type | Description |
|
|
709
|
-
|
|
710
|
-
| `id` | `TEXT PK` | Trigger ID (`trg_...`) |
|
|
942
|
+
|---|---|---|
|
|
943
|
+
| `id` | `TEXT PK` | Trigger ID (e.g., `trg_...`) |
|
|
711
944
|
| `tenant_id` | `TEXT` | Tenant scope |
|
|
712
945
|
| `source` | `TEXT` | Originating system |
|
|
713
946
|
| `type` | `TEXT` | Signal type |
|
|
@@ -715,12 +948,16 @@ await engine.migrate()
|
|
|
715
948
|
| `resource_id` | `TEXT` | Specific resource |
|
|
716
949
|
| `environment` | `TEXT` | Environment scope |
|
|
717
950
|
| `payload` | `JSONB` | Arbitrary event data |
|
|
718
|
-
| `created_at` | `TIMESTAMPTZ` | When emitted |
|
|
951
|
+
| `created_at` | `TIMESTAMPTZ` | When the signal was emitted |
|
|
952
|
+
|
|
953
|
+
**Indexes:** `(tenant_id, type)`, `(created_at)`
|
|
719
954
|
|
|
720
955
|
### `pulse_workflow_definitions`
|
|
721
956
|
|
|
957
|
+
Workflow templates that define step sequences and matching rules.
|
|
958
|
+
|
|
722
959
|
| Column | Type | Description |
|
|
723
|
-
|
|
960
|
+
|---|---|---|
|
|
724
961
|
| `id` | `TEXT PK` | Definition ID |
|
|
725
962
|
| `tenant_id` | `TEXT` | Tenant scope |
|
|
726
963
|
| `name` | `TEXT` | Human-readable name |
|
|
@@ -730,15 +967,19 @@ await engine.migrate()
|
|
|
730
967
|
| `resource_type_filter` | `TEXT` | Optional resource type filter |
|
|
731
968
|
| `steps` | `JSONB` | Ordered step definitions |
|
|
732
969
|
| `config` | `JSONB` | Config passed to handlers |
|
|
733
|
-
| `is_enabled` | `BOOLEAN` | Whether active |
|
|
734
|
-
| `cron_expression` | `TEXT` | Optional cron schedule |
|
|
970
|
+
| `is_enabled` | `BOOLEAN` | Whether the workflow is active |
|
|
971
|
+
| `cron_expression` | `TEXT` | Optional cron schedule for automatic trigger emission |
|
|
735
972
|
| `created_at` | `TIMESTAMPTZ` | Creation time |
|
|
736
|
-
| `updated_at` | `TIMESTAMPTZ` | Last update |
|
|
973
|
+
| `updated_at` | `TIMESTAMPTZ` | Last update time |
|
|
974
|
+
|
|
975
|
+
**Indexes:** `(tenant_id, trigger_type, is_enabled)`
|
|
737
976
|
|
|
738
977
|
### `pulse_workflow_runs`
|
|
739
978
|
|
|
979
|
+
Execution records — one per workflow triggered by a signal.
|
|
980
|
+
|
|
740
981
|
| Column | Type | Description |
|
|
741
|
-
|
|
982
|
+
|---|---|---|
|
|
742
983
|
| `id` | `TEXT PK` | Run ID |
|
|
743
984
|
| `definition_id` | `TEXT FK` | Workflow definition |
|
|
744
985
|
| `tenant_id` | `TEXT` | Tenant scope |
|
|
@@ -746,330 +987,524 @@ await engine.migrate()
|
|
|
746
987
|
| `status` | `TEXT` | `pending` / `running` / `waiting` / `completed` / `failed` / `canceled` |
|
|
747
988
|
| `context` | `JSONB` | Shared run context |
|
|
748
989
|
| `current_step_index` | `INTEGER` | Active step position |
|
|
749
|
-
| `is_replay` | `BOOLEAN` | Whether this is a replay |
|
|
750
|
-
| `definition_snapshot` | `JSONB` | Frozen copy of workflow at run creation |
|
|
990
|
+
| `is_replay` | `BOOLEAN` | Whether this is a replay run |
|
|
991
|
+
| `definition_snapshot` | `JSONB` | Frozen copy of workflow definition at run creation |
|
|
751
992
|
| `started_at` | `TIMESTAMPTZ` | When execution began |
|
|
752
|
-
| `completed_at` | `TIMESTAMPTZ` | When completed |
|
|
753
|
-
| `failed_at` | `TIMESTAMPTZ` | When failed |
|
|
754
|
-
| `canceled_at` | `TIMESTAMPTZ` | When canceled |
|
|
755
|
-
| `cancel_reason` | `TEXT` |
|
|
993
|
+
| `completed_at` | `TIMESTAMPTZ` | When execution completed |
|
|
994
|
+
| `failed_at` | `TIMESTAMPTZ` | When execution failed |
|
|
995
|
+
| `canceled_at` | `TIMESTAMPTZ` | When the run was canceled |
|
|
996
|
+
| `cancel_reason` | `TEXT` | Why the run was canceled |
|
|
756
997
|
| `created_at` | `TIMESTAMPTZ` | Creation time |
|
|
757
|
-
| `updated_at` | `TIMESTAMPTZ` | Last update |
|
|
998
|
+
| `updated_at` | `TIMESTAMPTZ` | Last update time |
|
|
999
|
+
|
|
1000
|
+
**Indexes:** `(tenant_id, status)`, `(trigger_id)`
|
|
758
1001
|
|
|
759
1002
|
### `pulse_workflow_step_runs`
|
|
760
1003
|
|
|
1004
|
+
Individual step execution records within a run.
|
|
1005
|
+
|
|
761
1006
|
| Column | Type | Description |
|
|
762
|
-
|
|
1007
|
+
|---|---|---|
|
|
763
1008
|
| `id` | `TEXT PK` | Step run ID |
|
|
764
1009
|
| `run_id` | `TEXT FK` | Parent workflow run |
|
|
765
1010
|
| `tenant_id` | `TEXT` | Tenant scope |
|
|
766
|
-
| `step_index` | `INTEGER` | Position in sequence |
|
|
1011
|
+
| `step_index` | `INTEGER` | Position in step sequence |
|
|
767
1012
|
| `step_type` | `TEXT` | `action` / `condition` / `delay` / `parallel` |
|
|
768
1013
|
| `step_name` | `TEXT` | Registered handler name |
|
|
769
1014
|
| `status` | `TEXT` | `pending` / `scheduled` / `running` / `completed` / `failed` / `skipped` |
|
|
770
|
-
| `scheduled_for` | `TIMESTAMPTZ` | When
|
|
1015
|
+
| `scheduled_for` | `TIMESTAMPTZ` | When the step should execute (for delays) |
|
|
771
1016
|
| `started_at` | `TIMESTAMPTZ` | When execution began |
|
|
772
|
-
| `completed_at` | `TIMESTAMPTZ` | When completed |
|
|
1017
|
+
| `completed_at` | `TIMESTAMPTZ` | When execution completed |
|
|
773
1018
|
| `result` | `JSONB` | Action result data |
|
|
774
|
-
| `error_message` | `TEXT` | Error details |
|
|
775
|
-
| `branch_index` | `INTEGER` | Branch index (
|
|
776
|
-
| `parent_step_run_id` | `TEXT` | Parent parallel step |
|
|
1019
|
+
| `error_message` | `TEXT` | Error details on failure |
|
|
1020
|
+
| `branch_index` | `INTEGER` | Branch index within a parallel step (null for top-level) |
|
|
1021
|
+
| `parent_step_run_id` | `TEXT` | Parent parallel step run ID (null for top-level) |
|
|
777
1022
|
| `created_at` | `TIMESTAMPTZ` | Creation time |
|
|
778
1023
|
|
|
1024
|
+
**Indexes:** `(run_id, step_index)`, `(status, scheduled_for) WHERE status = 'scheduled'`
|
|
1025
|
+
|
|
779
1026
|
### `pulse_execution_logs`
|
|
780
1027
|
|
|
1028
|
+
Structured execution logs written by `ctx.log()` calls.
|
|
1029
|
+
|
|
781
1030
|
| Column | Type | Description |
|
|
782
|
-
|
|
1031
|
+
|---|---|---|
|
|
783
1032
|
| `id` | `TEXT PK` | Log entry ID |
|
|
784
1033
|
| `run_id` | `TEXT FK` | Parent workflow run |
|
|
785
|
-
| `step_run_id` | `TEXT FK` | Associated step |
|
|
1034
|
+
| `step_run_id` | `TEXT FK` | Associated step (optional) |
|
|
786
1035
|
| `tenant_id` | `TEXT` | Tenant scope |
|
|
787
1036
|
| `level` | `TEXT` | `info` / `warn` / `error` |
|
|
788
1037
|
| `message` | `TEXT` | Log message |
|
|
789
1038
|
| `data` | `JSONB` | Structured log data |
|
|
790
|
-
| `created_at` | `TIMESTAMPTZ` | When written |
|
|
1039
|
+
| `created_at` | `TIMESTAMPTZ` | When the log was written |
|
|
1040
|
+
|
|
1041
|
+
**Indexes:** `(run_id, created_at)`
|
|
791
1042
|
|
|
792
1043
|
---
|
|
793
1044
|
|
|
794
|
-
##
|
|
1045
|
+
## Runtime Guarantees
|
|
795
1046
|
|
|
796
|
-
|
|
797
|
-
<summary><code>TriggerInput</code></summary>
|
|
1047
|
+
### Execution Semantics
|
|
798
1048
|
|
|
799
|
-
|
|
800
|
-
interface TriggerInput {
|
|
801
|
-
tenantId: string
|
|
802
|
-
source: string
|
|
803
|
-
type: string
|
|
804
|
-
resourceType?: string
|
|
805
|
-
resourceId?: string
|
|
806
|
-
environment?: string
|
|
807
|
-
payload?: Record<string, unknown>
|
|
808
|
-
}
|
|
809
|
-
```
|
|
810
|
-
</details>
|
|
1049
|
+
Pulse provides **at-least-once** execution semantics. If a worker crashes after an action completes but before the engine records success, the step may execute again on retry. Design your actions to be **idempotent** where possible — especially for side effects like sending emails, charging customers, or creating external resources.
|
|
811
1050
|
|
|
812
|
-
|
|
813
|
-
<summary><code>TriggerEnvelope</code></summary>
|
|
1051
|
+
You can derive idempotency keys from the workflow context passed to every action:
|
|
814
1052
|
|
|
815
1053
|
```ts
|
|
816
|
-
|
|
817
|
-
id
|
|
818
|
-
|
|
819
|
-
}
|
|
1054
|
+
engine.registerAction('send_email', async (ctx) => {
|
|
1055
|
+
const idempotencyKey = `${ctx.run.id}:${ctx.step.stepName}`
|
|
1056
|
+
// Use this key with your email provider to prevent duplicates
|
|
1057
|
+
})
|
|
820
1058
|
```
|
|
821
|
-
</details>
|
|
822
|
-
|
|
823
|
-
<details>
|
|
824
|
-
<summary><code>TriggerRegistration</code></summary>
|
|
825
1059
|
|
|
826
|
-
|
|
827
|
-
interface TriggerRegistration {
|
|
828
|
-
source: string
|
|
829
|
-
resourceType?: string
|
|
830
|
-
payloadSchema?: ZodSchema
|
|
831
|
-
}
|
|
832
|
-
```
|
|
833
|
-
</details>
|
|
1060
|
+
### Step Claiming
|
|
834
1061
|
|
|
835
|
-
|
|
836
|
-
<summary><code>WorkflowDefinition</code></summary>
|
|
1062
|
+
Every step is claimed atomically before execution using a compare-and-set query:
|
|
837
1063
|
|
|
838
|
-
```
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
name: string
|
|
843
|
-
description?: string
|
|
844
|
-
triggerType: string
|
|
845
|
-
environmentFilter?: string
|
|
846
|
-
resourceTypeFilter?: string
|
|
847
|
-
steps: WorkflowStep[]
|
|
848
|
-
config: Record<string, unknown>
|
|
849
|
-
isEnabled: boolean
|
|
850
|
-
cronExpression?: string
|
|
851
|
-
createdAt: Date
|
|
852
|
-
updatedAt: Date
|
|
853
|
-
}
|
|
1064
|
+
```sql
|
|
1065
|
+
UPDATE pulse_workflow_step_runs SET status = 'running', started_at = NOW()
|
|
1066
|
+
WHERE id = $1 AND status IN ('pending', 'scheduled')
|
|
1067
|
+
RETURNING *
|
|
854
1068
|
```
|
|
855
|
-
</details>
|
|
856
1069
|
|
|
857
|
-
|
|
858
|
-
<summary><code>WorkflowStep</code></summary>
|
|
1070
|
+
If two workers pick up the same job from the queue (e.g., due to retry or crash recovery), only one will successfully claim the step. The other receives a null result and silently no-ops.
|
|
859
1071
|
|
|
860
|
-
|
|
861
|
-
|
|
1072
|
+
### Run State Transitions
|
|
1073
|
+
|
|
1074
|
+
Run status changes are guarded by the current status. Invalid transitions (e.g., completing an already-canceled run) are rejected:
|
|
862
1075
|
|
|
863
|
-
interface WorkflowStep {
|
|
864
|
-
type: StepType
|
|
865
|
-
name: string
|
|
866
|
-
config?: Record<string, unknown>
|
|
867
|
-
delayMs?: number
|
|
868
|
-
timeoutMs?: number
|
|
869
|
-
retryPolicy?: RetryPolicy
|
|
870
|
-
onFalse?: 'complete' | 'skip' | number
|
|
871
|
-
branches?: WorkflowStep[][]
|
|
872
|
-
}
|
|
873
1076
|
```
|
|
874
|
-
|
|
1077
|
+
pending → running, canceled
|
|
1078
|
+
running → waiting, completed, failed, canceled
|
|
1079
|
+
waiting → running, canceled
|
|
1080
|
+
failed → running (retry)
|
|
1081
|
+
```
|
|
875
1082
|
|
|
876
|
-
|
|
877
|
-
|
|
1083
|
+
Attempts to transition from an unexpected state throw an error rather than silently corrupting run state.
|
|
1084
|
+
|
|
1085
|
+
### Cancellation
|
|
1086
|
+
|
|
1087
|
+
When a run is canceled:
|
|
1088
|
+
|
|
1089
|
+
1. The run status is set to `canceled` with a timestamp and optional reason.
|
|
1090
|
+
2. All pending and scheduled step runs are marked as `skipped`.
|
|
1091
|
+
3. A step that is already running will complete its current execution — the engine checks run status before scheduling the next step.
|
|
1092
|
+
|
|
1093
|
+
### Retry
|
|
1094
|
+
|
|
1095
|
+
Retrying a failed run:
|
|
1096
|
+
|
|
1097
|
+
1. Finds the first failed step run and resets it to `pending`.
|
|
1098
|
+
2. Sets the run status back to `running`.
|
|
1099
|
+
3. The step is re-enqueued and executes with the original retry policy.
|
|
1100
|
+
4. Execution continues from the failed step — previously completed steps are not re-run.
|
|
1101
|
+
|
|
1102
|
+
### Replay
|
|
1103
|
+
|
|
1104
|
+
Replay re-emits a historical trigger to create a new run:
|
|
1105
|
+
|
|
1106
|
+
- A new run is created with `isReplay: true`.
|
|
1107
|
+
- Actions flagged with `replaySafe: false` are **skipped** during replay execution.
|
|
1108
|
+
- All other steps execute normally against the replayed trigger data.
|
|
1109
|
+
- Replay is useful for debugging, backfilling, and testing new workflows against historical signals.
|
|
1110
|
+
|
|
1111
|
+
### Parallel Execution
|
|
1112
|
+
|
|
1113
|
+
Parallel steps fan out into concurrent branches:
|
|
1114
|
+
|
|
1115
|
+
- Each branch executes independently with its own step runs.
|
|
1116
|
+
- **All branches must complete** for the parallel step to succeed.
|
|
1117
|
+
- If **any branch fails**, pending sibling branch steps are canceled and the run is marked as failed.
|
|
1118
|
+
- Context from each branch is merged — each step writes to `context[actionName]`, so branch steps should have unique action names.
|
|
1119
|
+
|
|
1120
|
+
### Context Accumulation
|
|
1121
|
+
|
|
1122
|
+
Each action's return value is stored in the run context keyed by action name:
|
|
878
1123
|
|
|
879
1124
|
```ts
|
|
880
|
-
|
|
1125
|
+
// After "check_status" action returns { healthy: true }
|
|
1126
|
+
// context.check_status === { healthy: true }
|
|
881
1127
|
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
triggerId: string
|
|
887
|
-
status: WorkflowStatus
|
|
888
|
-
context: Record<string, unknown>
|
|
889
|
-
currentStepIndex: number
|
|
890
|
-
isReplay: boolean
|
|
891
|
-
definitionSnapshot: DefinitionSnapshot
|
|
892
|
-
startedAt?: Date
|
|
893
|
-
completedAt?: Date
|
|
894
|
-
failedAt?: Date
|
|
895
|
-
canceledAt?: Date
|
|
896
|
-
cancelReason?: string
|
|
897
|
-
createdAt: Date
|
|
898
|
-
updatedAt: Date
|
|
899
|
-
}
|
|
1128
|
+
// Next action can access it:
|
|
1129
|
+
engine.registerAction('notify', async (ctx) => {
|
|
1130
|
+
if (ctx.run.context.check_status.healthy) { /* ... */ }
|
|
1131
|
+
})
|
|
900
1132
|
```
|
|
901
|
-
</details>
|
|
902
1133
|
|
|
903
|
-
|
|
904
|
-
<summary><code>WorkflowStepRun</code></summary>
|
|
1134
|
+
Step names within a workflow must be unique, preventing key collisions.
|
|
905
1135
|
|
|
906
|
-
|
|
907
|
-
|
|
1136
|
+
---
|
|
1137
|
+
|
|
1138
|
+
## Architecture
|
|
908
1139
|
|
|
909
|
-
interface WorkflowStepRun {
|
|
910
|
-
id: string
|
|
911
|
-
runId: string
|
|
912
|
-
tenantId: string
|
|
913
|
-
stepIndex: number
|
|
914
|
-
stepType: StepType
|
|
915
|
-
stepName: string
|
|
916
|
-
status: StepStatus
|
|
917
|
-
scheduledFor?: Date
|
|
918
|
-
startedAt?: Date
|
|
919
|
-
completedAt?: Date
|
|
920
|
-
result?: Record<string, unknown>
|
|
921
|
-
errorMessage?: string
|
|
922
|
-
branchIndex?: number
|
|
923
|
-
parentStepRunId?: string
|
|
924
|
-
createdAt: Date
|
|
925
|
-
}
|
|
926
1140
|
```
|
|
927
|
-
|
|
1141
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
1142
|
+
│ Your Application │
|
|
1143
|
+
│ │
|
|
1144
|
+
│ engine.emit({ type: 'heartbeat.missed', ... }) │
|
|
1145
|
+
│ │ │
|
|
1146
|
+
│ ▼ │
|
|
1147
|
+
│ ┌──────────┐ ┌───────────┐ ┌───────────────────────┐ │
|
|
1148
|
+
│ │ Registry │ │ Matcher │────▶│ Run Manager │ │
|
|
1149
|
+
│ │ │ │ │ │ (state machine) │ │
|
|
1150
|
+
│ │ triggers │ │ finds │ │ │ │
|
|
1151
|
+
│ │ actions │ │ matching │ │ pending → running │ │
|
|
1152
|
+
│ │ conditions│ │ workflows │ │ → waiting → completed │ │
|
|
1153
|
+
│ └──────────┘ └───────────┘ └───────┬───────────────┘ │
|
|
1154
|
+
│ │ │
|
|
1155
|
+
│ ▼ │
|
|
1156
|
+
│ ┌──────────────────────┐ ┌─────────────────────────────┐ │
|
|
1157
|
+
│ │ Step Scheduler │ │ Step Executor │ │
|
|
1158
|
+
│ │ │────▶│ │ │
|
|
1159
|
+
│ │ BullMQ delayed jobs │ │ delay → condition → action │ │
|
|
1160
|
+
│ │ + parallel branches │ │ + parallel branch dispatch │ │
|
|
1161
|
+
│ └──────────────────────┘ └─────────────────────────────┘ │
|
|
1162
|
+
│ │
|
|
1163
|
+
│ ┌────────────────────┐ ┌─────────────────────────────────┐ │
|
|
1164
|
+
│ │ Replay Manager │ │ Execution Logs │ │
|
|
1165
|
+
│ │ │ │ │ │
|
|
1166
|
+
│ │ re-emit historical │ │ structured logs per run/step │ │
|
|
1167
|
+
│ │ signals with safety│ │ ctx.log() → pulse_execution_logs│ │
|
|
1168
|
+
│ └────────────────────┘ └─────────────────────────────────┘ │
|
|
1169
|
+
│ │
|
|
1170
|
+
│ ┌────────────────────┐ ┌─────────────────────────────────┐ │
|
|
1171
|
+
│ │ Cron Manager │ │ Run Timeline │ │
|
|
1172
|
+
│ │ │ │ │ │
|
|
1173
|
+
│ │ BullMQ repeatable │ │ chronological event view │ │
|
|
1174
|
+
│ │ jobs → auto-emit │ │ for debugging & dashboards │ │
|
|
1175
|
+
│ └────────────────────┘ └─────────────────────────────────┘ │
|
|
1176
|
+
│ │
|
|
1177
|
+
│ ┌──────────────┐ ┌────────────────┐ │
|
|
1178
|
+
│ │ PostgreSQL │ │ Redis │ │
|
|
1179
|
+
│ │ (your DB) │ │ (BullMQ jobs) │ │
|
|
1180
|
+
│ └──────────────┘ └────────────────┘ │
|
|
1181
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
1182
|
+
```
|
|
928
1183
|
|
|
929
|
-
|
|
930
|
-
|
|
1184
|
+
### Module Breakdown
|
|
1185
|
+
|
|
1186
|
+
| Module | File | Purpose |
|
|
1187
|
+
|---|---|---|
|
|
1188
|
+
| **Engine Factory** | `src/index.ts` | `createEngine()` — wires everything together |
|
|
1189
|
+
| **Types** | `src/types.ts` | All TypeScript interfaces and type definitions |
|
|
1190
|
+
| **Registry** | `src/registry.ts` | Trigger, action, and condition registration |
|
|
1191
|
+
| **Matcher** | `src/matcher.ts` | Signal-to-workflow matching, run creation, and validation |
|
|
1192
|
+
| **Run Manager** | `src/runs.ts` | Workflow run lifecycle, state machine, timeline, cancel |
|
|
1193
|
+
| **Scheduler** | `src/scheduler.ts` | Step scheduling with BullMQ delayed jobs + parallel branches |
|
|
1194
|
+
| **Executor** | `src/executor.ts` | Step dispatch — delay, condition, action, parallel |
|
|
1195
|
+
| **Cron Manager** | `src/cron.ts` | Cron-triggered workflow scheduling via BullMQ repeatable jobs |
|
|
1196
|
+
| **Replay** | `src/replay.ts` | Trigger persistence and replay support |
|
|
1197
|
+
| **Queue** | `src/queue.ts` | BullMQ queue and worker setup (match, step, cron queues) |
|
|
1198
|
+
| **Logs** | `src/logs.ts` | Execution logging and query helpers |
|
|
1199
|
+
| **Schema** | `src/schema/` | Table definitions and migration runner |
|
|
1200
|
+
|
|
1201
|
+
---
|
|
1202
|
+
|
|
1203
|
+
## Examples
|
|
1204
|
+
|
|
1205
|
+
### Monitoring: Incident on Missed Heartbeat
|
|
1206
|
+
|
|
1207
|
+
A service stops sending heartbeats. Wait 5 minutes, check if it's still down, then create an incident.
|
|
931
1208
|
|
|
932
1209
|
```ts
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
1210
|
+
// Register handlers
|
|
1211
|
+
engine.registerTrigger('heartbeat.missed', {
|
|
1212
|
+
source: 'heartbeat',
|
|
1213
|
+
resourceType: 'service',
|
|
1214
|
+
})
|
|
1215
|
+
|
|
1216
|
+
engine.registerCondition('service_still_down', async (ctx) => {
|
|
1217
|
+
const status = await healthCheck(ctx.trigger.resourceId)
|
|
1218
|
+
return status !== 'healthy'
|
|
1219
|
+
})
|
|
1220
|
+
|
|
1221
|
+
engine.registerAction('create_incident', async (ctx) => {
|
|
1222
|
+
const incident = await db.incidents.create({
|
|
1223
|
+
tenantId: ctx.tenantId,
|
|
1224
|
+
serviceId: ctx.trigger.resourceId,
|
|
1225
|
+
severity: ctx.config.severity || 'medium',
|
|
1226
|
+
title: `Service ${ctx.trigger.resourceId} is unresponsive`,
|
|
1227
|
+
})
|
|
1228
|
+
ctx.log('Incident created', { incidentId: incident.id })
|
|
1229
|
+
return { success: true, data: { incidentId: incident.id } }
|
|
1230
|
+
}, { replaySafe: true })
|
|
1231
|
+
|
|
1232
|
+
engine.registerAction('notify_oncall', async (ctx) => {
|
|
1233
|
+
await pagerduty.trigger({
|
|
1234
|
+
service: ctx.trigger.resourceId,
|
|
1235
|
+
severity: ctx.config.severity,
|
|
1236
|
+
})
|
|
1237
|
+
return { success: true }
|
|
1238
|
+
}, { replaySafe: false }) // Don't re-page during replay
|
|
1239
|
+
|
|
1240
|
+
// Create the workflow
|
|
1241
|
+
await engine.createWorkflow({
|
|
1242
|
+
tenantId: 'workspace_1',
|
|
1243
|
+
name: 'Incident on missed heartbeat',
|
|
1244
|
+
triggerType: 'heartbeat.missed',
|
|
1245
|
+
environmentFilter: 'production',
|
|
1246
|
+
steps: [
|
|
1247
|
+
{ type: 'delay', name: 'wait_5m', delayMs: 5 * 60 * 1000 },
|
|
1248
|
+
{ type: 'condition', name: 'service_still_down' },
|
|
1249
|
+
{ type: 'action', name: 'create_incident' },
|
|
1250
|
+
{ type: 'action', name: 'notify_oncall' },
|
|
1251
|
+
],
|
|
1252
|
+
config: { severity: 'high' },
|
|
1253
|
+
isEnabled: true,
|
|
1254
|
+
})
|
|
944
1255
|
```
|
|
945
|
-
</details>
|
|
946
1256
|
|
|
947
|
-
|
|
948
|
-
|
|
1257
|
+
### Billing: Trial Expiration Workflow
|
|
1258
|
+
|
|
1259
|
+
A trial is about to expire. Send a warning email, wait 3 days, then downgrade the tenant to the free plan if it still hasn't upgraded.
|
|
949
1260
|
|
|
950
1261
|
```ts
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
}
|
|
1262
|
+
engine.registerTrigger('trial.expiring', {
|
|
1263
|
+
source: 'billing',
|
|
1264
|
+
resourceType: 'subscription',
|
|
1265
|
+
})
|
|
1266
|
+
|
|
1267
|
+
engine.registerAction('send_trial_warning', async (ctx) => {
|
|
1268
|
+
await emails.send({
|
|
1269
|
+
to: ctx.trigger.payload.email,
|
|
1270
|
+
template: 'trial-expiring',
|
|
1271
|
+
data: { daysLeft: ctx.trigger.payload.daysLeft },
|
|
1272
|
+
})
|
|
1273
|
+
return { success: true }
|
|
1274
|
+
}, { replaySafe: false })
|
|
1275
|
+
|
|
1276
|
+
engine.registerCondition('has_not_upgraded', async (ctx) => {
|
|
1277
|
+
const sub = await billing.getSubscription(ctx.tenantId)
|
|
1278
|
+
return sub.plan === 'trial'
|
|
1279
|
+
})
|
|
1280
|
+
|
|
1281
|
+
engine.registerAction('downgrade_to_free', async (ctx) => {
|
|
1282
|
+
await billing.changePlan(ctx.tenantId, 'free')
|
|
1283
|
+
ctx.log('Downgraded to free plan')
|
|
1284
|
+
return { success: true }
|
|
1285
|
+
}, { replaySafe: true })
|
|
1286
|
+
|
|
1287
|
+
await engine.createWorkflow({
|
|
1288
|
+
tenantId: 'workspace_1',
|
|
1289
|
+
name: 'Trial expiration',
|
|
1290
|
+
triggerType: 'trial.expiring',
|
|
1291
|
+
steps: [
|
|
1292
|
+
{ type: 'action', name: 'send_trial_warning' },
|
|
1293
|
+
{ type: 'delay', name: 'wait_3_days', delayMs: 3 * 24 * 60 * 60 * 1000 },
|
|
1294
|
+
{ type: 'condition', name: 'has_not_upgraded' },
|
|
1295
|
+
{ type: 'action', name: 'downgrade_to_free' },
|
|
1296
|
+
],
|
|
1297
|
+
config: {},
|
|
1298
|
+
isEnabled: true,
|
|
1299
|
+
})
|
|
961
1300
|
```
|
|
962
|
-
</details>
|
|
963
1301
|
|
|
964
|
-
|
|
965
|
-
|
|
1302
|
+
### Chaining: Workflow That Triggers Another Workflow
|
|
1303
|
+
|
|
1304
|
+
Actions can emit signals, allowing workflows to trigger other workflows.
|
|
966
1305
|
|
|
967
1306
|
```ts
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
1307
|
+
// First workflow: detect issue → emit resolution signal
|
|
1308
|
+
engine.registerAction('attempt_auto_fix', async (ctx) => {
|
|
1309
|
+
const result = await autoRemediate(ctx.trigger.resourceId)
|
|
1310
|
+
|
|
1311
|
+
if (result.fixed) {
|
|
1312
|
+
// Emit a new signal — this will match other workflows
|
|
1313
|
+
await ctx.emit({
|
|
1314
|
+
tenantId: ctx.tenantId,
|
|
1315
|
+
source: 'auto-remediation',
|
|
1316
|
+
type: 'service.recovered',
|
|
1317
|
+
resourceType: 'service',
|
|
1318
|
+
resourceId: ctx.trigger.resourceId,
|
|
1319
|
+
payload: { fix: result.action },
|
|
1320
|
+
})
|
|
1321
|
+
}
|
|
1322
|
+
|
|
1323
|
+
return { success: true, data: result }
|
|
1324
|
+
}, { replaySafe: true })
|
|
1325
|
+
|
|
1326
|
+
// Second workflow: triggered by the recovery signal
|
|
1327
|
+
engine.registerAction('close_incident', async (ctx) => {
|
|
1328
|
+
await db.incidents.close({
|
|
1329
|
+
serviceId: ctx.trigger.resourceId,
|
|
1330
|
+
resolution: ctx.trigger.payload.fix,
|
|
1331
|
+
})
|
|
1332
|
+
return { success: true }
|
|
1333
|
+
}, { replaySafe: true })
|
|
1334
|
+
|
|
1335
|
+
await engine.createWorkflow({
|
|
1336
|
+
tenantId: 'workspace_1',
|
|
1337
|
+
name: 'Auto-close on recovery',
|
|
1338
|
+
triggerType: 'service.recovered',
|
|
1339
|
+
steps: [
|
|
1340
|
+
{ type: 'action', name: 'close_incident' },
|
|
1341
|
+
],
|
|
1342
|
+
config: {},
|
|
1343
|
+
isEnabled: true,
|
|
1344
|
+
})
|
|
977
1345
|
```
|
|
978
|
-
</details>
|
|
979
1346
|
|
|
980
|
-
|
|
981
|
-
|
|
1347
|
+
### Parallel: Multi-Channel Notification
|
|
1348
|
+
|
|
1349
|
+
Send notifications across multiple channels simultaneously, then mark the alert as notified.
|
|
982
1350
|
|
|
983
1351
|
```ts
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
}
|
|
1352
|
+
engine.registerAction('send_email', async (ctx) => {
|
|
1353
|
+
await emails.send({ to: ctx.config.alertEmail, subject: 'Alert' })
|
|
1354
|
+
return { success: true }
|
|
1355
|
+
}, { replaySafe: false })
|
|
989
1356
|
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
}
|
|
1357
|
+
engine.registerAction('send_slack', async (ctx) => {
|
|
1358
|
+
await slack.post({ channel: ctx.config.slackChannel, text: 'Alert triggered' })
|
|
1359
|
+
return { success: true }
|
|
1360
|
+
}, { replaySafe: false })
|
|
993
1361
|
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
}
|
|
1362
|
+
engine.registerAction('send_sms', async (ctx) => {
|
|
1363
|
+
await sms.send({ to: ctx.config.phoneNumber, body: 'Alert triggered' })
|
|
1364
|
+
return { success: true }
|
|
1365
|
+
}, { replaySafe: false })
|
|
998
1366
|
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
}
|
|
1367
|
+
engine.registerAction('mark_notified', async (ctx) => {
|
|
1368
|
+
await db.alerts.update(ctx.trigger.resourceId, { notified: true })
|
|
1369
|
+
return { success: true }
|
|
1370
|
+
}, { replaySafe: true })
|
|
1371
|
+
|
|
1372
|
+
await engine.createWorkflow({
|
|
1373
|
+
tenantId: 'workspace_1',
|
|
1374
|
+
name: 'Multi-channel alert',
|
|
1375
|
+
triggerType: 'alert.triggered',
|
|
1376
|
+
steps: [
|
|
1377
|
+
{
|
|
1378
|
+
type: 'parallel',
|
|
1379
|
+
name: 'notify_all',
|
|
1380
|
+
branches: [
|
|
1381
|
+
[{ type: 'action', name: 'send_email' }],
|
|
1382
|
+
[{ type: 'action', name: 'send_slack' }],
|
|
1383
|
+
[{ type: 'action', name: 'send_sms' }],
|
|
1384
|
+
],
|
|
1385
|
+
},
|
|
1386
|
+
{ type: 'action', name: 'mark_notified' },
|
|
1387
|
+
],
|
|
1388
|
+
config: {
|
|
1389
|
+
alertEmail: 'oncall@company.com',
|
|
1390
|
+
slackChannel: '#alerts',
|
|
1391
|
+
phoneNumber: '+1234567890',
|
|
1392
|
+
},
|
|
1393
|
+
isEnabled: true,
|
|
1394
|
+
})
|
|
1002
1395
|
```
|
|
1003
|
-
</details>
|
|
1004
1396
|
|
|
1005
|
-
|
|
1397
|
+
### Cron: Daily Cleanup Job
|
|
1006
1398
|
|
|
1007
|
-
|
|
1399
|
+
Run a cleanup workflow on a schedule without needing an external signal.
|
|
1008
1400
|
|
|
1401
|
+
```ts
|
|
1402
|
+
engine.registerTrigger('maintenance.cleanup', {
|
|
1403
|
+
source: 'cron',
|
|
1404
|
+
})
|
|
1405
|
+
|
|
1406
|
+
engine.registerAction('cleanup_old_records', async (ctx) => {
|
|
1407
|
+
const deleted = await db.query(
|
|
1408
|
+
'DELETE FROM temp_data WHERE created_at < NOW() - INTERVAL \'30 days\' RETURNING id'
|
|
1409
|
+
)
|
|
1410
|
+
ctx.log('Cleaned up old records', { count: deleted.rowCount })
|
|
1411
|
+
return { success: true, data: { deletedCount: deleted.rowCount } }
|
|
1412
|
+
}, { replaySafe: true })
|
|
1413
|
+
|
|
1414
|
+
engine.registerAction('send_cleanup_report', async (ctx) => {
|
|
1415
|
+
await emails.send({
|
|
1416
|
+
to: 'admin@company.com',
|
|
1417
|
+
subject: 'Daily cleanup report',
|
|
1418
|
+
body: `Deleted ${ctx.run.context.deletedCount ?? 0} old records`,
|
|
1419
|
+
})
|
|
1420
|
+
return { success: true }
|
|
1421
|
+
}, { replaySafe: false })
|
|
1422
|
+
|
|
1423
|
+
await engine.createWorkflow({
|
|
1424
|
+
tenantId: 'workspace_1',
|
|
1425
|
+
name: 'Daily cleanup',
|
|
1426
|
+
triggerType: 'maintenance.cleanup',
|
|
1427
|
+
steps: [
|
|
1428
|
+
{ type: 'action', name: 'cleanup_old_records' },
|
|
1429
|
+
{ type: 'action', name: 'send_cleanup_report' },
|
|
1430
|
+
],
|
|
1431
|
+
config: {},
|
|
1432
|
+
isEnabled: true,
|
|
1433
|
+
cronExpression: '0 2 * * *', // Every day at 2 AM
|
|
1434
|
+
})
|
|
1009
1435
|
```
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1436
|
+
|
|
1437
|
+
### Observability: Run Timeline Dashboard
|
|
1438
|
+
|
|
1439
|
+
Use the timeline API to build a debugging view of what happened during a run.
|
|
1440
|
+
|
|
1441
|
+
```ts
|
|
1442
|
+
const timeline = await engine.getRunTimeline('run_abc123')
|
|
1443
|
+
|
|
1444
|
+
// Display in a dashboard
|
|
1445
|
+
for (const entry of timeline) {
|
|
1446
|
+
const time = entry.timestamp.toISOString()
|
|
1447
|
+
|
|
1448
|
+
switch (entry.type) {
|
|
1449
|
+
case 'run_created':
|
|
1450
|
+
console.log(`${time} Run started`)
|
|
1451
|
+
break
|
|
1452
|
+
case 'step_started':
|
|
1453
|
+
console.log(`${time} Step "${entry.stepName}" (${entry.stepType}) started`)
|
|
1454
|
+
break
|
|
1455
|
+
case 'step_completed':
|
|
1456
|
+
console.log(`${time} Step "${entry.stepName}" completed`, entry.detail)
|
|
1457
|
+
break
|
|
1458
|
+
case 'step_failed':
|
|
1459
|
+
console.log(`${time} Step "${entry.stepName}" FAILED:`, entry.detail?.error)
|
|
1460
|
+
break
|
|
1461
|
+
case 'log':
|
|
1462
|
+
console.log(`${time} [${entry.detail?.level}] ${entry.detail?.message}`)
|
|
1463
|
+
break
|
|
1464
|
+
case 'run_completed':
|
|
1465
|
+
console.log(`${time} Run completed successfully`)
|
|
1466
|
+
break
|
|
1467
|
+
case 'run_canceled':
|
|
1468
|
+
console.log(`${time} Run canceled: ${entry.detail?.reason}`)
|
|
1469
|
+
break
|
|
1470
|
+
}
|
|
1471
|
+
}
|
|
1044
1472
|
```
|
|
1045
1473
|
|
|
1046
|
-
###
|
|
1474
|
+
### Error Recovery: Retry Failed Runs
|
|
1047
1475
|
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1476
|
+
Monitor for failed runs and retry them, either automatically or manually.
|
|
1477
|
+
|
|
1478
|
+
```ts
|
|
1479
|
+
// List all failed runs for a tenant
|
|
1480
|
+
const failed = await engine.getFailedRuns('workspace_1')
|
|
1481
|
+
|
|
1482
|
+
for (const run of failed) {
|
|
1483
|
+
console.log(`Run ${run.id} failed at ${run.failedAt}`)
|
|
1484
|
+
|
|
1485
|
+
// Retry the run — resumes from the failed step
|
|
1486
|
+
try {
|
|
1487
|
+
const retried = await engine.retryRun(run.id)
|
|
1488
|
+
console.log(`Retried run ${retried.id}, now ${retried.status}`)
|
|
1489
|
+
} catch (err) {
|
|
1490
|
+
console.error(`Cannot retry: ${err.message}`)
|
|
1491
|
+
}
|
|
1492
|
+
}
|
|
1493
|
+
|
|
1494
|
+
// Cancel a run that's stuck or no longer needed
|
|
1495
|
+
await engine.cancelRun('run_xyz789', 'Superseded by newer deployment')
|
|
1496
|
+
```
|
|
1062
1497
|
|
|
1063
1498
|
---
|
|
1064
1499
|
|
|
1065
1500
|
## Requirements
|
|
1066
1501
|
|
|
1067
|
-
| Requirement | Version |
|
|
1068
|
-
|
|
1069
|
-
| Node.js | 18+ |
|
|
1070
|
-
| PostgreSQL | 12+ |
|
|
1071
|
-
| Redis | 6+ |
|
|
1072
|
-
| TypeScript | 5.0+ |
|
|
1502
|
+
| Requirement | Version | Notes |
|
|
1503
|
+
|---|---|---|
|
|
1504
|
+
| **Node.js** | 18+ | ES module (ESM) |
|
|
1505
|
+
| **PostgreSQL** | 12+ | Engine creates `pulse_*` tables |
|
|
1506
|
+
| **Redis** | 6+ | Used by BullMQ for job queuing |
|
|
1507
|
+
| **TypeScript** | 5.0+ | Full type definitions included |
|
|
1073
1508
|
|
|
1074
1509
|
---
|
|
1075
1510
|
|