@pgflow/core 0.0.0-add-workerconfig-to-context--20250905094004-b98e1fec-20250905074005 → 0.0.0-test-snapshot-releases-8d5d9bc1-20250922101013

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/package.json +2 -2
  2. package/dist/ATLAS.md +0 -32
  3. package/dist/CHANGELOG.md +0 -639
  4. package/dist/PgflowSqlClient.d.ts +0 -17
  5. package/dist/PgflowSqlClient.d.ts.map +0 -1
  6. package/dist/PgflowSqlClient.js +0 -70
  7. package/dist/README.md +0 -393
  8. package/dist/database-types.d.ts +0 -828
  9. package/dist/database-types.d.ts.map +0 -1
  10. package/dist/database-types.js +0 -8
  11. package/dist/index.d.ts +0 -4
  12. package/dist/index.d.ts.map +0 -1
  13. package/dist/index.js +0 -2
  14. package/dist/package.json +0 -32
  15. package/dist/supabase/migrations/20250429164909_pgflow_initial.sql +0 -579
  16. package/dist/supabase/migrations/20250517072017_pgflow_fix_poll_for_tasks_to_use_separate_statement_for_polling.sql +0 -101
  17. package/dist/supabase/migrations/20250609105135_pgflow_add_start_tasks_and_started_status.sql +0 -371
  18. package/dist/supabase/migrations/20250610180554_pgflow_add_set_vt_batch_and_use_it_in_start_tasks.sql +0 -127
  19. package/dist/supabase/migrations/20250614124241_pgflow_add_realtime.sql +0 -501
  20. package/dist/supabase/migrations/20250619195327_pgflow_fix_fail_task_missing_realtime_event.sql +0 -185
  21. package/dist/supabase/migrations/20250627090700_pgflow_fix_function_search_paths.sql +0 -6
  22. package/dist/supabase/migrations/20250707210212_pgflow_add_opt_start_delay.sql +0 -103
  23. package/dist/supabase/migrations/20250719205006_pgflow_worker_deprecation.sql +0 -2
  24. package/dist/tsconfig.lib.tsbuildinfo +0 -1
  25. package/dist/types.d.ts +0 -94
  26. package/dist/types.d.ts.map +0 -1
  27. package/dist/types.js +0 -1
@@ -1,70 +0,0 @@
/**
 * Implementation of IPgflowClient that uses direct SQL calls to pgflow functions.
 *
 * The injected `sql` value is used as a tagged-template function, and its
 * `json()` helper is used to wrap JSON payloads. Every dynamic value is passed
 * as a template parameter; nothing is concatenated into the query text.
 */
export class PgflowSqlClient {
  sql;

  /**
   * @param sql - Tagged-template SQL function that also exposes a `json()` helper.
   */
  constructor(sql) {
    this.sql = sql;
  }

  /**
   * Calls `pgflow.read_with_poll` for the given queue.
   *
   * @param queueName - Passed as `queue_name`.
   * @param visibilityTimeout - Passed as `vt`.
   * @param batchSize - Passed as `qty`.
   * @param maxPollSeconds - Passed as `max_poll_seconds` (defaults to 5).
   * @param pollIntervalMs - Passed as `poll_interval_ms` (defaults to 200).
   * @returns Whatever the awaited query resolves to.
   */
  async readMessages(queueName, visibilityTimeout, batchSize, maxPollSeconds = 5, pollIntervalMs = 200) {
    return await this.sql`
      SELECT *
      FROM pgflow.read_with_poll(
        queue_name => ${queueName},
        vt => ${visibilityTimeout},
        qty => ${batchSize},
        max_poll_seconds => ${maxPollSeconds},
        poll_interval_ms => ${pollIntervalMs}
      );
    `;
  }

  /**
   * Calls `pgflow.start_tasks` for the given message ids.
   *
   * @param flowSlug - Passed as `flow_slug`.
   * @param msgIds - Passed as `msg_ids`, cast to `bigint[]`.
   * @param workerId - Passed as `worker_id`, cast to `uuid`.
   * @returns Whatever the awaited query resolves to.
   */
  async startTasks(flowSlug, msgIds, workerId) {
    return await this.sql`
      SELECT *
      FROM pgflow.start_tasks(
        flow_slug => ${flowSlug},
        msg_ids => ${msgIds}::bigint[],
        worker_id => ${workerId}::uuid
      );
    `;
  }

  /**
   * Calls `pgflow.complete_task` for the given step task.
   *
   * @param stepTask - Object whose `run_id` and `step_slug` identify the task.
   * @param output - Task output, stored as jsonb.
   */
  async completeTask(stepTask, output) {
    // Only a missing output (null/undefined) becomes JSON null. Using `||`
    // here would also erase the legitimate outputs 0, false and "".
    const payload = output ?? null;
    // task_index is always sent as 0 by this client.
    await this.sql`
      SELECT pgflow.complete_task(
        run_id => ${stepTask.run_id}::uuid,
        step_slug => ${stepTask.step_slug}::text,
        task_index => ${0}::int,
        output => ${this.sql.json(payload)}::jsonb
      );
    `;
  }

  /**
   * Calls `pgflow.fail_task` for the given step task.
   *
   * The error is reduced to a string: strings are used as-is, `Error`
   * instances contribute their `message`, and anything else is JSON-encoded.
   *
   * @param stepTask - Object whose `run_id` and `step_slug` identify the task.
   * @param error - The failure reason in any form.
   */
  async failTask(stepTask, error) {
    let errorString;
    if (typeof error === 'string') {
      errorString = error;
    } else if (error instanceof Error) {
      errorString = error.message;
    } else {
      try {
        // JSON.stringify yields undefined for undefined, functions and symbols.
        errorString = JSON.stringify(error);
      } catch {
        // Circular structures and BigInt make JSON.stringify throw; the
        // failure must still be reported, so fall through to String().
        errorString = undefined;
      }
      errorString ??= String(error);
    }
    // task_index is always sent as 0 by this client.
    await this.sql`
      SELECT pgflow.fail_task(
        run_id => ${stepTask.run_id}::uuid,
        step_slug => ${stepTask.step_slug}::text,
        task_index => ${0}::int,
        error_message => ${errorString}::text
      );
    `;
  }

  /**
   * Calls `pgflow.start_flow` and returns the first returned row.
   *
   * @param flow_slug - Slug of the flow to start.
   * @param input - Flow input, stored as jsonb.
   * @param run_id - Optional run id; the `run_id` argument is only added to
   *   the call when this is truthy.
   * @returns The first row of the query result.
   * @throws {Error} When the query returns no rows.
   */
  async startFlow(flow_slug, input, run_id) {
    const results = await this.sql`
      SELECT * FROM pgflow.start_flow(
        flow_slug => ${flow_slug}::text,
        input => ${this.sql.json(input)}::jsonb
        ${run_id ? this.sql`, run_id => ${run_id}::uuid` : this.sql``}
      );
    `;
    if (results.length === 0) {
      throw new Error(`Failed to start flow ${flow_slug}`);
    }
    const [flowRun] = results;
    return flowRun;
  }
}
package/dist/README.md DELETED
@@ -1,393 +0,0 @@
1
- # pgflow SQL Core
2
-
3
- PostgreSQL-native workflow engine for defining, managing, and tracking DAG-based workflows directly in your database.
4
-
5
- > [!NOTE]
6
- > This project and all its components are licensed under [Apache 2.0](./LICENSE) license.
7
-
8
- > [!WARNING]
9
- > This project uses [Atlas](https://atlasgo.io/docs) to manage the schemas and migrations.
10
- > See [ATLAS.md](ATLAS.md) for more details.
11
-
12
- ## Table of Contents
13
-
14
- - [Overview](#overview)
15
- - [Key Features](#key-features)
16
- - [Architecture](#architecture)
17
- - [Schema Design](#schema-design)
18
- - [Execution Model](#execution-model)
19
- - [Example Flow and its life](#example-flow-and-its-life)
20
- - [Defining a Workflow](#defining-a-workflow)
21
- - [Starting a Workflow Run](#starting-a-workflow-run)
22
- - [Workflow Execution](#workflow-execution)
23
- - [Task Polling](#task-polling)
24
- - [Task Completion](#task-completion)
25
- - [Error Handling](#error-handling)
26
- - [Retries and Timeouts](#retries-and-timeouts)
27
- - [TypeScript Flow DSL](#typescript-flow-dsl)
28
- - [Overview](#overview-1)
29
- - [Type Inference System](#type-inference-system)
30
- - [Basic Example](#basic-example)
31
- - [How Payload Types Are Built](#how-payload-types-are-built)
32
- - [Benefits of Automatic Type Inference](#benefits-of-automatic-type-inference)
33
- - [Data Flow](#data-flow)
34
- - [Input and Output Handling](#input-and-output-handling)
35
- - [Run Completion](#run-completion)
36
-
37
- ## Overview
38
-
39
- The pgflow SQL Core provides the data model, state machine, and transactional functions for workflow management. It treats workflows as Directed Acyclic Graphs (DAGs) of steps, each step being a simple state machine.
40
-
41
- This package focuses on:
42
-
43
- - Defining and storing workflow shapes
44
- - Managing workflow state transitions
45
- - Exposing transactional functions for workflow operations
46
- - Providing two-phase APIs for reliable task polling and status updates
47
-
48
- The actual execution of workflow tasks is handled by the [Edge Worker](../edge-worker/README.md), which calls back to the SQL Core to acknowledge task completion or failure.
49
-
50
- ## Key Features
51
-
52
- - **Declarative Workflows**: Define flows and steps via SQL tables
53
- - **Dependency Management**: Explicit step dependencies with atomic transitions
54
- - **Configurable Behavior**: Per-flow and per-step options for timeouts, retries, and delays
55
- - **Queue Integration**: Built on pgmq for reliable task processing
56
- - **Transactional Guarantees**: All state transitions are ACID-compliant
57
-
58
- ## Architecture
59
-
60
- ### Schema Design
61
-
62
- [Schema ERD Diagram (click to enlarge)](./assets/schema.svg)
63
-
64
- <a href="./assets/schema.svg">
65
- <img src="./assets/schema.svg" alt="Schema ERD Diagram" width="25%" height="25%">
66
- </a>
67
-
68
- ---
69
-
70
- The schema consists of two main categories of tables:
71
-
72
- #### Static definition tables
73
-
74
- - `flows` (just an identity for the workflow with some global options)
75
- - `steps` (DAG nodes belonging to particular `flows`, with option overrides)
76
- - `deps` (DAG edges between `steps`)
77
-
78
- #### Runtime state tables
79
-
80
- - `runs` (execution instances of `flows`)
81
- - `step_states` (states of individual `steps` within a `run`)
82
- - `step_tasks` (units of work for individual `steps` within a `run`, so we can have fanouts)
83
-
84
- ### Execution Model
85
-
86
- The SQL Core handles the workflow lifecycle through these key operations:
87
-
88
- 1. **Definition**: Workflows are defined using `create_flow` and `add_step`
89
- 2. **Instantiation**: Workflow instances are started with `start_flow`, creating a new run
90
- 3. **Task Retrieval**: The [Edge Worker](../edge-worker/README.md) uses two-phase polling - first `read_with_poll` to reserve queue messages, then `start_tasks` to convert them to executable tasks
91
- 4. **State Transitions**: When the Edge Worker reports back using `complete_task` or `fail_task`, the SQL Core handles state transitions and schedules dependent steps
92
-
93
- [Flow lifecycle diagram (click to enlarge)](./assets/flow-lifecycle.svg)
94
-
95
- <a href="./assets/flow-lifecycle.svg"><img src="./assets/flow-lifecycle.svg" alt="Flow Lifecycle" width="25%" height="25%"></a>
96
-
97
- ## Example flow and its life
98
-
99
- Let's walk through creating and running a workflow that fetches a website,
100
- does summarization and sentiment analysis in parallel steps
101
- and saves the results to a database.
102
-
103
- ![example flow graph](./assets/example-flow.svg)
104
-
105
- ### Defining a Workflow
106
-
107
- Workflows are defined using two SQL functions: `create_flow` and `add_step`.
108
-
109
- In this example, we'll create a workflow with:
110
-
111
- - `website` as the entry point ("root step")
112
- - `sentiment` and `summary` as parallel steps that depend on `website`
113
- - `saveToDb` as the final step, depending on both parallel steps
114
-
115
- ```sql
116
- -- Define workflow with parallel steps
117
- SELECT pgflow.create_flow('analyze_website');
118
- SELECT pgflow.add_step('analyze_website', 'website');
119
- SELECT pgflow.add_step('analyze_website', 'sentiment', deps_slugs => ARRAY['website']);
120
- SELECT pgflow.add_step('analyze_website', 'summary', deps_slugs => ARRAY['website']);
121
- SELECT pgflow.add_step('analyze_website', 'saveToDb', deps_slugs => ARRAY['sentiment', 'summary']);
122
- ```
123
-
124
- > [!WARNING]
125
- > You need to call `add_step` in topological order, which is enforced by foreign key constraints.
126
-
127
- > [!NOTE]
128
- > You can have multiple "root steps" in a workflow. You can even create a root-steps-only workflow
129
- > to process a single input in parallel, because at the end, all of the outputs from steps
130
- > that do not have dependents ("final steps") are aggregated and saved as the run's `output`.
131
-
132
- ### Starting a Workflow Run
133
-
134
- To start a workflow, call `start_flow` with a flow slug and input arguments:
135
-
136
- ```sql
137
- SELECT * FROM pgflow.start_flow(
138
- flow_slug => 'analyze_website',
139
- input => '{"url": "https://example.com"}'::jsonb
140
- );
141
-
142
- -- run_id | flow_slug | status | input | output | remaining_steps
143
- -- ------------+-----------------+---------+--------------------------------+--------+-----------------
144
- -- <run uuid> | analyze_website | started | {"url": "https://example.com"} | [NULL] | 4
145
- ```
146
-
147
- When a workflow starts:
148
-
149
- - A new `run` record is created
150
- - Initial states for all steps are created
151
- - Root steps are marked as `started`
152
- - Tasks are created for root steps
153
- - Messages are enqueued on PGMQ for worker processing
154
-
155
- > [!NOTE]
156
- > The `input` argument must be a valid JSONB value: string, number, boolean, array, object or null.
157
-
158
- ### Workflow Execution
159
-
160
- #### Task Polling
161
-
162
- The Edge Worker uses a two-phase approach to retrieve and start tasks:
163
-
164
- **Phase 1 - Reserve Messages:**
165
- ```sql
166
- SELECT * FROM pgflow.read_with_poll(
167
- queue_name => 'analyze_website',
168
- vt => 60, -- visibility timeout in seconds
169
- qty => 5 -- maximum number of messages to fetch
170
- );
171
- ```
172
-
173
- **Phase 2 - Start Tasks:**
174
- ```sql
175
- SELECT * FROM pgflow.start_tasks(
176
- flow_slug => 'analyze_website',
177
- msg_ids => ARRAY[101, 102, 103], -- message IDs from phase 1
178
- worker_id => '550e8400-e29b-41d4-a716-446655440000'::uuid
179
- );
180
- ```
181
-
182
- **How it works:**
183
-
184
- 1. **read_with_poll** reserves raw queue messages and hides them from other workers
185
- 2. **start_tasks** finds matching step_tasks, increments attempts counter, and builds task inputs
186
- 3. Task metadata and input are returned to the worker for execution
187
-
188
- This two-phase approach ensures tasks always exist before processing begins, eliminating race conditions that could occur with single-phase polling.
189
-
190
- #### Task Completion
191
-
192
- After successful processing, the worker acknowledges completion:
193
-
194
- ```sql
195
- SELECT pgflow.complete_task(
196
- run_id => '<run_uuid>',
197
- step_slug => 'website',
198
- task_index => 0, -- we will have multiple tasks for a step in the future
199
- output => '{"content": "HTML content", "status": 200}'::jsonb
200
- );
201
- ```
202
-
203
- When a task completes:
204
-
205
- 1. The task status is updated to 'completed' and the output is saved
206
- 2. The message is archived in PGMQ
207
- 3. The step state is updated to 'completed'
208
- 4. Dependent steps with all dependencies completed are automatically started
209
- 5. The run's remaining_steps counter is decremented
210
- 6. If all steps are completed, the run is marked as completed with aggregated outputs
211
-
212
- #### Error Handling
213
-
214
- If a task fails, the worker acknowledges this using `fail_task`:
215
-
216
- ```sql
217
- SELECT pgflow.fail_task(
218
- run_id => '<run_uuid>',
219
- step_slug => 'website',
220
- task_index => 0,
221
- error_message => 'Connection timeout when fetching URL'::text
222
- );
223
- ```
224
-
225
- The system handles failures by:
226
-
227
- 1. Checking if retry attempts are available
228
- 2. For available retries:
229
- - Keeping the task in 'queued' status
230
- - Applying exponential backoff for visibility
231
- - Preventing processing until the visibility timeout expires
232
- 3. When retries are exhausted:
233
- - Marking the task as 'failed'
234
- - Marking the step as 'failed'
235
- - Marking the run as 'failed'
236
- - Archiving the message in PGMQ
237
- - Notifying workers to abort pending tasks (future feature)
238
-
239
- #### Retries and Timeouts
240
-
241
- Retry behavior can be configured at both the flow and step level:
242
-
243
- ```sql
244
- -- Flow-level defaults
245
- SELECT pgflow.create_flow(
246
- flow_slug => 'analyze_website',
247
- max_attempts => 3, -- Maximum number of attempts (including the first attempt)
248
- base_delay => 5, -- Base delay in seconds for exponential backoff
249
- timeout => 60 -- Task timeout in seconds
250
- );
251
-
252
- -- Step-level overrides
253
- SELECT pgflow.add_step(
254
- flow_slug => 'analyze_website',
255
- step_slug => 'sentiment',
256
- deps_slugs => ARRAY['website']::text[],
257
- max_attempts => 5, -- Override max attempts for this step
258
- base_delay => 2, -- Override base delay for exponential backoff
259
- timeout => 30 -- Override timeout for this step
260
- );
261
- ```
262
-
263
- The system applies exponential backoff for retries using the formula:
264
-
265
- ```
266
- delay = base_delay * (2 ^ attempts_count)
267
- ```
268
-
269
- Timeouts are enforced by setting the message visibility timeout to the step's timeout value plus a small buffer. If a worker doesn't acknowledge completion or failure within this period, the task becomes visible again and can be retried.
270
-
271
- ## TypeScript Flow DSL
272
-
273
- > [!NOTE]
274
- > TypeScript Flow DSL is a Work In Progress and is not ready yet!
275
-
276
- ### Overview
277
-
278
- While the SQL Core engine handles workflow definitions and state management, the primary way to define and work with your workflow logic is via the Flow DSL in TypeScript. This DSL offers a fluent API that makes it straightforward to outline the steps in your flow with full type safety.
279
-
280
- ### Type Inference System
281
-
282
- The most powerful feature of the Flow DSL is its **automatic type inference system**:
283
-
284
- 1. You only need to annotate the initial Flow input type
285
- 2. The return type of each step is automatically inferred from your handler function
286
- 3. These return types become available in the payload of dependent steps
287
- 4. The TypeScript compiler builds a complete type graph matching your workflow DAG
288
-
289
- This means you get full IDE autocompletion and type checking throughout your workflow without manual type annotations.
290
-
291
- ### Basic Example
292
-
293
- Here's an example that matches our website analysis workflow:
294
-
295
- ```ts
296
- // Provide a type for the input of the Flow
297
- type Input = {
298
- url: string;
299
- };
300
-
301
- const AnalyzeWebsite = new Flow<Input>({
302
- slug: 'analyze_website',
303
- maxAttempts: 3,
304
- baseDelay: 5,
305
- timeout: 10,
306
- })
307
- .step(
308
- { slug: 'website' },
309
- async (input) => await scrapeWebsite(input.run.url)
310
- )
311
- .step(
312
- { slug: 'sentiment', dependsOn: ['website'], timeout: 30, maxAttempts: 5 },
313
- async (input) => await analyzeSentiment(input.website.content)
314
- )
315
- .step(
316
- { slug: 'summary', dependsOn: ['website'] },
317
- async (input) => await summarizeWithAI(input.website.content)
318
- )
319
- .step(
320
- { slug: 'saveToDb', dependsOn: ['sentiment', 'summary'] },
321
- async (input) =>
322
- (await saveToDb({
323
- websiteUrl: input.run.url,
324
- sentiment: input.sentiment.score,
325
- summary: input.summary,
326
- })).status
327
- );
328
- ```
329
-
330
- ### How Payload Types Are Built
331
-
332
- The payload object for each step is constructed dynamically based on:
333
-
334
- 1. **The `run` property**: Always contains the original workflow input
335
- 2. **Dependency outputs**: Each dependency's output is available under a key matching the dependency's slug
336
- 3. **DAG structure**: Only outputs from direct dependencies are included in the payload
337
-
338
- This means your step handlers receive exactly the data they need, properly typed, without any manual type declarations beyond the initial Flow input type.
339
-
340
- ### Benefits of Automatic Type Inference
341
-
342
- - **Refactoring safety**: Change a step's output, and TypeScript will flag all dependent steps that need updates
343
- - **Discoverability**: IDE autocompletion shows exactly what data is available in each step
344
- - **Error prevention**: Catch typos and type mismatches at compile time, not runtime
345
- - **Documentation**: The types themselves serve as living documentation of your workflow's data flow
346
-
347
- ## Data Flow
348
-
349
- ### Input and Output Handling
350
-
351
- Handlers in pgflow **must return** JSON-serializable values that are captured and saved when `complete_task` is called. These outputs become available as inputs to dependent steps, allowing data to flow through your workflow pipeline.
352
-
353
- When a step is executed, it receives an input object where:
354
-
355
- - Each key is a step_slug of a completed dependency
356
- - Each value is that step's output
357
- - A special "run" key contains the original workflow input
358
-
359
- #### Example: `sentiment`
360
-
361
- When the `sentiment` step runs, it receives:
362
-
363
- ```json
364
- {
365
- "run": { "url": "https://example.com" },
366
- "website": { "content": "HTML content", "status": 200 }
367
- }
368
- ```
369
-
370
- #### Example: `saveToDb`
371
-
372
- The `saveToDb` step depends on both `sentiment` and `summary`:
373
-
374
- ```json
375
- {
376
- "run": { "url": "https://example.com" },
377
- "sentiment": { "score": 0.85, "label": "positive" },
378
- "summary": "This website discusses various topics related to technology and innovation."
379
- }
380
- ```
381
-
382
- ### Run Completion
383
-
384
- When all steps in a run are completed, the run status is automatically updated to 'completed' and its output is set. The output is an aggregation of all the outputs from final steps (steps that have no dependents):
385
-
386
- ```sql
387
- -- Example of a completed run with output
388
- SELECT run_id, status, output FROM pgflow.runs WHERE run_id = '<run_uuid>';
389
-
390
- -- run_id | status | output
391
- -- ------------+-----------+-----------------------------------------------------
392
- -- <run uuid> | completed | {"saveToDb": {"status": "success"}}
393
- ```