@pgflow/core 0.0.5-prealpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +660 -0
- package/README.md +373 -0
- package/__tests__/mocks/index.ts +1 -0
- package/__tests__/mocks/postgres.ts +37 -0
- package/__tests__/types/PgflowSqlClient.test-d.ts +59 -0
- package/dist/LICENSE.md +660 -0
- package/dist/README.md +373 -0
- package/dist/index.js +54 -0
- package/docs/options_for_flow_and_steps.md +75 -0
- package/docs/pgflow-blob-reference-system.md +179 -0
- package/eslint.config.cjs +22 -0
- package/example-flow.mermaid +5 -0
- package/example-flow.svg +1 -0
- package/flow-lifecycle.mermaid +83 -0
- package/flow-lifecycle.svg +1 -0
- package/out-tsc/vitest/__tests__/mocks/index.d.ts +2 -0
- package/out-tsc/vitest/__tests__/mocks/index.d.ts.map +1 -0
- package/out-tsc/vitest/__tests__/mocks/postgres.d.ts +15 -0
- package/out-tsc/vitest/__tests__/mocks/postgres.d.ts.map +1 -0
- package/out-tsc/vitest/__tests__/types/PgflowSqlClient.test-d.d.ts +2 -0
- package/out-tsc/vitest/__tests__/types/PgflowSqlClient.test-d.d.ts.map +1 -0
- package/out-tsc/vitest/tsconfig.spec.tsbuildinfo +1 -0
- package/out-tsc/vitest/vite.config.d.ts +3 -0
- package/out-tsc/vitest/vite.config.d.ts.map +1 -0
- package/package.json +28 -0
- package/pkgs/core/dist/index.js +54 -0
- package/pkgs/core/dist/pkgs/core/LICENSE.md +660 -0
- package/pkgs/core/dist/pkgs/core/README.md +373 -0
- package/pkgs/dsl/dist/index.js +123 -0
- package/pkgs/dsl/dist/pkgs/dsl/README.md +11 -0
- package/project.json +125 -0
- package/prompts/architect.md +87 -0
- package/prompts/condition.md +33 -0
- package/prompts/declarative_sql.md +15 -0
- package/prompts/deps_in_payloads.md +20 -0
- package/prompts/dsl-multi-arg.ts +48 -0
- package/prompts/dsl-options.md +39 -0
- package/prompts/dsl-single-arg.ts +51 -0
- package/prompts/dsl-two-arg.ts +61 -0
- package/prompts/dsl.md +119 -0
- package/prompts/fanout_steps.md +1 -0
- package/prompts/json_schemas.md +36 -0
- package/prompts/one_shot.md +286 -0
- package/prompts/pgtap.md +229 -0
- package/prompts/sdk.md +59 -0
- package/prompts/step_types.md +62 -0
- package/prompts/versioning.md +16 -0
- package/queries/fail_permanently.sql +17 -0
- package/queries/fail_task.sql +21 -0
- package/queries/sequential.sql +47 -0
- package/queries/two_roots_left_right.sql +59 -0
- package/schema.svg +1 -0
- package/scripts/colorize-pgtap-output.awk +72 -0
- package/scripts/run-test-with-colors +5 -0
- package/scripts/watch-test +7 -0
- package/src/PgflowSqlClient.ts +85 -0
- package/src/database-types.ts +759 -0
- package/src/index.ts +3 -0
- package/src/types.ts +103 -0
- package/supabase/config.toml +32 -0
- package/supabase/migrations/000000_schema.sql +150 -0
- package/supabase/migrations/000005_create_flow.sql +29 -0
- package/supabase/migrations/000010_add_step.sql +48 -0
- package/supabase/migrations/000015_start_ready_steps.sql +45 -0
- package/supabase/migrations/000020_start_flow.sql +46 -0
- package/supabase/migrations/000030_read_with_poll_backport.sql +70 -0
- package/supabase/migrations/000040_poll_for_tasks.sql +100 -0
- package/supabase/migrations/000045_maybe_complete_run.sql +30 -0
- package/supabase/migrations/000050_complete_task.sql +98 -0
- package/supabase/migrations/000055_calculate_retry_delay.sql +11 -0
- package/supabase/migrations/000060_fail_task.sql +124 -0
- package/supabase/migrations/000_edge_worker_initial.sql +86 -0
- package/supabase/seed.sql +202 -0
- package/supabase/tests/add_step/basic_step_addition.test.sql +29 -0
- package/supabase/tests/add_step/circular_dependency.test.sql +21 -0
- package/supabase/tests/add_step/flow_isolation.test.sql +26 -0
- package/supabase/tests/add_step/idempotent_step_addition.test.sql +20 -0
- package/supabase/tests/add_step/invalid_step_slug.test.sql +16 -0
- package/supabase/tests/add_step/nonexistent_dependency.test.sql +16 -0
- package/supabase/tests/add_step/nonexistent_flow.test.sql +13 -0
- package/supabase/tests/add_step/options.test.sql +66 -0
- package/supabase/tests/add_step/step_with_dependency.test.sql +36 -0
- package/supabase/tests/add_step/step_with_multiple_dependencies.test.sql +46 -0
- package/supabase/tests/complete_task/archives_message.test.sql +67 -0
- package/supabase/tests/complete_task/completes_run_if_no_more_remaining_steps.test.sql +62 -0
- package/supabase/tests/complete_task/completes_task_and_updates_dependents.test.sql +64 -0
- package/supabase/tests/complete_task/decrements_remaining_steps_if_completing_step.test.sql +62 -0
- package/supabase/tests/complete_task/saves_output_when_completing_run.test.sql +57 -0
- package/supabase/tests/create_flow/flow_creation.test.sql +27 -0
- package/supabase/tests/create_flow/idempotency_and_duplicates.test.sql +26 -0
- package/supabase/tests/create_flow/invalid_slug.test.sql +13 -0
- package/supabase/tests/create_flow/options.test.sql +57 -0
- package/supabase/tests/fail_task/exponential_backoff.test.sql +70 -0
- package/supabase/tests/fail_task/mark_as_failed_if_no_retries_available.test.sql +49 -0
- package/supabase/tests/fail_task/respects_flow_retry_settings.test.sql +48 -0
- package/supabase/tests/fail_task/respects_step_retry_settings.test.sql +48 -0
- package/supabase/tests/fail_task/retry_task_if_retries_available.test.sql +39 -0
- package/supabase/tests/is_valid_slug.test.sql +72 -0
- package/supabase/tests/poll_for_tasks/builds_proper_input_from_deps_outputs.test.sql +35 -0
- package/supabase/tests/poll_for_tasks/hides_messages.test.sql +35 -0
- package/supabase/tests/poll_for_tasks/increments_attempts_count.test.sql +35 -0
- package/supabase/tests/poll_for_tasks/multiple_task_processing.test.sql +24 -0
- package/supabase/tests/poll_for_tasks/polls_only_queued_tasks.test.sql +35 -0
- package/supabase/tests/poll_for_tasks/reads_messages.test.sql +38 -0
- package/supabase/tests/poll_for_tasks/returns_no_tasks_if_no_step_task_for_message.test.sql +34 -0
- package/supabase/tests/poll_for_tasks/returns_no_tasks_if_queue_is_empty.test.sql +19 -0
- package/supabase/tests/poll_for_tasks/returns_no_tasks_when_qty_set_to_0.test.sql +22 -0
- package/supabase/tests/poll_for_tasks/sets_vt_delay_based_on_opt_timeout.test.sql +41 -0
- package/supabase/tests/poll_for_tasks/tasks_reapppear_if_not_processed_in_time.test.sql +59 -0
- package/supabase/tests/start_flow/creates_run.test.sql +24 -0
- package/supabase/tests/start_flow/creates_step_states_for_all_steps.test.sql +25 -0
- package/supabase/tests/start_flow/creates_step_tasks_only_for_root_steps.test.sql +54 -0
- package/supabase/tests/start_flow/returns_run.test.sql +24 -0
- package/supabase/tests/start_flow/sends_messages_on_the_queue.test.sql +50 -0
- package/supabase/tests/start_flow/starts_only_root_steps.test.sql +21 -0
- package/supabase/tests/step_dsl_is_idempotent.test.sql +34 -0
- package/tsconfig.json +16 -0
- package/tsconfig.lib.json +26 -0
- package/tsconfig.spec.json +35 -0
- package/vite.config.ts +57 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
Your job is to implement required SQL schemas and functions for an MVP of my open source Postgres-native workflow orchestration engine called pgflow.
|
|
2
|
+
|
|
3
|
+
The main idea of the project is to keep shape of the DAG (nodes and edges) and its runtime state in the database
|
|
4
|
+
and expose SQL functions that will allow to propagate through the state.
|
|
5
|
+
|
|
6
|
+
Real work is done on the task queue workers and the functions from pgflow are only orchestrating
|
|
7
|
+
the queue messages.
|
|
8
|
+
|
|
9
|
+
Workers are supposed to call user functions with the input from the queue message,
|
|
10
|
+
and should acknowledge the completion of the task or its failure (error thrown) by
|
|
11
|
+
calling appropriate pgflow SQL functions.
|
|
12
|
+
|
|
13
|
+
This way the orchestration is decoupled from the execution.
|
|
14
|
+
|
|
15
|
+
I have a concrete implementation plan for you to follow and will unfold it
|
|
16
|
+
step by step below.
|
|
17
|
+
|
|
18
|
+
## Assumptions/best practices
|
|
19
|
+
|
|
20
|
+
### We are building Minimal Viable Product
|
|
21
|
+
|
|
22
|
+
Remember that we are building MVP and main focus should be on shipping something as soon as possible,
|
|
23
|
+
by cutting scope, simplifying the architectures and code.
|
|
24
|
+
|
|
25
|
+
But the outlined features are definitely something that we will be doing in the future.
|
|
26
|
+
I am most certain about the foreach-array steps - this is a MUST have.
|
|
27
|
+
So your focus should be on trying to implement the MVP but not closing the doors to the future improvements.
|
|
28
|
+
|
|
29
|
+
### Slugs
|
|
30
|
+
|
|
31
|
+
We do not use serial IDs nor UUIDs for static things, we use "slugs" instead.
|
|
32
|
+
A slug is just a string that conforms to following rules:
|
|
33
|
+
|
|
34
|
+
```sql
|
|
35
|
+
slug is not null
|
|
36
|
+
and slug <> ''
|
|
37
|
+
and length(slug) <= 128
|
|
38
|
+
and slug ~ '^[a-zA-Z_][a-zA-Z0-9_]*$';
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
We use UUID for identifying particular run of the flow.
|
|
42
|
+
But the states of steps for that particular run are not identified by separate UUIDs,
|
|
43
|
+
but rather by a pair of run_id and step_slug. This pattern allows to easily refer
|
|
44
|
+
to steps and flows by their slugs. **Leverage this pattern everywhere you can!**
|
|
45
|
+
|
|
46
|
+
### References/fkeys
|
|
47
|
+
|
|
48
|
+
Use foreign keys everywhere to ensure consistency.
|
|
49
|
+
Use composite foreign keys and composite primary keys composed of flow/step slugs and run_id's if needed.
|
|
50
|
+
|
|
51
|
+
### Declarative vs procedural
|
|
52
|
+
|
|
53
|
+
**YOU MUST ALWAYS PRIORITIZE DECLARATIVE STYLE** and prioritize Batching operations.
|
|
54
|
+
|
|
55
|
+
Avoid plpgsql as much as you can.
|
|
56
|
+
It is important to have your DB procedures run in batched ways and use declarative rather than procedural constructs where possible:
|
|
57
|
+
|
|
58
|
+
- do not ever use `language plpgsql` in functions, always use `language sql`
|
|
59
|
+
- don't do loops, do SQL statements that address multiple rows at once.
|
|
60
|
+
- don't write trigger functions that fire for a single row, use `FOR EACH STATEMENT` instead.
|
|
61
|
+
- don't call functions for each row in a result set, a condition, a join, or whatever; instead use functions that return `SETOF` and join against these.
|
|
62
|
+
|
|
63
|
+
If you're constructing dynamic SQL, you should only ever use `%I` and `%L` when using `FORMAT` or similar; you should never see `%s` (with the very rare exception of where you're merging in another SQL fragment that you've previously formatted using %I and %L).
|
|
64
|
+
|
|
65
|
+
Remember, that functions have significant overhead in Postgres - instead of factoring into lots of tiny functions, think about how to make your code more expressive so there's no need.
|
|
66
|
+
|
|
67
|
+
## Schemas
|
|
68
|
+
|
|
69
|
+
### pgflow.flows
|
|
70
|
+
|
|
71
|
+
A static definition of a flow (DAG):
|
|
72
|
+
|
|
73
|
+
```sql
|
|
74
|
+
CREATE TABLE pgflow.flows (
|
|
75
|
+
flow_slug text PRIMARY KEY NOT NULL -- Unique identifier for the flow
|
|
76
|
+
CHECK (is_valid_slug(flow_slug))
|
|
77
|
+
);
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### pgflow.steps
|
|
81
|
+
|
|
82
|
+
A static definition of a step within a flow (a DAG "nodes"):
|
|
83
|
+
|
|
84
|
+
```sql
|
|
85
|
+
CREATE TABLE pgflow.steps (
|
|
86
|
+
flow_slug text NOT NULL REFERENCES pgflow.flows (flow_slug),
|
|
87
|
+
step_slug text NOT NULL,
|
|
88
|
+
step_type text NOT NULL DEFAULT 'single',
|
|
89
|
+
PRIMARY KEY (flow_slug, step_slug),
|
|
90
|
+
CHECK (is_valid_slug(flow_slug)),
|
|
91
|
+
CHECK (is_valid_slug(step_slug))
|
|
92
|
+
);
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### pgflow.deps
|
|
96
|
+
|
|
97
|
+
A static definition of dependencies between steps (a DAG "edges"):
|
|
98
|
+
|
|
99
|
+
```sql
|
|
100
|
+
CREATE TABLE pgflow.deps (
|
|
101
|
+
flow_slug text NOT NULL REFERENCES pgflow.flows (flow_slug),
|
|
102
|
+
dep_slug text NOT NULL, -- The step that must complete first
|
|
103
|
+
step_slug text NOT NULL, -- The step that depends on dep_slug
|
|
104
|
+
PRIMARY KEY (flow_slug, dep_slug, step_slug),
|
|
105
|
+
FOREIGN KEY (flow_slug, dep_slug)
|
|
106
|
+
REFERENCES pgflow.steps (flow_slug, step_slug),
|
|
107
|
+
FOREIGN KEY (flow_slug, step_slug)
|
|
108
|
+
REFERENCES pgflow.steps (flow_slug, step_slug),
|
|
109
|
+
CHECK (dep_slug != step_slug), -- Prevent self-dependencies
|
|
110
|
+
CHECK (is_valid_slug(step_slug))
|
|
111
|
+
);
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### pgflow.runs
|
|
115
|
+
|
|
116
|
+
A table storing runtime state of given flow.
|
|
117
|
+
A run is identified by a `flow_slug` and `run_id`.
|
|
118
|
+
|
|
119
|
+
```sql
|
|
120
|
+
CREATE TABLE pgflow.runs (
|
|
121
|
+
run_id uuid PRIMARY KEY NOT NULL DEFAULT gen_random_uuid(),
|
|
122
|
+
flow_slug text NOT NULL REFERENCES pgflow.flows (flow_slug), -- denormalized
|
|
123
|
+
status text NOT NULL DEFAULT 'started',
|
|
124
|
+
input jsonb NOT NULL,
|
|
125
|
+
CHECK (status IN ('started', 'failed', 'completed'))
|
|
126
|
+
);
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
There is also `status` that currently can be started, failed or completed.
|
|
130
|
+
|
|
135
|
+
|
|
136
|
+
### pgflow.step_states
|
|
137
|
+
|
|
138
|
+
Represents a state of a particular step in a particular run.
|
|
139
|
+
|
|
140
|
+
```sql
|
|
141
|
+
|
|
142
|
+
-- Step states table - tracks the state of individual steps within a run
|
|
143
|
+
CREATE TABLE pgflow.step_states (
|
|
144
|
+
flow_slug text NOT NULL REFERENCES pgflow.flows (flow_slug),
|
|
145
|
+
run_id uuid NOT NULL REFERENCES pgflow.runs (run_id),
|
|
146
|
+
step_slug text NOT NULL,
|
|
147
|
+
status text NOT NULL DEFAULT 'created',
|
|
148
|
+
PRIMARY KEY (run_id, step_slug),
|
|
149
|
+
FOREIGN KEY (flow_slug, step_slug)
|
|
150
|
+
REFERENCES pgflow.steps (flow_slug, step_slug),
|
|
151
|
+
CHECK (status IN ('created', 'started', 'completed', 'failed'))
|
|
152
|
+
);
|
|
153
|
+
```
|
|
155
|
+
|
|
156
|
+
### pgflow.step_tasks
|
|
157
|
+
|
|
158
|
+
This table is really unique and interesting. We are starting the development
|
|
159
|
+
of the flow orchestration engine with a simple step that runs one unit of work.
|
|
160
|
+
|
|
161
|
+
But I imagine we would suppport additional types of steps, like:
|
|
162
|
+
|
|
163
|
+
- a step that requires input array and enqueues a task per array item, so they are created in parallel
|
|
164
|
+
- a step that runs some preprocessing/postprocessing in an additional task
|
|
165
|
+
|
|
166
|
+
So in order to accommodate this, we need an additional layer between step_state and
|
|
167
|
+
an actual task queue, in order to track which messages belong to which steps,
|
|
168
|
+
in case there are more than 1 unit of work for given step.
|
|
169
|
+
|
|
170
|
+
```sql
|
|
171
|
+
-- Execution logs table - tracks the tasks of individual steps
|
|
172
|
+
CREATE TABLE pgflow.step_tasks (
|
|
173
|
+
flow_slug text NOT NULL REFERENCES pgflow.flows (flow_slug),
|
|
174
|
+
step_slug text NOT NULL,
|
|
175
|
+
run_id uuid NOT NULL REFERENCES pgflow.runs (run_id),
|
|
176
|
+
status text NOT NULL DEFAULT 'queued',
|
|
177
|
+
input jsonb NOT NULL, -- payload that will be passed to queue message
|
|
178
|
+
output jsonb, -- like step_result but for task, can store result or error/stacktrace
|
|
179
|
+
message_id bigint, -- an id of the queue message
|
|
180
|
+
CONSTRAINT step_tasks_pkey PRIMARY KEY (run_id, step_slug), -- NOTE(review): this PK permits only ONE task per step, contradicting the fanout goal above; a task index column will likely need to join the key for map/foreach steps — confirm
|
|
181
|
+
FOREIGN KEY (run_id, step_slug)
|
|
182
|
+
REFERENCES pgflow.step_states (run_id, step_slug),
|
|
183
|
+
CHECK (status IN ('queued', 'started', 'failed', 'completed')),
|
|
184
|
+
CHECK (is_valid_slug(flow_slug)),
|
|
185
|
+
CHECK (is_valid_slug(step_slug))
|
|
186
|
+
);
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Typescript DSL, topological ordering and acyclicity validation
|
|
190
|
+
|
|
191
|
+
The simple typescript DSL will be created that will have strong typing
|
|
192
|
+
and will enforce adding steps in a topological order, preventing
|
|
193
|
+
cycles by the strict ordering of the steps addition.
|
|
194
|
+
|
|
195
|
+
Typescript DSL looks like this:
|
|
196
|
+
|
|
197
|
+
```ts
|
|
198
|
+
const BasicFlow = new Flow<string>()
|
|
199
|
+
.step('root', ({ run }) => {
|
|
200
|
+
return `[${run}]r00t`;
|
|
201
|
+
})
|
|
202
|
+
.step('left', ['root'], ({ root: r }) => {
|
|
203
|
+
return `${r}/left`;
|
|
204
|
+
})
|
|
205
|
+
.step('right', ['root'], ({ root: r }) => {
|
|
206
|
+
return `${r}/right`;
|
|
207
|
+
})
|
|
208
|
+
.step('end', ['left', 'right'], ({ left, right, run }) => {
|
|
209
|
+
return `<${left}> and <${right}> of (${run})`;
|
|
210
|
+
});
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
This will be compiled to a simple SQL calling SQL function `pgflow.add_step(flow_slug, step_slug, dep_step_slugs[])`:
|
|
214
|
+
|
|
215
|
+
```sql
|
|
216
|
+
SELECT pgflow.add_step('basic', 'root', ARRAY[]::text[]);
|
|
217
|
+
SELECT pgflow.add_step('basic', 'left', ARRAY['root']);
|
|
218
|
+
SELECT pgflow.add_step('basic', 'right', ARRAY['root']);
|
|
219
|
+
SELECT pgflow.add_step('basic', 'end', ARRAY['left', 'right']);
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
## SQL functions API
|
|
223
|
+
|
|
224
|
+
This describes public SQL functions that are available to developer using pgflow
|
|
225
|
+
and to the workers.
|
|
226
|
+
|
|
227
|
+
Developer calls `start_flow` and rest is called by the workers.
|
|
228
|
+
|
|
229
|
+
### pgflow.start_flow(flow_slug::text, input::jsonb)
|
|
230
|
+
|
|
231
|
+
This function is used to start a flow.
|
|
232
|
+
It should work like this:
|
|
233
|
+
|
|
234
|
+
- create a new `pgflow.runs` row for given flow_slug
|
|
235
|
+
- create all the `pgflow.step_states` rows corresponding to the steps in the flow
|
|
236
|
+
- find root steps (ones without dependencies) and call "start_step" on each of them
|
|
237
|
+
|
|
238
|
+
### pgflow.start_step(run_id::uuid, step_slug::text)
|
|
239
|
+
|
|
240
|
+
This function is called by start_flow but also by complete_step_task (or somewhere near its call)
|
|
241
|
+
when worker acknowledges the step_task completion and it is detected, that there are ready dependent
|
|
242
|
+
steps to be started.
|
|
243
|
+
|
|
244
|
+
It should probably call start_step_task under the hood, which will:
|
|
245
|
+
|
|
246
|
+
- updating step_state status/timestamps
|
|
247
|
+
- creating a step_task row
|
|
248
|
+
- enqueueing a queue message for this step_task
|
|
249
|
+
|
|
250
|
+
For other step types, like array/foreach, it would probably call the step_task
|
|
251
|
+
for each array item, so more than one step task is created and more than one message is enqueued.
|
|
252
|
+
|
|
253
|
+
### pgflow.start_step_task(run_id::uuid, step_slug::text, task_id::bigint)
|
|
254
|
+
|
|
255
|
+
I am not yet sure how this will work for other step types that will need more step tasks.
|
|
256
|
+
But probably each step type would have its own implementation of this function,
|
|
257
|
+
and a simple step type will just create a new step_task row and enqueue it.
|
|
258
|
+
|
|
259
|
+
But an array/foreach step type would need a different implementation.
|
|
260
|
+
Would need to check the input for the step which is an array, and would
|
|
261
|
+
create a new step_task for each array item and enqueue as many messages as there are items in the array.
|
|
262
|
+
|
|
263
|
+
### pgflow.complete_step_task(run_id::uuid, step_slug::text, output::jsonb)
|
|
264
|
+
|
|
265
|
+
This will be called by the worker when a step_task is completed.
|
|
266
|
+
It will work like this in the simplified version when one step_state corresponds to one step_task:
|
|
267
|
+
|
|
268
|
+
- it marks step_task as completed, saving the output
|
|
269
|
+
- it in turns mark step_state as completed, saving the output
|
|
270
|
+
- then it should check for any dependent steps (steps that depend on the just-completed step) in the same run
|
|
271
|
+
- it should then check if any of those dependent steps are "ready" - meaning, all their dependencies are completed
|
|
272
|
+
- for each of those ready steps, it should call `pgflow.start_step` so their tasks get created and enqueued
|
|
273
|
+
|
|
274
|
+
I am not yet sure how this will work for other step types that will need more step tasks.
|
|
275
|
+
Probably each step type would have its own implementation of this function,
|
|
276
|
+
so a simple step will just call complete_step_state when complete_step_task is called.
|
|
277
|
+
|
|
278
|
+
An array/foreach step type would need a different implementation.
|
|
279
|
+
Would probably need to check if other step_tasks are still pending.
|
|
280
|
+
If all are already completed, it would just call complete_step_state,
|
|
281
|
+
otherwise it will just continue, so other (last) step task can complete the step state.
|
|
282
|
+
|
|
283
|
+
### pgflow.fail_step_task(run_id::uuid, step_slug::text, error::jsonb)
|
|
284
|
+
|
|
285
|
+
This is very similar to complete_step_task, but it will mark step_task as failed,
|
|
286
|
+
will save error message and will call fail_step_state instead of complete_step_state.
|
package/prompts/pgtap.md
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# PGTap Testing Guidelines
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
This document outlines a set of rules, best practices, ideas, and guidelines for writing pgTap tests for the project.
|
|
6
|
+
|
|
7
|
+
## File Organization
|
|
8
|
+
|
|
9
|
+
- Store test files under the `supabase/tests/` directory.
|
|
10
|
+
- Use descriptive file names with the `.test.sql` suffix.
|
|
11
|
+
- Organize tests in subfolders, by functionality (e.g., `start_flow`, `create_flow`, `add_step`, `poll_for_tasks`, `complete_task`, etc).
|
|
12
|
+
|
|
13
|
+
## Transactional Test Structure
|
|
14
|
+
|
|
15
|
+
Wrap each test in a transaction to ensure isolation:
|
|
16
|
+
|
|
17
|
+
```sql
|
|
18
|
+
begin;
|
|
19
|
+
select plan(2);
|
|
20
|
+
-- Test queries here
|
|
21
|
+
select finish();
|
|
22
|
+
rollback;
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Setup and Teardown
|
|
26
|
+
|
|
27
|
+
Reset and prepare the database context at the start of each test:
|
|
28
|
+
|
|
29
|
+
```sql
|
|
30
|
+
select pgflow_tests.reset_db();
|
|
31
|
+
select pgflow_tests.setup_flow('sequential');
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Terminate tests with:
|
|
35
|
+
|
|
36
|
+
```sql
|
|
37
|
+
select finish();
|
|
38
|
+
rollback;
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Declaring the Test Plan
|
|
42
|
+
|
|
43
|
+
Declare the number of tests using the `plan()` function:
|
|
44
|
+
|
|
45
|
+
```sql
|
|
46
|
+
select plan(2);
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Using pgTap Assertions
|
|
50
|
+
|
|
51
|
+
Use the following assertion functions to verify expected outcomes:
|
|
52
|
+
|
|
53
|
+
- `is(actual, expected, message)`
|
|
54
|
+
- `results_eq(actual, expected, message)`
|
|
55
|
+
- `set_eq(actual_query, expected_array, message)`
|
|
56
|
+
- `throws_ok(query, expected_error_message, message)`
|
|
57
|
+
- `ok(boolean_expression, message)`
|
|
58
|
+
|
|
59
|
+
### Example: Validating Run Creation
|
|
60
|
+
|
|
61
|
+
```sql
|
|
62
|
+
select pgflow.start_flow('sequential', '"hello"'::jsonb);
|
|
63
|
+
|
|
64
|
+
select results_eq(
|
|
65
|
+
$$ SELECT flow_slug, status, input FROM pgflow.runs $$,
|
|
66
|
+
$$ VALUES ('sequential', 'started', '"hello"'::jsonb) $$,
|
|
67
|
+
'Run should be created with appropriate status and input'
|
|
68
|
+
);
|
|
69
|
+
|
|
70
|
+
select is(
|
|
71
|
+
(select remaining_steps::int from pgflow.runs limit 1),
|
|
72
|
+
3::int,
|
|
73
|
+
'remaining_steps should be equal to number of steps'
|
|
74
|
+
);
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Example: Testing Error Handling
|
|
78
|
+
|
|
79
|
+
```sql
|
|
80
|
+
select throws_ok(
|
|
81
|
+
$$ SELECT pgflow.create_flow('invalid-flow') $$,
|
|
82
|
+
'new row for relation "flows" violates check constraint "flows_flow_slug_check"',
|
|
83
|
+
'Should detect and prevent invalid flow slug'
|
|
84
|
+
);
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Idempotence and Duplicate Prevention
|
|
88
|
+
|
|
89
|
+
Run operations multiple times to ensure idempotency and that no duplicates are created:
|
|
90
|
+
|
|
91
|
+
```sql
|
|
92
|
+
select pgflow.create_flow('test_flow');
|
|
93
|
+
select pgflow.create_flow('test_flow');
|
|
94
|
+
|
|
95
|
+
select results_eq(
|
|
96
|
+
$$ SELECT flow_slug FROM pgflow.flows $$,
|
|
97
|
+
array['test_flow']::text [],
|
|
98
|
+
'No duplicate flow should be created'
|
|
99
|
+
);
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Testing Dependencies and Flow Isolation
|
|
103
|
+
|
|
104
|
+
Ensure that steps and dependencies remain isolated within a flow:
|
|
105
|
+
|
|
106
|
+
```sql
|
|
107
|
+
select pgflow.create_flow('test_flow');
|
|
108
|
+
select pgflow.add_step('test_flow', 'first_step');
|
|
109
|
+
|
|
110
|
+
select pgflow.create_flow('another_flow');
|
|
111
|
+
select pgflow.add_step('another_flow', 'first_step');
|
|
112
|
+
select pgflow.add_step('another_flow', 'another_step', array['first_step']);
|
|
113
|
+
|
|
114
|
+
select set_eq(
|
|
115
|
+
$$
|
|
116
|
+
SELECT flow_slug, step_slug
|
|
117
|
+
FROM pgflow.steps WHERE flow_slug = 'another_flow'
|
|
118
|
+
$$,
|
|
119
|
+
$$ VALUES
|
|
120
|
+
('another_flow', 'another_step'),
|
|
121
|
+
('another_flow', 'first_step')
|
|
122
|
+
$$,
|
|
123
|
+
'Steps in second flow should be isolated from first flow'
|
|
124
|
+
);
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Testing Message Queues
|
|
128
|
+
|
|
129
|
+
Simulate message polling and verify visibility timeouts:
|
|
130
|
+
|
|
131
|
+
```sql
|
|
132
|
+
select is(
|
|
133
|
+
(select count(*)::integer from pgflow.poll_for_tasks(
|
|
134
|
+
queue_name => 'sequential'::text,
|
|
135
|
+
vt => 5,
|
|
136
|
+
qty => 1,
|
|
137
|
+
max_poll_seconds => 1
|
|
138
|
+
)),
|
|
139
|
+
1::integer,
|
|
140
|
+
'First poll should get the available task'
|
|
141
|
+
);
|
|
142
|
+
|
|
143
|
+
select is(
|
|
144
|
+
(select count(*)::integer from pgflow.poll_for_tasks(
|
|
145
|
+
queue_name => 'sequential'::text,
|
|
146
|
+
vt => 5,
|
|
147
|
+
qty => 1,
|
|
148
|
+
max_poll_seconds => 1
|
|
149
|
+
)),
|
|
150
|
+
0::integer,
|
|
151
|
+
'Concurrent poll should not get the same task (due to visibility timeout)'
|
|
152
|
+
);
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Completing Tasks and Flow Progression
|
|
156
|
+
|
|
157
|
+
Ensure that task completions update state and trigger dependents:
|
|
158
|
+
|
|
159
|
+
```sql
|
|
160
|
+
select pgflow.complete_task(
|
|
161
|
+
(select run_id from pgflow.runs limit 1),
|
|
162
|
+
'first',
|
|
163
|
+
0,
|
|
164
|
+
'{"result": "first completed"}'::jsonb
|
|
165
|
+
);
|
|
166
|
+
|
|
167
|
+
select results_eq(
|
|
168
|
+
$$ SELECT status, output FROM pgflow.step_tasks
|
|
169
|
+
WHERE run_id = (SELECT run_id FROM pgflow.runs LIMIT 1)
|
|
170
|
+
AND step_slug = 'first' $$,
|
|
171
|
+
$$ VALUES ('completed', '{"result": "first completed"}'::jsonb) $$,
|
|
172
|
+
'Task should be marked as completed with correct output'
|
|
173
|
+
);
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
## Archiving Processed Messages
|
|
177
|
+
|
|
178
|
+
Verify that messages are archived after task completion:
|
|
179
|
+
|
|
180
|
+
```sql
|
|
181
|
+
select is(
|
|
182
|
+
(select message ->> 'step_slug' from pgmq.q_sequential limit 1),
|
|
183
|
+
'first',
|
|
184
|
+
'First message should be in the queue'
|
|
185
|
+
);
|
|
186
|
+
|
|
187
|
+
select pgflow.complete_task(
|
|
188
|
+
(select run_id from pgflow.runs limit 1),
|
|
189
|
+
'first',
|
|
190
|
+
0,
|
|
191
|
+
'"first was successful"'::jsonb
|
|
192
|
+
);
|
|
193
|
+
|
|
194
|
+
select is(
|
|
195
|
+
(select count(*)::INT from pgmq.q_sequential where message ->> 'step_slug' = 'first'),
|
|
196
|
+
0::INT,
|
|
197
|
+
'There should be no messages in the queue'
|
|
198
|
+
);
|
|
199
|
+
|
|
200
|
+
select is(
|
|
201
|
+
(select count(*)::INT from pgmq.a_sequential where message ->> 'step_slug' = 'first' limit 1),
|
|
202
|
+
1::INT,
|
|
203
|
+
'The message should be archived'
|
|
204
|
+
);
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## Validating Input with Custom Validators
|
|
208
|
+
|
|
209
|
+
Use custom functions to check input formats:
|
|
210
|
+
|
|
211
|
+
```sql
|
|
212
|
+
select ok(
|
|
213
|
+
pgflow.is_valid_slug('valid_slug'),
|
|
214
|
+
'is_valid_slug returns true for string with underscore'
|
|
215
|
+
);
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Conclusion
|
|
219
|
+
|
|
220
|
+
Adhere to the following best practices when writing pgTap tests:
|
|
221
|
+
|
|
222
|
+
- Keep tests self-contained with proper setup and teardown.
|
|
223
|
+
- Use transactions to isolate tests.
|
|
224
|
+
- Declare a clear test plan using `plan()`.
|
|
225
|
+
- Write focused tests with descriptive messages.
|
|
226
|
+
- Ensure idempotence by re-running operations.
|
|
227
|
+
- Validate both positive outcomes and error cases.
|
|
228
|
+
|
|
229
|
+
Following these guidelines will help maintain consistency, reliability, and clarity in your pgTap tests.
|
package/prompts/sdk.md
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Flow SDK
|
|
2
|
+
|
|
3
|
+
The purpose of Flow SDK is to allow users to start and observe flow runs in their apps
|
|
4
|
+
and leverage strong typing of the inputs, outputs and dependencies between steps
|
|
5
|
+
in order to improve Developer Experience.
|
|
6
|
+
|
|
7
|
+
Based on the Flow definition like this:
|
|
8
|
+
|
|
9
|
+
```ts
|
|
10
|
+
const ScrapeWebsiteFlow = new Flow<Input>()
|
|
11
|
+
.step('table_of_contents', async (payload) => {
|
|
12
|
+
// Placeholder function
|
|
13
|
+
return await fetchTableOfContents(payload.run.url);
|
|
14
|
+
})
|
|
15
|
+
.step('subpages', ['table_of_contents'], async (payload) => {
|
|
16
|
+
// Placeholder function
|
|
17
|
+
return await scrapeSubpages(payload.run.url, payload.table_of_contents.urls_of_subpages);
|
|
18
|
+
})
|
|
19
|
+
.step('summaries', ['subpages'], async (payload) => {
|
|
20
|
+
// Placeholder function
|
|
21
|
+
return await generateSummaries(payload.subpages.contentsOfSubpages);
|
|
22
|
+
})
|
|
23
|
+
.step('sentiments', ['subpages'], async (payload) => {
|
|
24
|
+
// Placeholder function
|
|
25
|
+
return await analyzeSentiments(payload.subpages.contentsOfSubpages);
|
|
26
|
+
})
|
|
27
|
+
.step('save_to_db', ['subpages', 'summaries', 'sentiments'], async (payload) => {
|
|
28
|
+
// Placeholder function
|
|
29
|
+
return await saveToDb(payload.subpages, payload.summaries, payload.sentiments);
|
|
30
|
+
});
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
We want to be able to infer the following information somehow:
|
|
34
|
+
|
|
35
|
+
- The cumulative payload types that are built step-by-step
|
|
36
|
+
- The relationships between steps that are established at runtime
|
|
37
|
+
|
|
38
|
+
Those are the most important things we need, so users can for example trigger
|
|
39
|
+
flows and get annotations for the step results etc.
|
|
40
|
+
Given the example flow I would like my users to be able to get their defined flow and do things like:
|
|
41
|
+
|
|
42
|
+
```ts
|
|
43
|
+
import type { ScrapeWebsiteFlow } from './flows/scrape_website';
|
|
44
|
+
import { createClient } from '@pgflow/sdk';
|
|
45
|
+
|
|
46
|
+
const { startFlow } = createClient(supabaseClient);
|
|
47
|
+
|
|
48
|
+
const flowRun = startFlow<ScrapeWebsiteFlow>({
|
|
49
|
+
url: 'https://example.com', // this is type checked based on the Input to ScrapeWebsiteFlow
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
// here, 'subpages' (the name of step) would be type checked and only existing steps
|
|
53
|
+
// can be used here, so user cannot await for non existing step
|
|
54
|
+
const subpagesOutput = flowRun.stepCompleted('subpages');
|
|
55
|
+
|
|
56
|
+
// the subpagesOutput is also type-annotated based on the return type inferred
|
|
57
|
+
// from the handler for 'subpages' step, only based on the ScrapeWebsiteFlow type
|
|
58
|
+
subpagesOutput.forEach() // this is an array because handler for 'subpages' returns an array
|
|
59
|
+
```
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
## Step types in MVP
|
|
2
|
+
|
|
3
|
+
Regular Steps
|
|
4
|
+
|
|
5
|
+
- Basic unit of work.
|
|
6
|
+
- Executes a handler that receives outputs from its declared dependencies.
|
|
7
|
+
- Its return value is passed to dependent steps.
|
|
8
|
+
|
|
9
|
+
## Planned step types
|
|
10
|
+
|
|
11
|
+
### Map Steps (tasks fanout)
|
|
12
|
+
|
|
13
|
+
- Designed for when a dependency returns an array.
|
|
14
|
+
- The handler runs once per array element (in parallel).
|
|
15
|
+
- The outputs are collected back into an array (order preserved) and passed downstream.
|
|
16
|
+
|
|
17
|
+
### Conditional Steps
|
|
18
|
+
|
|
19
|
+
- Each step will be able to specify a condition, regardless of its type
|
|
20
|
+
- Steps that run only when certain conditions are met
|
|
21
|
+
- A condition is provided (as a JSON fragment)
|
|
22
|
+
- At runtime, the input for a step (from all deps) is matched via @> against the condition
|
|
23
|
+
- If the condition is not met, the step does not run and is marked as skipped
|
|
24
|
+
- Dependent steps are not run and should probably be marked as skipped as well.
|
|
25
|
+
|
|
26
|
+
### Manual Approval Steps
|
|
27
|
+
|
|
28
|
+
- Steps that pause for human intervention.
|
|
29
|
+
- They just differ by NOT immediately queueing a task.
|
|
30
|
+
- Instead, they wait for an external update by calling **complete_step** to set their output and trigger downstream steps.
|
|
31
|
+
|
|
32
|
+
### Subflow Steps
|
|
33
|
+
|
|
34
|
+
- Encapsulate an entire subflow (a mini workflow) as a single step.
|
|
35
|
+
- A subflow is defined using the same DSL as the main flow.
|
|
36
|
+
- Each subflow has an automatic final step that gathers the outputs of all leaf steps.
|
|
37
|
+
- The subflow step triggers the subflow and waits until its output is ready.
|
|
38
|
+
- The aggregated output from the subflow becomes the output of the subflow step.
|
|
39
|
+
|
|
40
|
+
### Fanout subflows step
|
|
41
|
+
|
|
42
|
+
- Like Map steps, but instead of a task per array item, it runs a subflow per array item.
|
|
43
|
+
- It gathers final steps from subflows into an output array for the fanout subflow step
|
|
44
|
+
|
|
45
|
+
#### Tracking completion of fanout subflows
|
|
46
|
+
|
|
47
|
+
• “Fanout subflow” steps do not have local tasks. Instead, they spawn child subflows and wait for them to finish.
|
|
48
|
+
• You can track subflow completion with the same remaining_tasks field:
|
|
49
|
+
– Increment remaining_tasks by the number of child subflows.
|
|
50
|
+
– Decrement it each time a child subflow completes.
|
|
51
|
+
– When remaining_tasks reaches zero, the fanout subflow step is done.
|
|
52
|
+
• Alternatively, you can add a remaining_subflows column to separate child‐subflow tracking from local tasks.
|
|
53
|
+
– This gives clearer semantics but requires extra logic to handle multiple completion conditions.
|
|
54
|
+
• Most implementations unify subflow runs under remaining_tasks to reuse existing “remaining_tasks = 0 means done” checks.
|
|
55
|
+
|
|
56
|
+
### Additional Techniques
|
|
57
|
+
|
|
58
|
+
We simplify as much as possible and use other tools instead of reinventing the wheel.
|
|
59
|
+
|
|
60
|
+
- Recurrent tasks are handled externally via Cron triggers.
|
|
61
|
+
- Delays can be implemented using pgmq visibility timeouts.
|
|
62
|
+
- The overall design treats a flow as a single function with one input (parameters) and one output (final aggregated output).
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Versioning
|
|
2
|
+
|
|
3
|
+
Flow Versioning Strategy #[[pgflow/Versioning]]
|
|
4
|
+
Agreed on immutable flow definitions (similar to Temporal)
|
|
5
|
+
Once a flow is uploaded to DB, it remains unchanged
|
|
6
|
+
Versioning handled through flow slugs rather than explicit version numbers
|
|
7
|
+
Users responsible for managing changes in safe, organized manner
|
|
8
|
+
Benefits of immutable approach
|
|
9
|
+
Simplifies implementation
|
|
10
|
+
Provides natural versioning cascade for subflows
|
|
11
|
+
Makes version transitions explicit and intentional
|
|
12
|
+
Avoids "half-upgraded" scenarios
|
|
13
|
+
Consciously decided against "latest" aliases for now
|
|
14
|
+
Could introduce complexity and unpredictable behavior
|
|
15
|
+
Users can implement their own aliasing logic if needed
|
|
16
|
+
Explicit slugs provide clarity about which version is being used
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
-- Test: retry exhaustion for a failing task.
-- Verifies that a task which keeps failing is retried and eventually
-- marked as failed once its attempts are used up.
select pgflow_tests.reset_db();
select pgflow_tests.setup_flow('sequential');

-- SETUP
select pgflow.start_flow('sequential', '{"test": true}'::JSONB);

-- default opt_max_attempts is 3, so failing twice should mark the task as failed
-- NOTE(review): with max_attempts = 3 one might expect THREE failures to be
-- needed — confirm whether opt_max_attempts counts retries or total attempts.
select pgflow_tests.poll_and_fail('sequential');
-- sleep past the retry visibility timeout so the message can be polled again
select pg_sleep(1.1);
select pgflow_tests.poll_and_fail('sequential');

-- Inspect resulting state: task rows and the raw queue contents
select * from pgflow.step_tasks;
select * from pgmq.q_sequential;

-- Final state: task rows again (after queue inspection), step states, run status
select * from pgflow.step_tasks;
select * from pgflow.step_states;
select * from pgflow.runs;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
-- Test: happy-path completion of the 'two_roots_left_right' flow.
-- Starts the flow, completes every task, then checks the aggregated run output.
\x
begin;
select pgflow_tests.reset_db();
select pgflow_tests.setup_flow('two_roots_left_right');

-- Start the flow with a plain JSON string as input
select pgflow.start_flow('two_roots_left_right', cast('"hello"' as jsonb));

-- Complete all four steps of the flow, one poll per step
select pgflow_tests.poll_and_complete('two_roots_left_right');
select pgflow_tests.poll_and_complete('two_roots_left_right');
select pgflow_tests.poll_and_complete('two_roots_left_right');
select pgflow_tests.poll_and_complete('two_roots_left_right');

-- Inspect the run's aggregated output and the full run row
select jsonb_pretty(output) from pgflow.runs;
select * from pgflow.runs;

-- Leave the database untouched
rollback;
|