@pgflow/core 0.0.5-prealpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/LICENSE.md +660 -0
  2. package/README.md +373 -0
  3. package/__tests__/mocks/index.ts +1 -0
  4. package/__tests__/mocks/postgres.ts +37 -0
  5. package/__tests__/types/PgflowSqlClient.test-d.ts +59 -0
  6. package/dist/LICENSE.md +660 -0
  7. package/dist/README.md +373 -0
  8. package/dist/index.js +54 -0
  9. package/docs/options_for_flow_and_steps.md +75 -0
  10. package/docs/pgflow-blob-reference-system.md +179 -0
  11. package/eslint.config.cjs +22 -0
  12. package/example-flow.mermaid +5 -0
  13. package/example-flow.svg +1 -0
  14. package/flow-lifecycle.mermaid +83 -0
  15. package/flow-lifecycle.svg +1 -0
  16. package/out-tsc/vitest/__tests__/mocks/index.d.ts +2 -0
  17. package/out-tsc/vitest/__tests__/mocks/index.d.ts.map +1 -0
  18. package/out-tsc/vitest/__tests__/mocks/postgres.d.ts +15 -0
  19. package/out-tsc/vitest/__tests__/mocks/postgres.d.ts.map +1 -0
  20. package/out-tsc/vitest/__tests__/types/PgflowSqlClient.test-d.d.ts +2 -0
  21. package/out-tsc/vitest/__tests__/types/PgflowSqlClient.test-d.d.ts.map +1 -0
  22. package/out-tsc/vitest/tsconfig.spec.tsbuildinfo +1 -0
  23. package/out-tsc/vitest/vite.config.d.ts +3 -0
  24. package/out-tsc/vitest/vite.config.d.ts.map +1 -0
  25. package/package.json +28 -0
  26. package/pkgs/core/dist/index.js +54 -0
  27. package/pkgs/core/dist/pkgs/core/LICENSE.md +660 -0
  28. package/pkgs/core/dist/pkgs/core/README.md +373 -0
  29. package/pkgs/dsl/dist/index.js +123 -0
  30. package/pkgs/dsl/dist/pkgs/dsl/README.md +11 -0
  31. package/project.json +125 -0
  32. package/prompts/architect.md +87 -0
  33. package/prompts/condition.md +33 -0
  34. package/prompts/declarative_sql.md +15 -0
  35. package/prompts/deps_in_payloads.md +20 -0
  36. package/prompts/dsl-multi-arg.ts +48 -0
  37. package/prompts/dsl-options.md +39 -0
  38. package/prompts/dsl-single-arg.ts +51 -0
  39. package/prompts/dsl-two-arg.ts +61 -0
  40. package/prompts/dsl.md +119 -0
  41. package/prompts/fanout_steps.md +1 -0
  42. package/prompts/json_schemas.md +36 -0
  43. package/prompts/one_shot.md +286 -0
  44. package/prompts/pgtap.md +229 -0
  45. package/prompts/sdk.md +59 -0
  46. package/prompts/step_types.md +62 -0
  47. package/prompts/versioning.md +16 -0
  48. package/queries/fail_permanently.sql +17 -0
  49. package/queries/fail_task.sql +21 -0
  50. package/queries/sequential.sql +47 -0
  51. package/queries/two_roots_left_right.sql +59 -0
  52. package/schema.svg +1 -0
  53. package/scripts/colorize-pgtap-output.awk +72 -0
  54. package/scripts/run-test-with-colors +5 -0
  55. package/scripts/watch-test +7 -0
  56. package/src/PgflowSqlClient.ts +85 -0
  57. package/src/database-types.ts +759 -0
  58. package/src/index.ts +3 -0
  59. package/src/types.ts +103 -0
  60. package/supabase/config.toml +32 -0
  61. package/supabase/migrations/000000_schema.sql +150 -0
  62. package/supabase/migrations/000005_create_flow.sql +29 -0
  63. package/supabase/migrations/000010_add_step.sql +48 -0
  64. package/supabase/migrations/000015_start_ready_steps.sql +45 -0
  65. package/supabase/migrations/000020_start_flow.sql +46 -0
  66. package/supabase/migrations/000030_read_with_poll_backport.sql +70 -0
  67. package/supabase/migrations/000040_poll_for_tasks.sql +100 -0
  68. package/supabase/migrations/000045_maybe_complete_run.sql +30 -0
  69. package/supabase/migrations/000050_complete_task.sql +98 -0
  70. package/supabase/migrations/000055_calculate_retry_delay.sql +11 -0
  71. package/supabase/migrations/000060_fail_task.sql +124 -0
  72. package/supabase/migrations/000_edge_worker_initial.sql +86 -0
  73. package/supabase/seed.sql +202 -0
  74. package/supabase/tests/add_step/basic_step_addition.test.sql +29 -0
  75. package/supabase/tests/add_step/circular_dependency.test.sql +21 -0
  76. package/supabase/tests/add_step/flow_isolation.test.sql +26 -0
  77. package/supabase/tests/add_step/idempotent_step_addition.test.sql +20 -0
  78. package/supabase/tests/add_step/invalid_step_slug.test.sql +16 -0
  79. package/supabase/tests/add_step/nonexistent_dependency.test.sql +16 -0
  80. package/supabase/tests/add_step/nonexistent_flow.test.sql +13 -0
  81. package/supabase/tests/add_step/options.test.sql +66 -0
  82. package/supabase/tests/add_step/step_with_dependency.test.sql +36 -0
  83. package/supabase/tests/add_step/step_with_multiple_dependencies.test.sql +46 -0
  84. package/supabase/tests/complete_task/archives_message.test.sql +67 -0
  85. package/supabase/tests/complete_task/completes_run_if_no_more_remaining_steps.test.sql +62 -0
  86. package/supabase/tests/complete_task/completes_task_and_updates_dependents.test.sql +64 -0
  87. package/supabase/tests/complete_task/decrements_remaining_steps_if_completing_step.test.sql +62 -0
  88. package/supabase/tests/complete_task/saves_output_when_completing_run.test.sql +57 -0
  89. package/supabase/tests/create_flow/flow_creation.test.sql +27 -0
  90. package/supabase/tests/create_flow/idempotency_and_duplicates.test.sql +26 -0
  91. package/supabase/tests/create_flow/invalid_slug.test.sql +13 -0
  92. package/supabase/tests/create_flow/options.test.sql +57 -0
  93. package/supabase/tests/fail_task/exponential_backoff.test.sql +70 -0
  94. package/supabase/tests/fail_task/mark_as_failed_if_no_retries_available.test.sql +49 -0
  95. package/supabase/tests/fail_task/respects_flow_retry_settings.test.sql +48 -0
  96. package/supabase/tests/fail_task/respects_step_retry_settings.test.sql +48 -0
  97. package/supabase/tests/fail_task/retry_task_if_retries_available.test.sql +39 -0
  98. package/supabase/tests/is_valid_slug.test.sql +72 -0
  99. package/supabase/tests/poll_for_tasks/builds_proper_input_from_deps_outputs.test.sql +35 -0
  100. package/supabase/tests/poll_for_tasks/hides_messages.test.sql +35 -0
  101. package/supabase/tests/poll_for_tasks/increments_attempts_count.test.sql +35 -0
  102. package/supabase/tests/poll_for_tasks/multiple_task_processing.test.sql +24 -0
  103. package/supabase/tests/poll_for_tasks/polls_only_queued_tasks.test.sql +35 -0
  104. package/supabase/tests/poll_for_tasks/reads_messages.test.sql +38 -0
  105. package/supabase/tests/poll_for_tasks/returns_no_tasks_if_no_step_task_for_message.test.sql +34 -0
  106. package/supabase/tests/poll_for_tasks/returns_no_tasks_if_queue_is_empty.test.sql +19 -0
  107. package/supabase/tests/poll_for_tasks/returns_no_tasks_when_qty_set_to_0.test.sql +22 -0
  108. package/supabase/tests/poll_for_tasks/sets_vt_delay_based_on_opt_timeout.test.sql +41 -0
  109. package/supabase/tests/poll_for_tasks/tasks_reapppear_if_not_processed_in_time.test.sql +59 -0
  110. package/supabase/tests/start_flow/creates_run.test.sql +24 -0
  111. package/supabase/tests/start_flow/creates_step_states_for_all_steps.test.sql +25 -0
  112. package/supabase/tests/start_flow/creates_step_tasks_only_for_root_steps.test.sql +54 -0
  113. package/supabase/tests/start_flow/returns_run.test.sql +24 -0
  114. package/supabase/tests/start_flow/sends_messages_on_the_queue.test.sql +50 -0
  115. package/supabase/tests/start_flow/starts_only_root_steps.test.sql +21 -0
  116. package/supabase/tests/step_dsl_is_idempotent.test.sql +34 -0
  117. package/tsconfig.json +16 -0
  118. package/tsconfig.lib.json +26 -0
  119. package/tsconfig.spec.json +35 -0
  120. package/vite.config.ts +57 -0
@@ -0,0 +1,286 @@
1
+ Your job is to implement required SQL schemas and functions for an MVP of my open source Postgres-native workflow orchestration engine called pgflow.
2
+
3
+ The main idea of the project is to keep shape of the DAG (nodes and edges) and its runtime state in the database
4
+ and expose SQL functions that allow propagating through the state.
5
+
6
+ Real work is done on the task queue workers and the functions from pgflow are only orchestrating
7
+ the queue messages.
8
+
9
+ Workers are supposed to call user functions with the input from the queue message,
10
+ and should acknowledge the completion of the task or its failure (error thrown) by
11
+ calling appropriate pgflow SQL functions.
12
+
13
+ This way the orchestration is decoupled from the execution.
14
+
15
+ I have a concrete implementation plan for you to follow and will unfold it
16
+ step by step below.
17
+
18
+ ## Assumptions/best practices
19
+
20
+ ### We are building Minimal Viable Product
21
+
22
+ Remember that we are building MVP and main focus should be on shipping something as soon as possible,
23
+ by cutting scope, simplifying the architectures and code.
24
+
25
+ But the outlined features are definitely something that we will be doing in the future.
26
+ I am most certain about the foreach-array steps - this is a MUST have.
27
+ So your focus should be on trying to implement the MVP but not closing the doors to the future improvements.
28
+
29
+ ### Slugs
30
+
31
+ We do not use serial IDs nor UUIDs for static things, we use "slugs" instead.
32
+ A slug is just a string that conforms to following rules:
33
+
34
+ ```sql
35
+ slug is not null
36
+ and slug <> ''
37
+ and length(slug) <= 128
38
+ and slug ~ '^[a-zA-Z_][a-zA-Z0-9_]*$';
39
+ ```
40
+
41
+ We use UUID for identifying particular run of the flow.
42
+ But the states of steps for that particular run are not identified by separate UUIDs,
43
+ but rather by a pair of run_id and step_slug. This pattern allows to easily refer
44
+ to steps and flows by their slugs. **Leverage this pattern everywhere you can!**
45
+
46
+ ### References/fkeys
47
+
48
+ Use foreign keys everywhere to ensure consistency.
49
+ Use composite foreign keys and composite primary keys composed of flow/step slugs and run_id's if needed.
50
+
51
+ ### Declarative vs procedural
52
+
53
+ **YOU MUST ALWAYS PRIORITIZE DECLARATIVE STYLE** and prioritize Batching operations.
54
+
55
+ Avoid plpgsql as much as you can.
56
+ It is important to have your DB procedures run in batched ways and use declarative rather than procedural constructs where possible:
57
+
58
+ - do not ever use `language plplsql` in functions, always use `language sql`
59
+ - don't do loops, do SQL statements that address multiple rows at once.
60
+ - don't write trigger functions that fire for a single row, use `FOR EACH STATEMENT` instead.
61
+ - don't call functions for each row in a result set, a condition, a join, or whatever; instead use functions that return `SETOF` and join against these.
62
+
63
+ If you're constructing dynamic SQL, you should only ever use `%I` and `%L` when using `FORMAT` or similar; you should never see `%s` (with the very rare exception of where you're merging in another SQL fragment that you've previously formatted using %I and %L).
64
+
65
+ Remember, that functions have significant overhead in Postgres - instead of factoring into lots of tiny functions, think about how to make your code more expressive so there's no need.
66
+
67
+ ## Schemas
68
+
69
+ ### pgflow.flows
70
+
71
+ A static definition of a flow (DAG):
72
+
73
+ ```sql
74
+ CREATE TABLE pgflow.flows (
75
+ flow_slug text PRIMARY KEY NOT NULL -- Unique identifier for the flow
76
+ CHECK (is_valid_slug(flow_slug))
77
+ );
78
+ ```
79
+
80
+ ### pgflow.steps
81
+
82
+ A static definition of a step within a flow (a DAG "nodes"):
83
+
84
+ ```sql
85
+ CREATE TABLE pgflow.steps (
86
+ flow_slug text NOT NULL REFERENCES flows (flow_slug),
87
+ step_slug text NOT NULL,
88
+ step_type text NOT NULL DEFAULT 'single',
89
+ PRIMARY KEY (flow_slug, step_slug),
90
+ CHECK (is_valid_slug(flow_slug)),
91
+ CHECK (is_valid_slug(step_slug))
92
+ );
93
+ ```
94
+
95
+ ### pgflow.deps
96
+
97
+ A static definition of dependencies between steps (a DAG "edges"):
98
+
99
+ ```sql
100
+ CREATE TABLE pgflow.deps (
101
+ flow_slug text NOT NULL REFERENCES pgflow.flows (flow_slug),
102
+ dep_slug text NOT NULL, -- The step that must complete first
103
+ step_slug text NOT NULL, -- The step that depends on dep_slug
104
+ PRIMARY KEY (flow_slug, dep_slug, step_slug),
105
+ FOREIGN KEY (flow_slug, dep_slug)
106
+ REFERENCES pgflow.steps (flow_slug, step_slug),
107
+ FOREIGN KEY (flow_slug, step_slug)
108
+ REFERENCES pgflow.steps (flow_slug, step_slug),
109
+ CHECK (dep_slug != step_slug), -- Prevent self-dependencies
110
+ CHECK (is_valid_slug(step_slug))
111
+ );
112
+ ```
113
+
114
+ ### pgflow.runs
115
+
116
+ A table storing runtime state of given flow.
117
+ A run is identified by a `flow_slug` and `run_id`.
118
+
119
+ ```sql
120
+ CREATE TABLE pgflow.runs (
121
+ run_id uuid PRIMARY KEY NOT NULL DEFAULT gen_random_uuid(),
122
+ flow_slug text NOT NULL REFERENCES pgflow.flows (flow_slug), -- denormalized
123
+ status text NOT NULL DEFAULT 'started',
124
+ input jsonb NOT NULL,
125
+ CHECK (status IN ('started', 'failed', 'completed'))
126
+ );
127
+ ```
128
+
129
+ There is also `status` that currently can be started, failed or completed.
130
+
131
+
132
+
133
+
134
+
135
+
136
+ ### pgflow.step_states
137
+
138
+ Represents a state of a particular step in a particular run.
139
+
140
+ ```sql
141
+
142
+ -- Step states table - tracks the state of individual steps within a run
143
+ CREATE TABLE pgflow.step_states (
144
+ flow_slug text NOT NULL REFERENCES pgflow.flows (flow_slug),
145
+ run_id uuid NOT NULL REFERENCES pgflow.runs (run_id),
146
+ step_slug text NOT NULL,
147
+ status text NOT NULL DEFAULT 'created',
148
+ PRIMARY KEY (run_id, step_slug),
149
+ FOREIGN KEY (flow_slug, step_slug)
150
+ REFERENCES pgflow.steps (flow_slug, step_slug),
151
+ CHECK (status IN ('created', 'started', 'completed', 'failed'))
152
+ );
153
+
154
+ ```
155
+
156
+ ### pgflow.step_tasks
157
+
158
+ This table is really unique and interesting. We are starting the development
159
+ of the flow orchestration engine with a simple step that runs one unit of work.
160
+
161
+ But I imagine we would support additional types of steps, like:
162
+
163
+ - a step that requires input array and enqueues a task per array item, so they are created in parallel
164
+ - a step that runs some preprocessing/postprocessing in an additional task
165
+
166
+ So in order to accommodate this, we need an additional layer between step_state and
167
+ an actual task queue, in order to track which messages belong to which steps,
168
+ in case there are more than 1 unit of work for given step.
169
+
170
+ ```sql
171
+ -- Execution logs table - tracks the tasks of individual steps
172
+ CREATE TABLE pgflow.step_tasks (
173
+ flow_slug text NOT NULL REFERENCES pgflow.flows (flow_slug),
174
+ step_slug text NOT NULL,
175
+ run_id uuid NOT NULL REFERENCES pgflow.runs (run_id),
176
+ status text NOT NULL DEFAULT 'queued',
177
+ input jsonb NOT NULL, -- payload that will be passed to queue message
178
+ output jsonb, -- like step_result but for task, can store result or error/stacktrace
179
+ message_id bigint, -- an id of the queue message
180
+ CONSTRAINT step_tasks_pkey PRIMARY KEY (run_id, step_slug),
181
+ FOREIGN KEY (run_id, step_slug)
182
+ REFERENCES pgflow.step_states (run_id, step_slug),
183
+ CHECK (status IN ('queued', 'started', 'failed', 'completed')),
184
+ CHECK (is_valid_slug(flow_slug)),
185
+ CHECK (is_valid_slug(step_slug))
186
+ );
187
+ ```
188
+
189
+ ## Typescript DSL, topological ordering and acyclicity validation
190
+
191
+ The simple typescript DSL will be created that will have string typing
192
+ and will enforce adding steps in a topological order, preventing
193
+ cycles by the strict ordering of the steps addition.
194
+
195
+ Typescript DSL looks like this:
196
+
197
+ ```ts
198
+ const BasicFlow = new Flow<string>()
199
+ .step('root', ({ run }) => {
200
+ return `[${run}]r00t`;
201
+ })
202
+ .step('left', ['root'], ({ root: r }) => {
203
+ return `${r}/left`;
204
+ })
205
+ .step('right', ['root'], ({ root: r }) => {
206
+ return `${r}/right`;
207
+ })
208
+ .step('end', ['left', 'right'], ({ left, right, run }) => {
209
+ return `<${left}> and <${right}> of (${run})`;
210
+ });
211
+ ```
212
+
213
+ This will be compiled to a simple SQL calling SQL function `pgflow.add_step(flow_slug, step_slug, dep_step_slugs[])`:
214
+
215
+ ```sql
216
+ SELECT pgflow.add_step('basic', 'root', ARRAY[]::text[]);
217
+ SELECT pgflow.add_step('basic', 'left', ARRAY['root']);
218
+ SELECT pgflow.add_step('basic', 'right', ARRAY['root']);
219
+ SELECT pgflow.add_step('basic', 'end', ARRAY['left', 'right']);
220
+ ```
221
+
222
+ ## SQL functions API
223
+
224
+ This describes public SQL functions that are available to developer using pgflow
225
+ and to the workers.
226
+
227
+ Developer calls `start_flow` and rest is called by the workers.
228
+
229
+ ### pgflow.start_flow(flow_slug::text, input::jsonb)
230
+
231
+ This function is used to start a flow.
232
+ It should work like this:
233
+
234
+ - create a new `pgflow.runs` row for given flow_slug
235
+ - create all the `pgflow.step_states` rows corresponding to the steps in the flow
236
+ - find root steps (ones without dependencies) and call "start_step" on each of them
237
+
238
+ ### pgflow.start_step(run_id::uuid, step_slug::text)
239
+
240
+ This function is called by start_flow but also by complete_step_task (or somewhere near its call)
241
+ when the worker acknowledges the step_task completion and it is detected that there are ready dependent
242
+ steps to be started.
243
+
244
+ It should probably call start_step_task under the hood, which will:
245
+
246
+ - update the step_state status/timestamps
247
+ - create a step_task row
248
+ - enqueue a queue message for this step_task
249
+
250
+ For other step types, like array/foreach, it would probably call the step_task
251
+ for each array item, so more than one step task is created and more than one message is enqueued.
252
+
253
+ ### pgflow.start_step_task(run_id::uuid, step_slug::text, task_id::bigint)
254
+
255
+ I am not yet sure how this will work for other step types that will need more step tasks.
256
+ But probably each step type would have its own implementation of this function,
257
+ and a simple step type will just create a new step_task row and enqueue it.
258
+
259
+ But an array/foreach step type would need a different implementation.
260
+ Would need to check the input for the step which is an array, and would
261
+ create a new step_task for each array item and enqueue as many messages as there are items in the array.
262
+
263
+ ### pgflow.complete_step_task(run_id::uuid, step_slug::text, output::jsonb)
264
+
265
+ This will be called by the worker when a step_task is completed.
266
+ It will work like this in the simplified version when one step_state corresponds to one step_task:
267
+
268
+ - it marks step_task as completed, saving the output
269
+ - it in turn marks step_state as completed, saving the output
270
+ - then it should check for any dependent steps (steps that depend on the just-completed step) in the same run
271
+ - it should then check if any of those dependent steps are "ready" - meaning, all their dependencies are completed
272
+ - for each of those ready steps, it should call start_step to start them
273
+
274
+ I am not yet sure how this will work for other step types that will need more step tasks.
275
+ Probably each step type would have its own implementation of this function,
276
+ so a simple step will just call complete_step_state when complete_step_task is called.
277
+
278
+ An array/foreach step type would need a different implementation.
279
+ Would probably need to check if other step_tasks are still pending.
280
+ If all are already completed, it would just call complete_step_state,
281
+ otherwise it will just continue, so other (last) step task can complete the step state.
282
+
283
+ ### pgflow.fail_step_task(run_id::uuid, step_slug::text, error::jsonb)
284
+
285
+ This is very similar to complete_step_task, but it will mark step_task as failed,
286
+ will save error message and will call fail_step_state instead of complete_step_state.
@@ -0,0 +1,229 @@
1
+ # PGTap Testing Guidelines
2
+
3
+ ## Overview
4
+
5
+ This document outlines a set of rules, best practices, ideas, and guidelines for writing pgTap tests for the project.
6
+
7
+ ## File Organization
8
+
9
+ - Store test files under the `supabase/tests/` directory.
10
+ - Use descriptive file names with the `.test.sql` suffix.
11
+ - Organize tests in subfolders, by functionality (e.g., `start_flow`, `create_flow`, `add_step`, `poll_for_tasks`, `complete_task`, etc).
12
+
13
+ ## Transactional Test Structure
14
+
15
+ Wrap each test in a transaction to ensure isolation:
16
+
17
+ ```sql
18
+ begin;
19
+ select plan(2);
20
+ -- Test queries here
21
+ select finish();
22
+ rollback;
23
+ ```
24
+
25
+ ## Setup and Teardown
26
+
27
+ Reset and prepare the database context at the start of each test:
28
+
29
+ ```sql
30
+ select pgflow_tests.reset_db();
31
+ select pgflow_tests.setup_flow('sequential');
32
+ ```
33
+
34
+ Terminate tests with:
35
+
36
+ ```sql
37
+ select finish();
38
+ rollback;
39
+ ```
40
+
41
+ ## Declaring the Test Plan
42
+
43
+ Declare the number of tests using the `plan()` function:
44
+
45
+ ```sql
46
+ select plan(2);
47
+ ```
48
+
49
+ ## Using pgTap Assertions
50
+
51
+ Use the following assertion functions to verify expected outcomes:
52
+
53
+ - `is(actual, expected, message)`
54
+ - `results_eq(actual, expected, message)`
55
+ - `set_eq(actual_query, expected_array, message)`
56
+ - `throws_ok(query, expected_error_message, message)`
57
+ - `ok(boolean_expression, message)`
58
+
59
+ ### Example: Validating Run Creation
60
+
61
+ ```sql
62
+ select pgflow.start_flow('sequential', '"hello"'::jsonb);
63
+
64
+ select results_eq(
65
+ $$ SELECT flow_slug, status, input FROM pgflow.runs $$,
66
+ $$ VALUES ('sequential', 'started', '"hello"'::jsonb) $$,
67
+ 'Run should be created with appropriate status and input'
68
+ );
69
+
70
+ select is(
71
+ (select remaining_steps::int from pgflow.runs limit 1),
72
+ 3::int,
73
+ 'remaining_steps should be equal to number of steps'
74
+ );
75
+ ```
76
+
77
+ ### Example: Testing Error Handling
78
+
79
+ ```sql
80
+ select throws_ok(
81
+ $$ SELECT pgflow.create_flow('invalid-flow') $$,
82
+ 'new row for relation "flows" violates check constraint "flows_flow_slug_check"',
83
+ 'Should detect and prevent invalid flow slug'
84
+ );
85
+ ```
86
+
87
+ ## Idempotence and Duplicate Prevention
88
+
89
+ Run operations multiple times to ensure idempotency and that no duplicates are created:
90
+
91
+ ```sql
92
+ select pgflow.create_flow('test_flow');
93
+ select pgflow.create_flow('test_flow');
94
+
95
+ select results_eq(
96
+ $$ SELECT flow_slug FROM pgflow.flows $$,
97
+ array['test_flow']::text [],
98
+ 'No duplicate flow should be created'
99
+ );
100
+ ```
101
+
102
+ ## Testing Dependencies and Flow Isolation
103
+
104
+ Ensure that steps and dependencies remain isolated within a flow:
105
+
106
+ ```sql
107
+ select pgflow.create_flow('test_flow');
108
+ select pgflow.add_step('test_flow', 'first_step');
109
+
110
+ select pgflow.create_flow('another_flow');
111
+ select pgflow.add_step('another_flow', 'first_step');
112
+ select pgflow.add_step('another_flow', 'another_step', array['first_step']);
113
+
114
+ select set_eq(
115
+ $$
116
+ SELECT flow_slug, step_slug
117
+ FROM pgflow.steps WHERE flow_slug = 'another_flow'
118
+ $$,
119
+ $$ VALUES
120
+ ('another_flow', 'another_step'),
121
+ ('another_flow', 'first_step')
122
+ $$,
123
+ 'Steps in second flow should be isolated from first flow'
124
+ );
125
+ ```
126
+
127
+ ## Testing Message Queues
128
+
129
+ Simulate message polling and verify visibility timeouts:
130
+
131
+ ```sql
132
+ select is(
133
+ (select count(*)::integer from pgflow.poll_for_tasks(
134
+ queue_name => 'sequential'::text,
135
+ vt => 5,
136
+ qty => 1,
137
+ max_poll_seconds => 1
138
+ )),
139
+ 1::integer,
140
+ 'First poll should get the available task'
141
+ );
142
+
143
+ select is(
144
+ (select count(*)::integer from pgflow.poll_for_tasks(
145
+ queue_name => 'sequential'::text,
146
+ vt => 5,
147
+ qty => 1,
148
+ max_poll_seconds => 1
149
+ )),
150
+ 0::integer,
151
+ 'Concurrent poll should not get the same task (due to visibility timeout)'
152
+ );
153
+ ```
154
+
155
+ ## Completing Tasks and Flow Progression
156
+
157
+ Ensure that task completions update state and trigger dependents:
158
+
159
+ ```sql
160
+ select pgflow.complete_task(
161
+ (select run_id from pgflow.runs limit 1),
162
+ 'first',
163
+ 0,
164
+ '{"result": "first completed"}'::jsonb
165
+ );
166
+
167
+ select results_eq(
168
+ $$ SELECT status, output FROM pgflow.step_tasks
169
+ WHERE run_id = (SELECT run_id FROM pgflow.runs LIMIT 1)
170
+ AND step_slug = 'first' $$,
171
+ $$ VALUES ('completed', '{"result": "first completed"}'::jsonb) $$,
172
+ 'Task should be marked as completed with correct output'
173
+ );
174
+ ```
175
+
176
+ ## Archiving Processed Messages
177
+
178
+ Verify that messages are archived after task completion:
179
+
180
+ ```sql
181
+ select is(
182
+ (select message ->> 'step_slug' from pgmq.q_sequential limit 1),
183
+ 'first',
184
+ 'First message should be in the queue'
185
+ );
186
+
187
+ select pgflow.complete_task(
188
+ (select run_id from pgflow.runs limit 1),
189
+ 'first',
190
+ 0,
191
+ '"first was successful"'::jsonb
192
+ );
193
+
194
+ select is(
195
+ (select count(*)::INT from pgmq.q_sequential where message ->> 'step_slug' = 'first'),
196
+ 0::INT,
197
+ 'There should be no messages in the queue'
198
+ );
199
+
200
+ select is(
201
+ (select count(*)::INT from pgmq.a_sequential where message ->> 'step_slug' = 'first' limit 1),
202
+ 1::INT,
203
+ 'The message should be archived'
204
+ );
205
+ ```
206
+
207
+ ## Validating Input with Custom Validators
208
+
209
+ Use custom functions to check input formats:
210
+
211
+ ```sql
212
+ select ok(
213
+ pgflow.is_valid_slug('valid_slug'),
214
+ 'is_valid_slug returns true for string with underscore'
215
+ );
216
+ ```
217
+
218
+ ## Conclusion
219
+
220
+ Adhere to the following best practices when writing pgTap tests:
221
+
222
+ - Keep tests self-contained with proper setup and teardown.
223
+ - Use transactions to isolate tests.
224
+ - Declare a clear test plan using `plan()`.
225
+ - Write focused tests with descriptive messages.
226
+ - Ensure idempotence by re-running operations.
227
+ - Validate both positive outcomes and error cases.
228
+
229
+ Following these guidelines will help maintain consistency, reliability, and clarity in your pgTap tests.
package/prompts/sdk.md ADDED
@@ -0,0 +1,59 @@
1
+ # Flow SDK
2
+
3
+ The purpose of Flow SDK is to allow users to start and observe flow runs in their apps
4
+ and leverage strong typing of the inputs, outputs and dependencies between steps
5
+ in order to improve Developer Experience.
6
+
7
+ Based on the Flow definition like this:
8
+
9
+ ```ts
10
+ const ScrapeWebsiteFlow = new Flow<Input>()
11
+ .step('table_of_contents', async (payload) => {
12
+ // Placeholder function
13
+ return await fetchTableOfContents(payload.run.url);
14
+ })
15
+ .step('subpages', ['table_of_contents'], async (payload) => {
16
+ // Placeholder function
17
+ return await scrapeSubpages(payload.run.url, payload.table_of_contents.urls_of_subpages);
18
+ })
19
+ .step('summaries', ['subpages'], async (payload) => {
20
+ // Placeholder function
21
+ return await generateSummaries(payload.subpages.contentsOfSubpages);
22
+ })
23
+ .step('sentiments', ['subpages'], async (payload) => {
24
+ // Placeholder function
25
+ return await analyzeSentiments(payload.subpages.contentsOfSubpages);
26
+ })
27
+ .step('save_to_db', ['subpages', 'summaries', 'sentiments'], async (payload) => {
28
+ // Placeholder function
29
+ return await saveToDb(payload.subpages, payload.summaries, payload.sentiments);
30
+ });
31
+ ```
32
+
33
+ We want to be able to infer the following information somehow:
34
+
35
+ - The cumulative payload types that are built step-by-step
36
+ - The relationships between steps that are established at runtime
37
+
38
+ Those are the most important things we need, so users can for example trigger
39
+ flows and get annotations for the step results etc.
40
+ Given the example flow I would like my users to be able to get their defined flow and do things like:
41
+
42
+ ```ts
43
+ import type { ScrapeWebsiteFlow } from './flows/scrape_website';
44
+ import { createClient } from '@pgflow/sdk';
45
+
46
+ const { startFlow } = createClient(supabaseClient);
47
+
48
+ const flowRun = startFlow<ScrapeWebsiteFlow>({
49
+ url: 'https://example.com', // this is type checked based on the Input to ScrapeWebsiteFlow
50
+ });
51
+
52
+ // here, 'subpages' (the name of step) would be type checked and only existing steps
53
+ // can be used here, so user cannot await for non existing step
54
+ const subpagesOutput = flowRun.stepCompleted('subpages');
55
+
56
+ // the subpagesOutput is also type-annotated based on the return type inferred
57
+ // from the handler for 'subpages' step, only based on the ScrapeWebsiteFlow type
58
+ subpagesOutput.forEach() // this is an array because handler for 'subpages' returns an array
59
+ ```
@@ -0,0 +1,62 @@
1
+ ## Step types in MVP
2
+
3
+ ### Regular Steps
4
+
5
+ - Basic unit of work.
6
+ - Executes a handler that receives outputs from its declared dependencies.
7
+ - Its return value is passed to dependent steps.
8
+
9
+ ## Planned step types
10
+
11
+ ### Map Steps (tasks fanout)
12
+
13
+ - Designed for when a dependency returns an array.
14
+ - The handler runs once per array element (in parallel).
15
+ - The outputs are collected back into an array (order preserved) and passed downstream.
16
+
17
+ ### Conditional Steps
18
+
19
+ - Each step will be able to specify a condition, regardless of its type
20
+ - Steps that run only when certain conditions are met
21
+ - A condition is provided (as a JSON fragment)
22
+ - At runtime, the input for a step (from all deps) is matched via @> against the condition
23
+ - If the condition is not met, the step does not run and is marked as skipped
24
+ - Dependent steps are not run and should probably be marked as skipped as well.
25
+
26
+ ### Manual Approval Steps
27
+
28
+ - Steps that pause for human intervention.
29
+ - They just differ by NOT immediately queueing a task.
30
+ - Instead, they wait for an external update by calling **complete_step** to set their output and trigger downstream steps.
31
+
32
+ ### Subflow Steps
33
+
34
+ - Encapsulate an entire subflow (a mini workflow) as a single step.
35
+ - A subflow is defined using the same DSL as the main flow.
36
+ - Each subflow has an automatic final step that gathers the outputs of all leaf steps.
37
+ - The subflow step triggers the subflow and waits until its output is ready.
38
+ - The aggregated output from the subflow becomes the output of the subflow step.
39
+
40
+ ### Fanout subflows step
41
+
42
+ - Like Map steps, but instead of a task per array item, it runs a subflow per array item.
43
+ - It gathers final steps from subflows into an output array for the fanout subflow step
44
+
45
+ ####
46
+
47
+ • “Fanout subflow” steps do not have local tasks. Instead, they spawn child subflows and wait for them to finish.
48
+ • You can track subflow completion with the same remaining_tasks field:
49
+ – Increment remaining_tasks by the number of child subflows.
50
+ – Decrement it each time a child subflow completes.
51
+ – When remaining_tasks reaches zero, the fanout subflow step is done.
52
+ • Alternatively, you can add a remaining_subflows column to separate child‐subflow tracking from local tasks.
53
+ – This gives clearer semantics but requires extra logic to handle multiple completion conditions.
54
+ • Most implementations unify subflow runs under remaining_tasks to reuse existing “remaining_tasks = 0 means done” checks.
55
+
56
+ ### Additional Techniques
57
+
58
+ We simplify as much as possible and use other tools instead of reinventing the wheel.
59
+
60
+ - Recurrent tasks are handled externally via Cron triggers.
61
+ - Delays can be implemented using pgmq visibility timeouts.
62
+ - The overall design treats a flow as a single function with one input (parameters) and one output (final aggregated output).
@@ -0,0 +1,16 @@
1
+ # Versioning
2
+
3
+ Flow Versioning Strategy #[[pgflow/Versioning]]
4
+ Agreed on immutable flow definitions (similar to Temporal)
5
+ Once a flow is uploaded to DB, it remains unchanged
6
+ Versioning handled through flow slugs rather than explicit version numbers
7
+ Users responsible for managing changes in safe, organized manner
8
+ Benefits of immutable approach
9
+ Simplifies implementation
10
+ Provides natural versioning cascade for subflows
11
+ Makes version transitions explicit and intentional
12
+ Avoids "half-upgraded" scenarios
13
+ Consciously decided against "latest" aliases for now
14
+ Could introduce complexity and unpredictable behavior
15
+ Users can implement their own aliasing logic if needed
16
+ Explicit slugs provide clarity about which version is being used
@@ -0,0 +1,17 @@
1
+ select pgflow_tests.reset_db();
2
+ select pgflow_tests.setup_flow('sequential');
3
+
4
+ -- SETUP
5
+ select pgflow.start_flow('sequential', '{"test": true}'::JSONB);
6
+
7
+ -- default opt_max_attempts is 3, so failing twice should mark the task as failed
8
+ select pgflow_tests.poll_and_fail('sequential');
9
+ select pg_sleep(1.1);
10
+ select pgflow_tests.poll_and_fail('sequential');
11
+ select * from pgflow.step_tasks;
12
+ select * from pgmq.q_sequential;
13
+ -- select * from pgflow.step_tasks;
14
+ --
15
+ select * from pgflow.step_tasks;
16
+ select * from pgflow.step_states;
17
+ select * from pgflow.runs;
@@ -0,0 +1,21 @@
1
+ \x
2
+ begin;
3
+ select pgflow_tests.reset_db();
4
+ select pgflow_tests.setup_flow('two_roots_left_right');
5
+
6
+ --------------------------------------------------------------------------------
7
+ --------------------------------------------------------------------------------
8
+ --------------------------------------------------------------------------------
9
+
10
+
11
+ select pgflow.start_flow('two_roots_left_right', '"hello"'::jsonb);
12
+
13
+ select pgflow_tests.poll_and_complete('two_roots_left_right');
14
+ select pgflow_tests.poll_and_complete('two_roots_left_right');
15
+ select pgflow_tests.poll_and_complete('two_roots_left_right');
16
+ select pgflow_tests.poll_and_complete('two_roots_left_right');
17
+
18
+ select jsonb_pretty(output) from pgflow.runs;
19
+ select * from pgflow.runs;
20
+
21
+ rollback;