@pgflow/core 0.6.1 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1244 @@
1
-- step_task_record composite type: add the task_index attribute.
ALTER TYPE pgflow.step_task_record
  ADD ATTRIBUTE task_index integer;

-- step_states, phase 1: drop the old remaining_tasks check, make the column
-- nullable and default-less, and add the (initially NULL) initial_tasks column.
ALTER TABLE pgflow.step_states
  DROP CONSTRAINT step_states_remaining_tasks_check,
  ALTER COLUMN remaining_tasks DROP NOT NULL,
  ALTER COLUMN remaining_tasks DROP DEFAULT,
  ADD COLUMN initial_tasks integer NULL;

-- AUTOMATIC DATA MIGRATION
-- Must run after NOT NULL has been dropped and before the new constraints are
-- added, so that existing rows satisfy them.

-- Every pre-existing step had exactly one task (the old
-- only_single_task_per_step constraint enforced this), so backfill 1.
UPDATE pgflow.step_states
SET initial_tasks = 1
WHERE initial_tasks IS NULL;

-- New semantics: remaining_tasks is NULL until the step starts
-- (the old schema used 1 for "not started").
UPDATE pgflow.step_states
SET remaining_tasks = NULL
WHERE status = 'created'
  AND remaining_tasks IS NOT NULL;

-- step_states, phase 2: add the new consistency constraints.
ALTER TABLE pgflow.step_states
  -- a started step must know how many tasks it has
  ADD CONSTRAINT initial_tasks_known_when_started
    CHECK (status <> 'started'::text OR initial_tasks IS NOT NULL),
  -- a step that is still 'created' must not carry a remaining_tasks value
  ADD CONSTRAINT remaining_tasks_state_consistency
    CHECK (remaining_tasks IS NULL OR status <> 'created'::text),
  -- initial_tasks, when known, is never negative
  ADD CONSTRAINT step_states_initial_tasks_check
    CHECK (initial_tasks IS NULL OR initial_tasks >= 0);

-- step_tasks: allow several tasks per step, and allow output on failed tasks
-- in addition to completed ones.
ALTER TABLE pgflow.step_tasks
  DROP CONSTRAINT only_single_task_per_step,
  DROP CONSTRAINT output_valid_only_for_completed,
  ADD CONSTRAINT output_valid_only_for_completed
    CHECK (output IS NULL OR status IN ('completed'::text, 'failed'::text));

-- steps: accept the 'map' step type next to 'single'.
ALTER TABLE pgflow.steps
  DROP CONSTRAINT steps_step_type_check,
  ADD CONSTRAINT steps_step_type_check
    CHECK (step_type IN ('single'::text, 'map'::text));
33
-- Modify "maybe_complete_run" function
--
-- Marks the given run as completed when it has no remaining steps and is not
-- already completed, storing the outputs of its completed leaf steps on the
-- run, and then sends a run:completed realtime event. When the run does not
-- qualify, nothing is updated and nothing is sent.
--
-- Parameters:
--   run_id  the run to check
CREATE OR REPLACE FUNCTION pgflow.maybe_complete_run(run_id uuid)
RETURNS void
LANGUAGE plpgsql
SET search_path = ''
AS $$
declare
  -- Row returned by the UPDATE below; stays NULL when no row qualified
  v_finished_run pgflow.runs%ROWTYPE;
begin
  -- Finalize the run. The output subquery is only evaluated for a row that
  -- actually passes the WHERE clause.
  UPDATE pgflow.runs AS target_run
  SET
    status = 'completed',
    completed_at = now(),
    output = (
      -- One key per completed leaf step (a step that no other step depends on)
      SELECT jsonb_object_agg(leaf.step_slug, leaf.step_output)
      FROM (
        SELECT DISTINCT
          state.step_slug,
          CASE step_def.step_type
            -- map step: array of all completed task outputs, by task_index
            WHEN 'map' THEN (
              SELECT COALESCE(jsonb_agg(task.output ORDER BY task.task_index), '[]'::jsonb)
              FROM pgflow.step_tasks AS task
              WHERE task.run_id = state.run_id
                AND task.step_slug = state.step_slug
                AND task.status = 'completed'
            )
            -- single step: the output of its completed task
            WHEN 'single' THEN (
              SELECT task.output
              FROM pgflow.step_tasks AS task
              WHERE task.run_id = state.run_id
                AND task.step_slug = state.step_slug
                AND task.status = 'completed'
              LIMIT 1
            )
          END AS step_output
        FROM pgflow.step_states AS state
        JOIN pgflow.steps AS step_def
          ON step_def.flow_slug = state.flow_slug
         AND step_def.step_slug = state.step_slug
        WHERE state.run_id = maybe_complete_run.run_id
          AND state.status = 'completed'
          AND NOT EXISTS (
            SELECT 1
            FROM pgflow.deps AS dependency
            WHERE dependency.flow_slug = state.flow_slug
              AND dependency.dep_slug = state.step_slug
          )
      ) AS leaf
    )
  WHERE target_run.run_id = maybe_complete_run.run_id
    AND target_run.remaining_steps = 0
    AND target_run.status <> 'completed'
  RETURNING target_run.* INTO v_finished_run;

  -- Nothing was completed by this call: no event to send
  IF v_finished_run.run_id IS NULL THEN
    RETURN;
  END IF;

  -- Broadcast the completion event
  PERFORM realtime.send(
    jsonb_build_object(
      'event_type', 'run:completed',
      'run_id', v_finished_run.run_id,
      'flow_slug', v_finished_run.flow_slug,
      'status', 'completed',
      'output', v_finished_run.output,
      'completed_at', v_finished_run.completed_at
    ),
    'run:completed',
    concat('pgflow:run:', v_finished_run.run_id),
    false
  );
end;
$$;
117
-- Modify "start_ready_steps" function
--
-- Starts every step of the run that is ready: status 'created', no remaining
-- dependencies, and a known task count.
--   * Map steps with initial_tasks = 0 are completed immediately, without tasks.
--   * Steps with initial_tasks > 0 are marked 'started', one queue message per
--     task index (0 .. initial_tasks - 1) is sent with pgmq.send_batch, and a
--     matching row is inserted into pgflow.step_tasks.
-- A realtime event (step:completed / step:started) is sent for each affected
-- step. Returns immediately, changing nothing, when the run is 'failed'.
--
-- Parameters:
--   run_id  the run whose ready steps should be started
CREATE OR REPLACE FUNCTION "pgflow"."start_ready_steps" ("run_id" uuid) RETURNS void LANGUAGE plpgsql SET "search_path" = '' AS $$
begin
-- ==========================================
-- GUARD: No mutations on failed runs
-- ==========================================
IF EXISTS (SELECT 1 FROM pgflow.runs WHERE pgflow.runs.run_id = start_ready_steps.run_id AND pgflow.runs.status = 'failed') THEN
  RETURN;
END IF;

-- ==========================================
-- HANDLE EMPTY ARRAY MAPS (initial_tasks = 0)
-- ==========================================
-- These complete immediately without spawning tasks
WITH empty_map_steps AS (
  SELECT step_state.*
  FROM pgflow.step_states AS step_state
  JOIN pgflow.steps AS step
    ON step.flow_slug = step_state.flow_slug
    AND step.step_slug = step_state.step_slug
  WHERE step_state.run_id = start_ready_steps.run_id
    AND step_state.status = 'created'
    AND step_state.remaining_deps = 0
    AND step.step_type = 'map'
    AND step_state.initial_tasks = 0
  ORDER BY step_state.step_slug
  FOR UPDATE OF step_state
),
-- ---------- Complete empty map steps and broadcast ----------
-- The step:completed event is sent from RETURNING on purpose: a plain SELECT
-- CTE that the main statement never reads is not evaluated by PostgreSQL, so
-- a separate "broadcast" CTE would silently send nothing. A data-modifying
-- CTE always runs to completion, and RETURNING is evaluated per updated row.
completed_empty_steps AS (
  UPDATE pgflow.step_states
  SET status = 'completed',
      started_at = now(),
      completed_at = now(),
      remaining_tasks = 0
  FROM empty_map_steps
  WHERE pgflow.step_states.run_id = start_ready_steps.run_id
    AND pgflow.step_states.step_slug = empty_map_steps.step_slug
  RETURNING
    pgflow.step_states.*,
    realtime.send(
      jsonb_build_object(
        'event_type', 'step:completed',
        'run_id', pgflow.step_states.run_id,
        'step_slug', pgflow.step_states.step_slug,
        'status', 'completed',
        'started_at', pgflow.step_states.started_at,
        'completed_at', pgflow.step_states.completed_at,
        'remaining_tasks', 0,
        'remaining_deps', 0,
        'output', '[]'::jsonb
      ),
      concat('step:', pgflow.step_states.step_slug, ':completed'),
      concat('pgflow:run:', pgflow.step_states.run_id),
      false
    ) AS _broadcast_result  -- internal: only present to force the send
),

-- ==========================================
-- HANDLE NORMAL STEPS (initial_tasks > 0)
-- ==========================================
-- ---------- Find ready steps ----------
-- Steps with no remaining deps and known task count
ready_steps AS (
  SELECT *
  FROM pgflow.step_states AS step_state
  WHERE step_state.run_id = start_ready_steps.run_id
    AND step_state.status = 'created'
    AND step_state.remaining_deps = 0
    AND step_state.initial_tasks IS NOT NULL  -- Cannot start with unknown count
    AND step_state.initial_tasks > 0  -- Don't start taskless steps
    -- Exclude empty map steps already handled
    AND NOT EXISTS (
      SELECT 1 FROM empty_map_steps
      WHERE empty_map_steps.run_id = step_state.run_id
        AND empty_map_steps.step_slug = step_state.step_slug
    )
  ORDER BY step_state.step_slug
  FOR UPDATE
),
-- ---------- Mark steps as started and broadcast ----------
-- The step:started event is sent from RETURNING for the same reason as above.
-- RETURNING sees the updated row, so remaining_tasks is the value just copied
-- from initial_tasks.
started_step_states AS (
  UPDATE pgflow.step_states
  SET status = 'started',
      started_at = now(),
      remaining_tasks = ready_steps.initial_tasks  -- Copy initial_tasks to remaining_tasks when starting
  FROM ready_steps
  WHERE pgflow.step_states.run_id = start_ready_steps.run_id
    AND pgflow.step_states.step_slug = ready_steps.step_slug
  RETURNING
    pgflow.step_states.*,
    realtime.send(
      jsonb_build_object(
        'event_type', 'step:started',
        'run_id', pgflow.step_states.run_id,
        'step_slug', pgflow.step_states.step_slug,
        'status', 'started',
        'started_at', pgflow.step_states.started_at,
        'remaining_tasks', pgflow.step_states.remaining_tasks,
        'remaining_deps', pgflow.step_states.remaining_deps
      ),
      concat('step:', pgflow.step_states.step_slug, ':started'),
      concat('pgflow:run:', pgflow.step_states.run_id),
      false
    ) AS _broadcast_result  -- internal: only present to force the send
),

-- ==========================================
-- TASK GENERATION AND QUEUE MESSAGES
-- ==========================================
-- ---------- Generate tasks and batch messages ----------
-- Single steps: 1 task (index 0)
-- Map steps: N tasks (indices 0..N-1)
message_batches AS (
  SELECT
    started_step.flow_slug,
    started_step.run_id,
    started_step.step_slug,
    COALESCE(step.opt_start_delay, 0) as delay,
    array_agg(
      jsonb_build_object(
        'flow_slug', started_step.flow_slug,
        'run_id', started_step.run_id,
        'step_slug', started_step.step_slug,
        'task_index', task_idx.task_index
      ) ORDER BY task_idx.task_index
    ) AS messages,
    array_agg(task_idx.task_index ORDER BY task_idx.task_index) AS task_indices
  FROM started_step_states AS started_step
  JOIN pgflow.steps AS step
    ON step.flow_slug = started_step.flow_slug
    AND step.step_slug = started_step.step_slug
  -- Generate task indices from 0 to initial_tasks-1
  CROSS JOIN LATERAL generate_series(0, started_step.initial_tasks - 1) AS task_idx(task_index)
  GROUP BY started_step.flow_slug, started_step.run_id, started_step.step_slug, step.opt_start_delay
),
-- ---------- Send messages to queue ----------
-- One pgmq.send_batch call per step; the returned message ids are paired with
-- the task indices by ordinal position (both arrays are ordered by task_index)
sent_messages AS (
  SELECT
    mb.flow_slug,
    mb.run_id,
    mb.step_slug,
    task_indices.task_index,
    msg_ids.msg_id
  FROM message_batches mb
  CROSS JOIN LATERAL unnest(mb.task_indices) WITH ORDINALITY AS task_indices(task_index, idx_ord)
  CROSS JOIN LATERAL pgmq.send_batch(mb.flow_slug, mb.messages, mb.delay) WITH ORDINALITY AS msg_ids(msg_id, msg_ord)
  WHERE task_indices.idx_ord = msg_ids.msg_ord
)

-- ==========================================
-- RECORD TASKS IN DATABASE
-- ==========================================
INSERT INTO pgflow.step_tasks (flow_slug, run_id, step_slug, task_index, message_id)
SELECT
  sent_messages.flow_slug,
  sent_messages.run_id,
  sent_messages.step_slug,
  sent_messages.task_index,
  sent_messages.msg_id
FROM sent_messages;

end;
$$;
291
-- Create "cascade_complete_taskless_steps" function
--
-- Repeatedly completes steps of the run that need no tasks (status 'created',
-- remaining_deps = 0, initial_tasks = 0). After each wave it decrements
-- remaining_deps on the dependents of the steps just completed, sets a
-- dependent map step's initial_tasks to 0 as well, and decrements the run's
-- remaining_steps. The loop ends when a wave completes nothing.
--
-- Parameters:
--   run_id  the run to process
-- Returns:
--   total number of steps completed across all waves
-- Raises:
--   an exception when more than 50 waves are needed (safety limit)
--
-- search_path is pinned to '' to match every other function in this
-- migration; all object references in the body are schema-qualified.
CREATE FUNCTION "pgflow"."cascade_complete_taskless_steps" ("run_id" uuid) RETURNS integer LANGUAGE plpgsql SET "search_path" = '' AS $$
DECLARE
  v_total_completed int := 0;
  v_iteration_completed int;
  v_iterations int := 0;
  v_max_iterations int := 50;
BEGIN
  -- ==========================================
  -- ITERATIVE CASCADE COMPLETION
  -- ==========================================
  -- Completes taskless steps in waves until none remain
  LOOP
    -- ---------- Safety check ----------
    v_iterations := v_iterations + 1;
    IF v_iterations > v_max_iterations THEN
      RAISE EXCEPTION 'Cascade loop exceeded safety limit of % iterations', v_max_iterations;
    END IF;

    -- ==========================================
    -- COMPLETE READY TASKLESS STEPS
    -- ==========================================
    WITH completed AS (
      -- ---------- Complete taskless steps ----------
      -- Steps with initial_tasks=0 and no remaining deps
      UPDATE pgflow.step_states ss
      SET status = 'completed',
          started_at = now(),
          completed_at = now(),
          remaining_tasks = 0
      FROM pgflow.steps s
      WHERE ss.run_id = cascade_complete_taskless_steps.run_id
        AND ss.flow_slug = s.flow_slug
        AND ss.step_slug = s.step_slug
        AND ss.status = 'created'
        AND ss.remaining_deps = 0
        AND ss.initial_tasks = 0
      RETURNING ss.*
    ),
    -- ---------- Update dependent steps ----------
    -- Propagate completion and empty arrays to dependents
    dep_updates AS (
      UPDATE pgflow.step_states ss
      SET remaining_deps = ss.remaining_deps - dep_count.completed_deps,
          -- If the dependent is a map step and its dependency completed with 0 tasks,
          -- set its initial_tasks to 0 as well
          initial_tasks = CASE
            WHEN s.step_type = 'map' AND dep_count.has_zero_tasks
            THEN 0  -- Empty array propagation
            ELSE ss.initial_tasks  -- Keep existing value (including NULL)
          END
      FROM (
        -- Aggregate dependency updates per dependent step
        SELECT
          d.flow_slug,
          d.step_slug AS dependent_slug,
          COUNT(*) AS completed_deps,
          BOOL_OR(c.initial_tasks = 0) AS has_zero_tasks
        FROM completed c
        JOIN pgflow.deps d ON d.flow_slug = c.flow_slug
                           AND d.dep_slug = c.step_slug
        GROUP BY d.flow_slug, d.step_slug
      ) dep_count
      -- Step definition of the dependent (needed for its step_type)
      JOIN pgflow.steps s ON s.flow_slug = dep_count.flow_slug
                          AND s.step_slug = dep_count.dependent_slug
      WHERE ss.run_id = cascade_complete_taskless_steps.run_id
        AND ss.flow_slug = dep_count.flow_slug
        AND ss.step_slug = dep_count.dependent_slug
    ),
    -- ---------- Update run counters ----------
    -- Only decrement remaining_steps; let maybe_complete_run handle finalization
    run_updates AS (
      UPDATE pgflow.runs r
      SET remaining_steps = r.remaining_steps - c.completed_count
      FROM (SELECT COUNT(*) AS completed_count FROM completed) c
      WHERE r.run_id = cascade_complete_taskless_steps.run_id
        AND c.completed_count > 0
    )
    -- ---------- Check iteration results ----------
    SELECT COUNT(*) INTO v_iteration_completed FROM completed;

    EXIT WHEN v_iteration_completed = 0;  -- No more steps to complete
    v_total_completed := v_total_completed + v_iteration_completed;
  END LOOP;

  RETURN v_total_completed;
END;
$$;
381
-- Modify "complete_task" function
--
-- Records the successful completion of one task and propagates the result:
--   1. Returns the task row unchanged when the run is already 'failed'.
--   2. Locks the run row and the step_states row of the completing step.
--   3. Type check: when a 'single' step completes with a non-array (or NULL)
--      output and a dependent 'map' step still has initial_tasks = NULL, the
--      run, the step and the task are marked 'failed', the run's queued and
--      started messages are archived, and an empty set is returned.
--   4. Otherwise marks the task 'completed', decrements the step's
--      remaining_tasks (completing the step when it was 1), and, if the step
--      completed, decrements remaining_deps of its children, resolves
--      initial_tasks for child map steps and decrements the run's
--      remaining_steps.
--   5. On step completion: cascades taskless steps and sends step:completed.
--   6. Archives the task's message, starts newly ready steps and lets
--      maybe_complete_run finalize the run.
--
-- Parameters:
--   run_id      run the task belongs to
--   step_slug   step the task belongs to
--   task_index  index of the task within the step
--   output      JSON output produced by the task
-- Returns:
--   the step_tasks row for (run_id, step_slug, task_index); empty on a type
--   violation
CREATE OR REPLACE FUNCTION "pgflow"."complete_task" ("run_id" uuid, "step_slug" text, "task_index" integer, "output" jsonb) RETURNS SETOF "pgflow"."step_tasks" LANGUAGE plpgsql SET "search_path" = '' AS $$
declare
  v_step_state pgflow.step_states%ROWTYPE;
  v_dependent_map_slug text;
  v_run_record pgflow.runs%ROWTYPE;
  v_step_record pgflow.step_states%ROWTYPE;  -- only selected to take the row lock
begin

-- ==========================================
-- GUARD: No mutations on failed runs
-- ==========================================
IF EXISTS (SELECT 1 FROM pgflow.runs WHERE pgflow.runs.run_id = complete_task.run_id AND pgflow.runs.status = 'failed') THEN
  RETURN QUERY SELECT * FROM pgflow.step_tasks
    WHERE pgflow.step_tasks.run_id = complete_task.run_id
      AND pgflow.step_tasks.step_slug = complete_task.step_slug
      AND pgflow.step_tasks.task_index = complete_task.task_index;
  RETURN;
END IF;

-- ==========================================
-- LOCK ACQUISITION AND TYPE VALIDATION
-- ==========================================
-- Acquire locks first to prevent race conditions
SELECT * INTO v_run_record FROM pgflow.runs
WHERE pgflow.runs.run_id = complete_task.run_id
FOR UPDATE;

SELECT * INTO v_step_record FROM pgflow.step_states
WHERE pgflow.step_states.run_id = complete_task.run_id
  AND pgflow.step_states.step_slug = complete_task.step_slug
FOR UPDATE;

-- Check for type violations AFTER acquiring locks
SELECT child_step.step_slug INTO v_dependent_map_slug
FROM pgflow.deps dependency
JOIN pgflow.steps child_step ON child_step.flow_slug = dependency.flow_slug
                             AND child_step.step_slug = dependency.step_slug
JOIN pgflow.steps parent_step ON parent_step.flow_slug = dependency.flow_slug
                              AND parent_step.step_slug = dependency.dep_slug
JOIN pgflow.step_states child_state ON child_state.flow_slug = child_step.flow_slug
                                    AND child_state.step_slug = child_step.step_slug
WHERE dependency.dep_slug = complete_task.step_slug  -- parent is the completing step
  AND dependency.flow_slug = v_run_record.flow_slug
  AND parent_step.step_type = 'single'  -- Only validate single steps
  AND child_step.step_type = 'map'
  AND child_state.run_id = complete_task.run_id
  AND child_state.initial_tasks IS NULL
  AND (complete_task.output IS NULL OR jsonb_typeof(complete_task.output) != 'array')
LIMIT 1;

-- Handle type violation if detected
IF v_dependent_map_slug IS NOT NULL THEN
  -- Mark run as failed immediately
  UPDATE pgflow.runs
  SET status = 'failed',
      failed_at = now()
  WHERE pgflow.runs.run_id = complete_task.run_id;

  -- Archive all active messages (both queued and started) to prevent orphaned messages
  PERFORM pgmq.archive(
    v_run_record.flow_slug,
    array_agg(st.message_id)
  )
  FROM pgflow.step_tasks st
  WHERE st.run_id = complete_task.run_id
    AND st.status IN ('queued', 'started')
    AND st.message_id IS NOT NULL
  HAVING count(*) > 0;  -- Only call archive if there are messages to archive

  -- Mark current task as failed and store the output
  UPDATE pgflow.step_tasks
  SET status = 'failed',
      failed_at = now(),
      output = complete_task.output,  -- Store the output that caused the violation
      error_message = '[TYPE_VIOLATION] Produced ' ||
                     CASE WHEN complete_task.output IS NULL THEN 'null'
                          ELSE jsonb_typeof(complete_task.output) END ||
                     ' instead of array'
  WHERE pgflow.step_tasks.run_id = complete_task.run_id
    AND pgflow.step_tasks.step_slug = complete_task.step_slug
    AND pgflow.step_tasks.task_index = complete_task.task_index;

  -- Mark step state as failed
  UPDATE pgflow.step_states
  SET status = 'failed',
      failed_at = now(),
      error_message = '[TYPE_VIOLATION] Map step ' || v_dependent_map_slug ||
                     ' expects array input but dependency ' || complete_task.step_slug ||
                     ' produced ' || CASE WHEN complete_task.output IS NULL THEN 'null'
                                         ELSE jsonb_typeof(complete_task.output) END
  WHERE pgflow.step_states.run_id = complete_task.run_id
    AND pgflow.step_states.step_slug = complete_task.step_slug;

  -- Archive the current task's message (it was started, now failed)
  PERFORM pgmq.archive(
    v_run_record.flow_slug,
    st.message_id  -- Single message, use scalar form
  )
  FROM pgflow.step_tasks st
  WHERE st.run_id = complete_task.run_id
    AND st.step_slug = complete_task.step_slug
    AND st.task_index = complete_task.task_index
    AND st.message_id IS NOT NULL;

  -- Return empty result
  RETURN QUERY SELECT * FROM pgflow.step_tasks WHERE false;
  RETURN;
END IF;

-- ==========================================
-- MAIN CTE CHAIN: Update task and propagate changes
-- ==========================================
WITH
-- ---------- Task completion ----------
-- Update the task record with completion status and output.
-- Only a task currently in 'started' status is touched; otherwise this CTE
-- (and every CTE that joins to it) affects no rows.
task AS (
  UPDATE pgflow.step_tasks
  SET
    status = 'completed',
    completed_at = now(),
    output = complete_task.output
  WHERE pgflow.step_tasks.run_id = complete_task.run_id
    AND pgflow.step_tasks.step_slug = complete_task.step_slug
    AND pgflow.step_tasks.task_index = complete_task.task_index
    AND pgflow.step_tasks.status = 'started'
  RETURNING *
),
-- ---------- Step state update ----------
-- Decrement remaining_tasks and potentially mark step as completed
step_state AS (
  UPDATE pgflow.step_states
  SET
    status = CASE
    WHEN pgflow.step_states.remaining_tasks = 1 THEN 'completed'  -- Will be 0 after decrement
    ELSE 'started'
    END,
    completed_at = CASE
    WHEN pgflow.step_states.remaining_tasks = 1 THEN now()  -- Will be 0 after decrement
    ELSE NULL
    END,
    remaining_tasks = pgflow.step_states.remaining_tasks - 1
  FROM task
  WHERE pgflow.step_states.run_id = complete_task.run_id
    AND pgflow.step_states.step_slug = complete_task.step_slug
  RETURNING pgflow.step_states.*
),
-- ---------- Dependency resolution ----------
-- Find all child steps that depend on the completed parent step (only if parent completed)
child_steps AS (
  SELECT deps.step_slug AS child_step_slug
  FROM pgflow.deps deps
  JOIN step_state parent_state ON parent_state.status = 'completed' AND deps.flow_slug = parent_state.flow_slug
  WHERE deps.dep_slug = complete_task.step_slug  -- dep_slug is the parent, step_slug is the child
  ORDER BY deps.step_slug  -- Ensure consistent ordering
),
-- ---------- Lock child steps ----------
-- Acquire row locks on all child steps, in step_slug order, before updating
-- them. A SELECT CTE is only evaluated when something reads it, so
-- child_steps_update below takes its rows FROM this CTE; left unreferenced,
-- the FOR UPDATE would never run.
child_steps_lock AS (
  SELECT * FROM pgflow.step_states
  WHERE pgflow.step_states.run_id = complete_task.run_id
    AND pgflow.step_states.step_slug IN (SELECT child_step_slug FROM child_steps)
  ORDER BY pgflow.step_states.step_slug
  FOR UPDATE
),
-- ---------- Update child steps ----------
-- Decrement remaining_deps and resolve NULL initial_tasks for map steps
child_steps_update AS (
  UPDATE pgflow.step_states child_state
    SET remaining_deps = child_state.remaining_deps - 1,
        -- Resolve NULL initial_tasks for child map steps
        -- This is where child maps learn their array size from the parent
        -- This CTE only runs when the parent step is complete (see child_steps JOIN)
        initial_tasks = CASE
          WHEN child_step.step_type = 'map' AND child_state.initial_tasks IS NULL THEN
            CASE
              WHEN parent_step.step_type = 'map' THEN
                -- Map->map: Count all completed tasks from parent map
                -- We add 1 because the current task is being completed in this transaction
                -- but isn't yet visible as 'completed' in the step_tasks table
                -- TODO: Refactor to use future column step_states.total_tasks
                -- Would eliminate the COUNT query and just use parent_state.total_tasks
                (SELECT COUNT(*)::int + 1
                 FROM pgflow.step_tasks parent_tasks
                 WHERE parent_tasks.run_id = complete_task.run_id
                   AND parent_tasks.step_slug = complete_task.step_slug
                   AND parent_tasks.status = 'completed'
                   AND parent_tasks.task_index != complete_task.task_index)
              ELSE
                -- Single->map: Use output array length (single steps complete immediately)
                CASE
                  WHEN complete_task.output IS NOT NULL
                    AND jsonb_typeof(complete_task.output) = 'array' THEN
                    jsonb_array_length(complete_task.output)
                  ELSE NULL  -- Keep NULL if not an array
                END
            END
          ELSE child_state.initial_tasks  -- Keep existing value (including NULL)
        END
  FROM child_steps_lock locked_child
  -- v_run_record was read (and locked) from this run's row above, so its
  -- flow_slug is the run's flow slug
  JOIN pgflow.steps child_step ON child_step.flow_slug = v_run_record.flow_slug
                               AND child_step.step_slug = locked_child.step_slug
  JOIN pgflow.steps parent_step ON parent_step.flow_slug = v_run_record.flow_slug
                                AND parent_step.step_slug = complete_task.step_slug
  WHERE child_state.run_id = locked_child.run_id
    AND child_state.step_slug = locked_child.step_slug
)
-- ---------- Update run remaining_steps ----------
-- Decrement the run's remaining_steps counter if step completed
UPDATE pgflow.runs
SET remaining_steps = pgflow.runs.remaining_steps - 1
FROM step_state
WHERE pgflow.runs.run_id = complete_task.run_id
  AND step_state.status = 'completed';

-- ==========================================
-- POST-COMPLETION ACTIONS
-- ==========================================

-- ---------- Get updated state for broadcasting ----------
SELECT * INTO v_step_state FROM pgflow.step_states
WHERE pgflow.step_states.run_id = complete_task.run_id AND pgflow.step_states.step_slug = complete_task.step_slug;

-- ---------- Handle step completion ----------
IF v_step_state.status = 'completed' THEN
  -- Cascade complete any taskless steps that are now ready
  PERFORM pgflow.cascade_complete_taskless_steps(complete_task.run_id);

  -- Broadcast step:completed event
  -- For map steps, aggregate all task outputs; for single steps, use the task output
  PERFORM realtime.send(
    jsonb_build_object(
      'event_type', 'step:completed',
      'run_id', complete_task.run_id,
      'step_slug', complete_task.step_slug,
      'status', 'completed',
      'output', CASE
        WHEN (SELECT s.step_type FROM pgflow.steps s
              WHERE s.flow_slug = v_step_state.flow_slug
                AND s.step_slug = complete_task.step_slug) = 'map' THEN
          -- Aggregate all task outputs for map steps
          (SELECT COALESCE(jsonb_agg(st.output ORDER BY st.task_index), '[]'::jsonb)
           FROM pgflow.step_tasks st
           WHERE st.run_id = complete_task.run_id
             AND st.step_slug = complete_task.step_slug
             AND st.status = 'completed')
        ELSE
          -- Single step: use the individual task output
          complete_task.output
      END,
      'completed_at', v_step_state.completed_at
    ),
    concat('step:', complete_task.step_slug, ':completed'),
    concat('pgflow:run:', complete_task.run_id),
    false
  );
END IF;

-- ---------- Archive completed task message ----------
-- Move message from active queue to archive table
PERFORM (
  WITH completed_tasks AS (
    SELECT r.flow_slug, st.message_id
    FROM pgflow.step_tasks st
    JOIN pgflow.runs r ON st.run_id = r.run_id
    WHERE st.run_id = complete_task.run_id
      AND st.step_slug = complete_task.step_slug
      AND st.task_index = complete_task.task_index
      AND st.status = 'completed'
  )
  SELECT pgmq.archive(ct.flow_slug, ct.message_id)
  FROM completed_tasks ct
  WHERE EXISTS (SELECT 1 FROM completed_tasks)
);

-- ---------- Trigger next steps ----------
-- Start any steps that are now ready (deps satisfied)
PERFORM pgflow.start_ready_steps(complete_task.run_id);

-- Check if the entire run is complete
PERFORM pgflow.maybe_complete_run(complete_task.run_id);

-- ---------- Return completed task ----------
RETURN QUERY SELECT *
FROM pgflow.step_tasks AS step_task
WHERE step_task.run_id = complete_task.run_id
  AND step_task.step_slug = complete_task.step_slug
  AND step_task.task_index = complete_task.task_index;

end;
$$;
671
+ -- Modify "fail_task" function
672
+ CREATE OR REPLACE FUNCTION "pgflow"."fail_task" ("run_id" uuid, "step_slug" text, "task_index" integer, "error_message" text) RETURNS SETOF "pgflow"."step_tasks" LANGUAGE plpgsql SET "search_path" = '' AS $$
673
+ DECLARE
674
+ v_run_failed boolean;
675
+ v_step_failed boolean;
676
+ begin
677
+
678
+ -- If run is already failed, no retries allowed
679
+ IF EXISTS (SELECT 1 FROM pgflow.runs WHERE pgflow.runs.run_id = fail_task.run_id AND pgflow.runs.status = 'failed') THEN
680
+ UPDATE pgflow.step_tasks
681
+ SET status = 'failed',
682
+ failed_at = now(),
683
+ error_message = fail_task.error_message
684
+ WHERE pgflow.step_tasks.run_id = fail_task.run_id
685
+ AND pgflow.step_tasks.step_slug = fail_task.step_slug
686
+ AND pgflow.step_tasks.task_index = fail_task.task_index
687
+ AND pgflow.step_tasks.status = 'started';
688
+
689
+ -- Archive the task's message
690
+ PERFORM pgmq.archive(r.flow_slug, ARRAY_AGG(st.message_id))
691
+ FROM pgflow.step_tasks st
692
+ JOIN pgflow.runs r ON st.run_id = r.run_id
693
+ WHERE st.run_id = fail_task.run_id
694
+ AND st.step_slug = fail_task.step_slug
695
+ AND st.task_index = fail_task.task_index
696
+ AND st.message_id IS NOT NULL
697
+ GROUP BY r.flow_slug
698
+ HAVING COUNT(st.message_id) > 0;
699
+
700
+ RETURN QUERY SELECT * FROM pgflow.step_tasks
701
+ WHERE pgflow.step_tasks.run_id = fail_task.run_id
702
+ AND pgflow.step_tasks.step_slug = fail_task.step_slug
703
+ AND pgflow.step_tasks.task_index = fail_task.task_index;
704
+ RETURN;
705
+ END IF;
706
+
707
+ WITH run_lock AS (
708
+ SELECT * FROM pgflow.runs
709
+ WHERE pgflow.runs.run_id = fail_task.run_id
710
+ FOR UPDATE
711
+ ),
712
+ step_lock AS (
713
+ SELECT * FROM pgflow.step_states
714
+ WHERE pgflow.step_states.run_id = fail_task.run_id
715
+ AND pgflow.step_states.step_slug = fail_task.step_slug
716
+ FOR UPDATE
717
+ ),
718
+ flow_info AS (
719
+ SELECT r.flow_slug
720
+ FROM pgflow.runs r
721
+ WHERE r.run_id = fail_task.run_id
722
+ ),
723
+ config AS (
724
+ SELECT
725
+ COALESCE(s.opt_max_attempts, f.opt_max_attempts) AS opt_max_attempts,
726
+ COALESCE(s.opt_base_delay, f.opt_base_delay) AS opt_base_delay
727
+ FROM pgflow.steps s
728
+ JOIN pgflow.flows f ON f.flow_slug = s.flow_slug
729
+ JOIN flow_info fi ON fi.flow_slug = s.flow_slug
730
+ WHERE s.flow_slug = fi.flow_slug AND s.step_slug = fail_task.step_slug
731
+ ),
732
+ fail_or_retry_task as (
733
+ UPDATE pgflow.step_tasks as task
734
+ SET
735
+ status = CASE
736
+ WHEN task.attempts_count < (SELECT opt_max_attempts FROM config) THEN 'queued'
737
+ ELSE 'failed'
738
+ END,
739
+ failed_at = CASE
740
+ WHEN task.attempts_count >= (SELECT opt_max_attempts FROM config) THEN now()
741
+ ELSE NULL
742
+ END,
743
+ started_at = CASE
744
+ WHEN task.attempts_count < (SELECT opt_max_attempts FROM config) THEN NULL
745
+ ELSE task.started_at
746
+ END,
747
+ error_message = fail_task.error_message
748
+ WHERE task.run_id = fail_task.run_id
749
+ AND task.step_slug = fail_task.step_slug
750
+ AND task.task_index = fail_task.task_index
751
+ AND task.status = 'started'
752
+ RETURNING *
753
+ ),
754
+ maybe_fail_step AS (
755
+ UPDATE pgflow.step_states
756
+ SET
757
+ status = CASE
758
+ WHEN (select fail_or_retry_task.status from fail_or_retry_task) = 'failed' THEN 'failed'
759
+ ELSE pgflow.step_states.status
760
+ END,
761
+ failed_at = CASE
762
+ WHEN (select fail_or_retry_task.status from fail_or_retry_task) = 'failed' THEN now()
763
+ ELSE NULL
764
+ END,
765
+ error_message = CASE
766
+ WHEN (select fail_or_retry_task.status from fail_or_retry_task) = 'failed' THEN fail_task.error_message
767
+ ELSE NULL
768
+ END
769
+ FROM fail_or_retry_task
770
+ WHERE pgflow.step_states.run_id = fail_task.run_id
771
+ AND pgflow.step_states.step_slug = fail_task.step_slug
772
+ RETURNING pgflow.step_states.*
773
+ )
774
+ -- Update run status
775
+ UPDATE pgflow.runs
776
+ SET status = CASE
777
+ WHEN (select status from maybe_fail_step) = 'failed' THEN 'failed'
778
+ ELSE status
779
+ END,
780
+ failed_at = CASE
781
+ WHEN (select status from maybe_fail_step) = 'failed' THEN now()
782
+ ELSE NULL
783
+ END
784
+ WHERE pgflow.runs.run_id = fail_task.run_id
785
+ RETURNING (status = 'failed') INTO v_run_failed;
786
+
787
+ -- Check if step failed by querying the step_states table
788
+ SELECT (status = 'failed') INTO v_step_failed
789
+ FROM pgflow.step_states
790
+ WHERE pgflow.step_states.run_id = fail_task.run_id
791
+ AND pgflow.step_states.step_slug = fail_task.step_slug;
792
+
793
+ -- Send broadcast event for step failure if the step was failed
794
+ IF v_step_failed THEN
795
+ PERFORM realtime.send(
796
+ jsonb_build_object(
797
+ 'event_type', 'step:failed',
798
+ 'run_id', fail_task.run_id,
799
+ 'step_slug', fail_task.step_slug,
800
+ 'status', 'failed',
801
+ 'error_message', fail_task.error_message,
802
+ 'failed_at', now()
803
+ ),
804
+ concat('step:', fail_task.step_slug, ':failed'),
805
+ concat('pgflow:run:', fail_task.run_id),
806
+ false
807
+ );
808
+ END IF;
809
+
810
+ -- Send broadcast event for run failure if the run was failed
811
+ IF v_run_failed THEN
812
+ DECLARE
813
+ v_flow_slug text;
814
+ BEGIN
815
+ SELECT flow_slug INTO v_flow_slug FROM pgflow.runs WHERE pgflow.runs.run_id = fail_task.run_id;
816
+
817
+ PERFORM realtime.send(
818
+ jsonb_build_object(
819
+ 'event_type', 'run:failed',
820
+ 'run_id', fail_task.run_id,
821
+ 'flow_slug', v_flow_slug,
822
+ 'status', 'failed',
823
+ 'error_message', fail_task.error_message,
824
+ 'failed_at', now()
825
+ ),
826
+ 'run:failed',
827
+ concat('pgflow:run:', fail_task.run_id),
828
+ false
829
+ );
830
+ END;
831
+ END IF;
832
+
833
+ -- Archive all active messages (both queued and started) when run fails
834
+ IF v_run_failed THEN
835
+ PERFORM pgmq.archive(r.flow_slug, ARRAY_AGG(st.message_id))
836
+ FROM pgflow.step_tasks st
837
+ JOIN pgflow.runs r ON st.run_id = r.run_id
838
+ WHERE st.run_id = fail_task.run_id
839
+ AND st.status IN ('queued', 'started')
840
+ AND st.message_id IS NOT NULL
841
+ GROUP BY r.flow_slug
842
+ HAVING COUNT(st.message_id) > 0;
843
+ END IF;
844
+
845
+ -- For queued tasks: delay the message for retry with exponential backoff
846
+ PERFORM (
847
+ WITH retry_config AS (
848
+ SELECT
849
+ COALESCE(s.opt_base_delay, f.opt_base_delay) AS base_delay
850
+ FROM pgflow.steps s
851
+ JOIN pgflow.flows f ON f.flow_slug = s.flow_slug
852
+ JOIN pgflow.runs r ON r.flow_slug = f.flow_slug
853
+ WHERE r.run_id = fail_task.run_id
854
+ AND s.step_slug = fail_task.step_slug
855
+ ),
856
+ queued_tasks AS (
857
+ SELECT
858
+ r.flow_slug,
859
+ st.message_id,
860
+ pgflow.calculate_retry_delay((SELECT base_delay FROM retry_config), st.attempts_count) AS calculated_delay
861
+ FROM pgflow.step_tasks st
862
+ JOIN pgflow.runs r ON st.run_id = r.run_id
863
+ WHERE st.run_id = fail_task.run_id
864
+ AND st.step_slug = fail_task.step_slug
865
+ AND st.task_index = fail_task.task_index
866
+ AND st.status = 'queued'
867
+ )
868
+ SELECT pgmq.set_vt(qt.flow_slug, qt.message_id, qt.calculated_delay)
869
+ FROM queued_tasks qt
870
+ WHERE EXISTS (SELECT 1 FROM queued_tasks)
871
+ );
872
+
873
+ -- For failed tasks: archive the message
874
+ PERFORM pgmq.archive(r.flow_slug, ARRAY_AGG(st.message_id))
875
+ FROM pgflow.step_tasks st
876
+ JOIN pgflow.runs r ON st.run_id = r.run_id
877
+ WHERE st.run_id = fail_task.run_id
878
+ AND st.step_slug = fail_task.step_slug
879
+ AND st.task_index = fail_task.task_index
880
+ AND st.status = 'failed'
881
+ AND st.message_id IS NOT NULL
882
+ GROUP BY r.flow_slug
883
+ HAVING COUNT(st.message_id) > 0;
884
+
885
+ return query select *
886
+ from pgflow.step_tasks st
887
+ where st.run_id = fail_task.run_id
888
+ and st.step_slug = fail_task.step_slug
889
+ and st.task_index = fail_task.task_index;
890
+
891
+ end;
892
+ $$;
893
+ -- Modify "start_flow" function
+ --
+ -- Creates a run of the given flow together with one step_states row per step,
+ -- broadcasts a run:started event, then calls cascade_complete_taskless_steps,
+ -- start_ready_steps and maybe_complete_run for the new run.
+ --
+ -- Parameters:
+ --   flow_slug - slug of the flow to start
+ --   input     - run input; must be a JSON array when the flow has a root map step
+ --   run_id    - optional id for the run; gen_random_uuid() is used when NULL
+ -- Returns: the pgflow.runs row, re-read after the post-creation calls.
+ -- Raises: when the flow has a root map step (step_type 'map', deps_count 0)
+ --         and input is NULL or is not a JSON array.
+ CREATE OR REPLACE FUNCTION "pgflow"."start_flow" ("flow_slug" text, "input" jsonb, "run_id" uuid DEFAULT NULL::uuid) RETURNS SETOF "pgflow"."runs" LANGUAGE plpgsql SET "search_path" = '' AS $$
+ declare
+   v_run pgflow.runs%ROWTYPE;
+   v_has_root_map boolean;
+   v_event jsonb;
+ begin
+
+   -- ---------- Validation: root maps need array input ----------
+   -- Parameters are always qualified with start_flow.* because they share
+   -- their names with table columns.
+   SELECT EXISTS (
+     SELECT 1
+     FROM pgflow.steps AS s
+     WHERE s.flow_slug = start_flow.flow_slug
+       AND s.step_type = 'map'
+       AND s.deps_count = 0
+   ) INTO v_has_root_map;
+
+   IF v_has_root_map THEN
+     IF start_flow.input IS NULL THEN
+       -- Should be caught by the NOT NULL constraint, but be defensive
+       RAISE EXCEPTION 'Flow % has root map steps but input is NULL', start_flow.flow_slug;
+     ELSIF jsonb_typeof(start_flow.input) != 'array' THEN
+       RAISE EXCEPTION 'Flow % has root map steps but input is not an array (got %)',
+         start_flow.flow_slug, jsonb_typeof(start_flow.input);
+     END IF;
+   END IF;
+
+   -- ---------- Create the run and its step states in one statement ----------
+   WITH
+     -- All steps that belong to the flow
+     steps_of_flow AS (
+       SELECT s.flow_slug, s.step_slug, s.step_type, s.deps_count
+       FROM pgflow.steps AS s
+       WHERE s.flow_slug = start_flow.flow_slug
+     ),
+     -- The run row; remaining_steps starts at the number of steps in the flow
+     new_run AS (
+       INSERT INTO pgflow.runs (run_id, flow_slug, input, remaining_steps)
+       SELECT
+         COALESCE(start_flow.run_id, gen_random_uuid()),
+         start_flow.flow_slug,
+         start_flow.input,
+         (SELECT count(*) FROM steps_of_flow)
+       RETURNING *
+     ),
+     -- One step_states row per step. This CTE is never read, but as a
+     -- data-modifying CTE it is still executed.
+     new_step_states AS (
+       INSERT INTO pgflow.step_states (flow_slug, run_id, step_slug, remaining_deps, initial_tasks)
+       SELECT
+         sof.flow_slug,
+         nr.run_id,
+         sof.step_slug,
+         sof.deps_count,
+         CASE
+           -- Root map: one task per element of the input array
+           WHEN sof.step_type = 'map' AND sof.deps_count = 0 THEN
+             CASE
+               WHEN jsonb_typeof(start_flow.input) = 'array' THEN
+                 jsonb_array_length(start_flow.input)
+               ELSE
+                 1
+             END
+           -- Dependent map: task count is unknown until its dependency completes
+           WHEN sof.step_type = 'map' AND sof.deps_count > 0 THEN
+             NULL
+           -- Single steps: always exactly one task
+           ELSE
+             1
+         END
+       FROM steps_of_flow AS sof
+       CROSS JOIN new_run AS nr  -- new_run always holds exactly one row
+     )
+   SELECT nr.* INTO v_run FROM new_run AS nr;
+
+   -- ---------- Broadcast run:started ----------
+   v_event := jsonb_build_object(
+     'event_type', 'run:started',
+     'run_id', v_run.run_id,
+     'flow_slug', v_run.flow_slug,
+     'input', v_run.input,
+     'status', 'started',
+     'remaining_steps', v_run.remaining_steps,
+     'started_at', v_run.started_at
+   );
+   PERFORM realtime.send(
+     v_event,
+     'run:started',
+     concat('pgflow:run:', v_run.run_id),
+     false
+   );
+
+   -- ---------- Post-creation actions (order matters) ----------
+   -- 1. Complete taskless steps (empty-array maps that should auto-complete)
+   PERFORM pgflow.cascade_complete_taskless_steps(v_run.run_id);
+   -- 2. Start root steps (those with no dependencies)
+   PERFORM pgflow.start_ready_steps(v_run.run_id);
+   -- 3. Finalize the run if the cascade already completed every step
+   PERFORM pgflow.maybe_complete_run(v_run.run_id);
+
+   -- Re-read the run so the caller sees the effects of the calls above
+   RETURN QUERY
+     SELECT r.*
+     FROM pgflow.runs AS r
+     WHERE r.run_id = v_run.run_id;
+
+ end;
+ $$;
1013
+ -- Modify "start_tasks" function
+ --
+ -- Marks the queued tasks behind the given queue message ids as started by a
+ -- worker and returns one step_task_record per started task with its input
+ -- already assembled.
+ --
+ -- Parameters:
+ --   flow_slug - flow (and queue) the messages belong to
+ --   msg_ids   - queue message ids that were read by the worker
+ --   worker_id - stored in step_tasks.last_worker_id
+ -- Returns: (flow_slug, run_id, step_slug, input, msg_id, task_index) per task.
+ --   Tasks that are not 'queued', or whose run has status 'failed', are skipped.
+ -- Side effects: increments attempts_count, sets status/started_at/last_worker_id,
+ --   and moves each message's visibility timeout to opt_timeout + 2
+ --   (step value, falling back to the flow value) via pgflow.set_vt_batch.
+ CREATE OR REPLACE FUNCTION "pgflow"."start_tasks" ("flow_slug" text, "msg_ids" bigint[], "worker_id" uuid) RETURNS SETOF "pgflow"."step_task_record" LANGUAGE sql SET "search_path" = '' AS $$
+   with tasks as (
+     -- Queued tasks of this flow whose message id was handed in
+     select
+       task.flow_slug,
+       task.run_id,
+       task.step_slug,
+       task.task_index,
+       task.message_id
+     from pgflow.step_tasks as task
+     join pgflow.runs r on r.run_id = task.run_id
+     where task.flow_slug = start_tasks.flow_slug
+       and task.message_id = any(msg_ids)
+       and task.status = 'queued'
+       -- MVP: Don't start tasks on failed runs
+       and r.status != 'failed'
+   ),
+   -- Data-modifying CTE: runs even though the primary query never reads it
+   start_tasks_update as (
+     update pgflow.step_tasks
+     set
+       attempts_count = attempts_count + 1,
+       status = 'started',
+       started_at = now(),
+       last_worker_id = worker_id
+     from tasks
+     where step_tasks.message_id = tasks.message_id
+       and step_tasks.flow_slug = tasks.flow_slug
+       and step_tasks.status = 'queued'
+   ),
+   runs as (
+     select
+       r.run_id,
+       r.input
+     from pgflow.runs r
+     where r.run_id in (select run_id from tasks)
+   ),
+   -- One row per (task, dependency) with that dependency's output
+   deps as (
+     select
+       st.run_id,
+       st.step_slug,
+       dep.dep_slug,
+       -- Aggregate map outputs or use single output
+       CASE
+         WHEN dep_step.step_type = 'map' THEN
+           -- Aggregate all task outputs ordered by task_index
+           -- Use COALESCE to return empty array if no tasks
+           (SELECT COALESCE(jsonb_agg(dt.output ORDER BY dt.task_index), '[]'::jsonb)
+            FROM pgflow.step_tasks dt
+            WHERE dt.run_id = st.run_id
+              AND dt.step_slug = dep.dep_slug
+              AND dt.status = 'completed')
+         ELSE
+           -- Single step: use the single task output
+           dep_task.output
+       END as dep_output
+     from tasks st
+     join pgflow.deps dep on dep.flow_slug = st.flow_slug and dep.step_slug = st.step_slug
+     join pgflow.steps dep_step on dep_step.flow_slug = dep.flow_slug and dep_step.step_slug = dep.dep_slug
+     left join pgflow.step_tasks dep_task on
+       dep_task.run_id = st.run_id and
+       dep_task.step_slug = dep.dep_slug and
+       dep_task.status = 'completed'
+       and dep_step.step_type = 'single' -- Only join for single steps
+   ),
+   -- Collapse the dependency rows into one {"dep_slug": output, ...} object per task
+   deps_outputs as (
+     select
+       d.run_id,
+       d.step_slug,
+       jsonb_object_agg(d.dep_slug, d.dep_output) as deps_output,
+       count(*) as dep_count
+     from deps d
+     group by d.run_id, d.step_slug
+   ),
+   -- Visibility timeout per message: step timeout, else flow timeout, plus 2
+   timeouts as (
+     select
+       task.message_id,
+       task.flow_slug,
+       coalesce(step.opt_timeout, flow.opt_timeout) + 2 as vt_delay
+     from tasks task
+     join pgflow.flows flow on flow.flow_slug = task.flow_slug
+     join pgflow.steps step on step.flow_slug = task.flow_slug and step.step_slug = task.step_slug
+   ),
+   -- Batch update visibility timeouts for all messages.
+   -- HAVING skips the call entirely when there is nothing to start, so the
+   -- function is never invoked with NULL arrays.
+   set_vt_batch as (
+     select pgflow.set_vt_batch(
+       start_tasks.flow_slug,
+       array_agg(t.message_id order by t.message_id),
+       array_agg(t.vt_delay order by t.message_id)
+     )
+     from timeouts t
+     having count(*) > 0
+   )
+   select
+     st.flow_slug,
+     st.run_id,
+     st.step_slug,
+     -- ==========================================
+     -- INPUT CONSTRUCTION LOGIC
+     -- ==========================================
+     -- This nested CASE statement determines how to construct the input
+     -- for each task based on the step type (map vs non-map).
+     --
+     -- The fundamental difference:
+     -- - Map steps: Receive RAW array elements (e.g., just 42 or "hello")
+     -- - Non-map steps: Receive structured objects with named keys
+     --   (e.g., {"run": {...}, "dependency1": {...}})
+     -- ==========================================
+     CASE
+       -- -------------------- MAP STEPS --------------------
+       -- Map steps process arrays element-by-element.
+       -- Each task receives ONE element from the array at its task_index position.
+       WHEN step.step_type = 'map' THEN
+         -- Map steps get raw array elements without any wrapper object
+         CASE
+           -- ROOT MAP: Gets array from run input
+           -- Example: run input = [1, 2, 3]
+           --   task 0 gets: 1
+           --   task 1 gets: 2
+           --   task 2 gets: 3
+           WHEN step.deps_count = 0 THEN
+             -- Root map (deps_count = 0): no dependencies, reads from run input.
+             -- Extract the element at task_index from the run's input array.
+             -- Note: jsonb_array_element returns NULL for a non-array input;
+             -- start_flow rejects non-array input for flows with root maps.
+             jsonb_array_element(r.input, st.task_index)
+
+           -- DEPENDENT MAP: Gets array from its single dependency
+           -- Example: dependency output = ["a", "b", "c"]
+           --   task 0 gets: "a"
+           --   task 1 gets: "b"
+           --   task 2 gets: "c"
+           ELSE
+             -- Has dependencies (should be exactly 1 for map steps).
+             -- Extract the element at task_index from the dependency's output array.
+             --
+             -- Why the subquery with jsonb_each?
+             -- - The dependency outputs a raw array: [1, 2, 3]
+             -- - deps_outputs aggregates it into: {"dep_name": [1, 2, 3]}
+             -- - We need to unwrap and get just the array value
+             -- - Map steps have at most 1 dependency (enforced by add_step)
+             -- - So jsonb_each will return exactly 1 row
+             -- - We extract the 'value' which is the raw array [1, 2, 3]
+             -- - Then get the element at task_index from that array
+             (SELECT jsonb_array_element(value, st.task_index)
+              FROM jsonb_each(dep_out.deps_output)
+              LIMIT 1)
+         END
+
+       -- -------------------- NON-MAP STEPS --------------------
+       -- Regular (non-map) steps receive ALL inputs as a structured object.
+       -- This includes the original run input plus all dependency outputs.
+       ELSE
+         -- Non-map steps get structured input with named keys
+         -- Example output: {
+         --   "run": {"original": "input"},
+         --   "step1": {"output": "from_step1"},
+         --   "step2": {"output": "from_step2"}
+         -- }
+         --
+         -- Build object with 'run' key containing original input
+         jsonb_build_object('run', r.input) ||
+         -- Merge with deps_output which already has dependency outputs
+         -- deps_output format: {"dep1": output1, "dep2": output2, ...}
+         -- If no dependencies, defaults to empty object
+         coalesce(dep_out.deps_output, '{}'::jsonb)
+     END as input,
+     st.message_id as msg_id,
+     st.task_index as task_index
+   from tasks st
+   join runs r on st.run_id = r.run_id
+   join pgflow.steps step on
+     step.flow_slug = st.flow_slug and
+     step.step_slug = st.step_slug
+   left join deps_outputs dep_out on
+     dep_out.run_id = st.run_id and
+     dep_out.step_slug = st.step_slug
+   -- PostgreSQL executes a SELECT CTE only when the primary query reads from it.
+   -- Without this reference set_vt_batch would never run and the visibility
+   -- timeouts would stay untouched. The aggregate always yields exactly one
+   -- row, so the rows returned to the caller are unchanged.
+   cross join (select count(*) as vt_rows from set_vt_batch) as vt_applied
+ $$;
1189
+ -- Create "add_step" function
+ --
+ -- Adds a step to a flow and records its dependencies. When the step already
+ -- exists, the existing row is returned unchanged and only missing dependency
+ -- rows are added.
+ --
+ -- Parameters:
+ --   flow_slug    - flow the step belongs to
+ --   step_slug    - slug of the step
+ --   deps_slugs   - slugs of the steps this step depends on; repeated slugs
+ --                  are counted once
+ --   max_attempts, base_delay, timeout, start_delay
+ --                - stored as given in the opt_* columns (NULL allowed)
+ --   step_type    - 'single' by default; an explicit NULL also means 'single'
+ -- Returns: the pgflow.steps row for the step.
+ -- Raises: when a 'map' step is given more than one distinct dependency.
+ CREATE FUNCTION "pgflow"."add_step" ("flow_slug" text, "step_slug" text, "deps_slugs" text[] DEFAULT '{}', "max_attempts" integer DEFAULT NULL::integer, "base_delay" integer DEFAULT NULL::integer, "timeout" integer DEFAULT NULL::integer, "start_delay" integer DEFAULT NULL::integer, "step_type" text DEFAULT 'single') RETURNS "pgflow"."steps" LANGUAGE plpgsql SET "search_path" = '' AS $$
+ DECLARE
+   result_step pgflow.steps;
+   next_idx int;
+   unique_deps text[];
+   unique_deps_count int;
+ BEGIN
+   -- De-duplicate the dependency list once and use it everywhere below.
+   -- The deps insert ignores repeated slugs (ON CONFLICT DO NOTHING), so
+   -- counting the raw array would store a deps_count larger than the number
+   -- of dependency rows that actually exist.
+   SELECT COALESCE(array_agg(DISTINCT d.dep_slug), '{}'::text[])
+   INTO unique_deps
+   FROM unnest(COALESCE(add_step.deps_slugs, '{}'::text[])) AS d(dep_slug);
+
+   unique_deps_count := COALESCE(array_length(unique_deps, 1), 0);
+
+   -- Validate map step constraints
+   -- Map steps can have either:
+   --   0 dependencies (root map - maps over flow input array)
+   --   1 dependency (dependent map - maps over dependency output array)
+   IF COALESCE(add_step.step_type, 'single') = 'map' AND unique_deps_count > 1 THEN
+     RAISE EXCEPTION 'Map step "%" can have at most one dependency, but % were provided: %',
+       add_step.step_slug,
+       unique_deps_count,
+       array_to_string(unique_deps, ', ');
+   END IF;
+
+   -- Get next step index (0 for the first step of the flow)
+   SELECT COALESCE(MAX(s.step_index) + 1, 0) INTO next_idx
+   FROM pgflow.steps s
+   WHERE s.flow_slug = add_step.flow_slug;
+
+   -- Create the step
+   INSERT INTO pgflow.steps (
+     flow_slug, step_slug, step_type, step_index, deps_count,
+     opt_max_attempts, opt_base_delay, opt_timeout, opt_start_delay
+   )
+   VALUES (
+     add_step.flow_slug,
+     add_step.step_slug,
+     COALESCE(add_step.step_type, 'single'),
+     next_idx,
+     unique_deps_count,
+     add_step.max_attempts,
+     add_step.base_delay,
+     add_step.timeout,
+     add_step.start_delay
+   )
+   -- No-op update instead of DO NOTHING so that RETURNING still yields the
+   -- already existing row when the step was added before
+   ON CONFLICT ON CONSTRAINT steps_pkey
+   DO UPDATE SET step_slug = EXCLUDED.step_slug
+   RETURNING * INTO result_step;
+
+   -- Insert dependencies (unnest of an empty array inserts nothing)
+   INSERT INTO pgflow.deps (flow_slug, dep_slug, step_slug)
+   SELECT add_step.flow_slug, d.dep_slug, add_step.step_slug
+   FROM unnest(unique_deps) AS d(dep_slug)
+   ON CONFLICT ON CONSTRAINT deps_pkey DO NOTHING;
+
+   RETURN result_step;
+ END;
+ $$;
1241
+ -- Drop the 6-argument "add_step" overload (text, text, integer x4); the 8-argument function created above has a different signature and is not affected
1242
+ DROP FUNCTION "pgflow"."add_step" (text, text, integer, integer, integer, integer);
1243
+ -- Drop the 7-argument "add_step" overload (text, text, text[], integer x4), i.e. the variant without the trailing "step_type" text parameter
1244
+ DROP FUNCTION "pgflow"."add_step" (text, text, text[], integer, integer, integer, integer);