@pgflow/core 0.0.0-array-map-steps-302d00a8-20250925065142 → 0.0.0-array-map-steps-b956f8f9-20251006084236

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,157 +0,0 @@
-- Modify "start_tasks" function
--
-- Claims the queued step tasks that belong to the given queue messages:
-- marks them 'started' for the calling worker, extends the visibility
-- timeout of their messages and returns one pgflow.step_task_record per
-- claimed task, including the input the handler should receive.
--
-- Parameters:
--   flow_slug  flow whose tasks are started; also the first argument handed
--              to pgflow.set_vt_batch
--   msg_ids    ids of the messages the worker wants to start
--   worker_id  recorded as last_worker_id on every started task
--
-- Returns (flow_slug, run_id, step_slug, input, msg_id) for each message that
-- still had a 'queued' task; other messages produce no row.
CREATE OR REPLACE FUNCTION "pgflow"."start_tasks" ("flow_slug" text, "msg_ids" bigint[], "worker_id" uuid) RETURNS SETOF "pgflow"."step_task_record" LANGUAGE sql SET "search_path" = '' AS $$
  with tasks as (
    -- Queued tasks addressed by the requested messages.
    -- Parameters are qualified with the function name so that a column of the
    -- same name can never shadow them.
    select
      task.flow_slug,
      task.run_id,
      task.step_slug,
      task.task_index,
      task.message_id
    from pgflow.step_tasks as task
    where task.flow_slug = start_tasks.flow_slug
      and task.message_id = any(start_tasks.msg_ids)
      and task.status = 'queued'
  ),
  -- Data-modifying CTE: executed even though nothing reads its output
  start_tasks_update as (
    update pgflow.step_tasks
    set
      attempts_count = attempts_count + 1,
      status = 'started',
      started_at = now(),
      last_worker_id = start_tasks.worker_id
    from tasks
    where step_tasks.message_id = tasks.message_id
      and step_tasks.flow_slug = tasks.flow_slug
      and step_tasks.status = 'queued'
  ),
  runs as (
    select
      r.run_id,
      r.input
    from pgflow.runs r
    where r.run_id in (select run_id from tasks)
  ),
  -- One row per (task, dependency, completed dependency task).
  -- NOTE(review): a dependency with several completed tasks yields several
  -- rows for the same dep_slug, and jsonb_object_agg below keeps only one of
  -- them - confirm that dependencies always have a single task here.
  deps as (
    select
      st.run_id,
      st.step_slug,
      dep.dep_slug,
      dep_task.output as dep_output
    from tasks st
    join pgflow.deps dep on dep.flow_slug = st.flow_slug and dep.step_slug = st.step_slug
    join pgflow.step_tasks dep_task on
      dep_task.run_id = st.run_id and
      dep_task.step_slug = dep.dep_slug and
      dep_task.status = 'completed'
  ),
  -- {"dep_slug": output, ...} per task
  deps_outputs as (
    select
      d.run_id,
      d.step_slug,
      jsonb_object_agg(d.dep_slug, d.dep_output) as deps_output
    from deps d
    group by d.run_id, d.step_slug
  ),
  -- Visibility timeout per message: step timeout (falling back to the flow
  -- timeout) plus 2
  timeouts as (
    select
      task.message_id,
      task.flow_slug,
      coalesce(step.opt_timeout, flow.opt_timeout) + 2 as vt_delay
    from tasks task
    join pgflow.flows flow on flow.flow_slug = task.flow_slug
    join pgflow.steps step on step.flow_slug = task.flow_slug and step.step_slug = task.step_slug
  ),
  -- Batch update visibility timeouts for all messages.
  -- This is a plain SELECT CTE, so PostgreSQL only runs it when the primary
  -- query reads it; the WHERE clause of the primary query does that.
  -- HAVING keeps the helper from being called with NULL arrays when no task
  -- was claimed.
  set_vt_batch as (
    select pgflow.set_vt_batch(
      start_tasks.flow_slug,
      array_agg(t.message_id order by t.message_id),
      array_agg(t.vt_delay order by t.message_id)
    )
    from timeouts t
    having count(*) > 0
  )
  select
    st.flow_slug,
    st.run_id,
    st.step_slug,
    -- ==========================================
    -- INPUT CONSTRUCTION LOGIC
    -- ==========================================
    -- This nested CASE statement determines how to construct the input
    -- for each task based on the step type (map vs non-map).
    --
    -- The fundamental difference:
    -- - Map steps: Receive RAW array elements (e.g., just 42 or "hello")
    -- - Non-map steps: Receive structured objects with named keys
    --   (e.g., {"run": {...}, "dependency1": {...}})
    -- ==========================================
    CASE
      -- -------------------- MAP STEPS --------------------
      -- Map steps process arrays element-by-element.
      -- Each task receives ONE element from the array at its task_index position.
      WHEN step.step_type = 'map' THEN
        -- Map steps get raw array elements without any wrapper object
        CASE
          -- ROOT MAP: Gets array from run input
          -- Example: run input = [1, 2, 3]
          --   task 0 gets: 1
          --   task 1 gets: 2
          --   task 2 gets: 3
          WHEN step.deps_count = 0 THEN
            -- Root map (deps_count = 0): no dependencies, reads from run input.
            -- Extract the element at task_index from the run's input array.
            -- Note: If run input is not an array, this will return NULL
            -- and the flow will fail (validated in start_flow).
            jsonb_array_element(r.input, st.task_index)

          -- DEPENDENT MAP: Gets array from its single dependency
          -- Example: dependency output = ["a", "b", "c"]
          --   task 0 gets: "a"
          --   task 1 gets: "b"
          --   task 2 gets: "c"
          ELSE
            -- Has dependencies (should be exactly 1 for map steps).
            -- Extract the element at task_index from the dependency's output array.
            --
            -- Why the subquery with jsonb_each?
            -- - The dependency outputs a raw array: [1, 2, 3]
            -- - deps_outputs aggregates it into: {"dep_name": [1, 2, 3]}
            -- - We need to unwrap and get just the array value
            -- - Map steps have exactly 1 dependency (enforced by add_step)
            -- - So jsonb_each will return exactly 1 row
            -- - We extract the 'value' which is the raw array [1, 2, 3]
            -- - Then get the element at task_index from that array
            (SELECT jsonb_array_element(value, st.task_index)
             FROM jsonb_each(dep_out.deps_output)
             LIMIT 1)
        END

      -- -------------------- NON-MAP STEPS --------------------
      -- Regular (non-map) steps receive ALL inputs as a structured object.
      -- This includes the original run input plus all dependency outputs.
      ELSE
        -- Non-map steps get structured input with named keys
        -- Example output: {
        --   "run": {"original": "input"},
        --   "step1": {"output": "from_step1"},
        --   "step2": {"output": "from_step2"}
        -- }
        --
        -- Build object with 'run' key containing original input
        jsonb_build_object('run', r.input) ||
        -- Merge with deps_output which already has dependency outputs
        -- deps_output format: {"dep1": output1, "dep2": output2, ...}
        -- If no dependencies, defaults to empty object
        coalesce(dep_out.deps_output, '{}'::jsonb)
    END as input,
    st.message_id as msg_id
  from tasks st
  join runs r on st.run_id = r.run_id
  join pgflow.steps step on
    step.flow_slug = st.flow_slug and
    step.step_slug = st.step_slug
  left join deps_outputs dep_out on
    dep_out.run_id = st.run_id and
    dep_out.step_slug = st.step_slug
  -- Always true. Its only purpose is to read the set_vt_batch CTE so that the
  -- visibility-timeout update is actually executed (see the CTE comment).
  where (select count(*) from set_vt_batch) >= 0
$$;
@@ -1,489 +0,0 @@
-- Modify "maybe_complete_run" function
--
-- Completes the run when it has no remaining steps and is not completed yet:
-- sets status/completed_at, stores the outputs of its leaf steps in
-- runs.output and broadcasts a run:completed event through realtime.send.
-- When the run does not qualify, nothing is updated and nothing is sent.
CREATE OR REPLACE FUNCTION "pgflow"."maybe_complete_run" ("run_id" uuid) RETURNS void LANGUAGE plpgsql SET "search_path" = '' AS $$
declare
  v_run pgflow.runs%ROWTYPE;
begin
  -- ---------- Complete the run if every step is done ----------
  -- The output sub-select sits inside the UPDATE, so the aggregation is only
  -- computed for a run that is actually being completed.
  update pgflow.runs
  set
    status = 'completed',
    completed_at = now(),
    output = (
      -- One key per completed leaf step (a step no other step depends on):
      --   map step    -> array of all completed task outputs, by task_index
      --   single step -> output of its completed task
      select jsonb_object_agg(
        leaf_state.step_slug,
        case
          when leaf_step.step_type = 'map' then (
            select coalesce(jsonb_agg(leaf_task.output order by leaf_task.task_index), '[]'::jsonb)
            from pgflow.step_tasks leaf_task
            where leaf_task.run_id = leaf_state.run_id
              and leaf_task.step_slug = leaf_state.step_slug
              and leaf_task.status = 'completed'
          )
          when leaf_step.step_type = 'single' then (
            select leaf_task.output
            from pgflow.step_tasks leaf_task
            where leaf_task.run_id = leaf_state.run_id
              and leaf_task.step_slug = leaf_state.step_slug
              and leaf_task.status = 'completed'
            limit 1
          )
        end
      )
      from pgflow.step_states leaf_state
      join pgflow.steps leaf_step
        on leaf_step.flow_slug = leaf_state.flow_slug
       and leaf_step.step_slug = leaf_state.step_slug
      where leaf_state.run_id = maybe_complete_run.run_id
        and leaf_state.status = 'completed'
        and not exists (
          select 1
          from pgflow.deps dep
          where dep.flow_slug = leaf_state.flow_slug
            and dep.dep_slug = leaf_state.step_slug
        )
    )
  where pgflow.runs.run_id = maybe_complete_run.run_id
    and pgflow.runs.remaining_steps = 0
    and pgflow.runs.status <> 'completed'
  returning * into v_run;

  -- No row was updated: the run is unfinished or was already completed
  if v_run.run_id is null then
    return;
  end if;

  -- ---------- Broadcast completion event ----------
  perform realtime.send(
    jsonb_build_object(
      'event_type', 'run:completed',
      'run_id', v_run.run_id,
      'flow_slug', v_run.flow_slug,
      'status', 'completed',
      'output', v_run.output,
      'completed_at', v_run.completed_at
    ),
    'run:completed',
    concat('pgflow:run:', v_run.run_id),
    false
  );
end;
$$;
-- Modify "complete_task" function
--
-- Records the successful result of one task and propagates the consequences:
--   1. rejects a non-array output when a dependent map step still needs its
--      task count resolved (raises an exception, nothing is written);
--   2. marks the 'started' task as completed and stores its output;
--   3. decrements the step's remaining_tasks, completing the step at zero;
--   4. when the step completed: decrements remaining_deps of child steps,
--      resolves initial_tasks of child map steps, decrements the run's
--      remaining_steps, cascades taskless steps and broadcasts step:completed;
--   5. archives the task's queue message, starts ready steps and checks
--      whether the run is finished.
--
-- Parameters: run_id / step_slug / task_index identify the task, output is
-- the value to store for it.
-- Returns the step_tasks row(s) identified by (run_id, step_slug, task_index).
CREATE OR REPLACE FUNCTION "pgflow"."complete_task" ("run_id" uuid, "step_slug" text, "task_index" integer, "output" jsonb) RETURNS SETOF "pgflow"."step_tasks" LANGUAGE plpgsql SET "search_path" = '' AS $$
declare
  v_state pgflow.step_states%ROWTYPE;
  v_rejecting_map_slug text;
begin

  -- ==========================================
  -- VALIDATION: array output for dependent maps
  -- ==========================================
  -- Runs before any write so a bad output fails fast.
  -- Only single parents are checked: a map parent produces one element per
  -- task and its outputs are aggregated elsewhere.
  select child_step.step_slug
  into v_rejecting_map_slug
  from pgflow.deps dependency
  join pgflow.steps child_step
    on child_step.flow_slug = dependency.flow_slug
   and child_step.step_slug = dependency.step_slug
  join pgflow.steps parent_step
    on parent_step.flow_slug = dependency.flow_slug
   and parent_step.step_slug = dependency.dep_slug
  join pgflow.step_states child_state
    on child_state.flow_slug = child_step.flow_slug
   and child_state.step_slug = child_step.step_slug
  where dependency.dep_slug = complete_task.step_slug  -- the completing step is the parent
    and dependency.flow_slug = (select r.flow_slug from pgflow.runs r where r.run_id = complete_task.run_id)
    and parent_step.step_type = 'single'
    and child_step.step_type = 'map'
    and child_state.run_id = complete_task.run_id
    and child_state.initial_tasks is null
    -- true for SQL NULL as well as for any non-array jsonb value
    and jsonb_typeof(complete_task.output) is distinct from 'array'
  limit 1;

  if v_rejecting_map_slug is not null then
    raise exception 'Map step % expects array input but dependency % produced % (output: %)',
      v_rejecting_map_slug,
      complete_task.step_slug,
      coalesce(jsonb_typeof(complete_task.output), 'null'),
      complete_task.output;
  end if;

  -- ==========================================
  -- MAIN CTE CHAIN: update task and propagate changes
  -- ==========================================
  with
  -- ---------- Lock acquisition ----------
  -- Intended order: run first, then step, to prevent deadlocks.
  -- NOTE(review): run_lock, step_lock and child_steps_lock are plain
  -- SELECT ... FOR UPDATE CTEs that no other part of this statement reads.
  -- PostgreSQL runs a SELECT CTE only as far as the primary query demands its
  -- output, so confirm these row locks are really taken. Making them
  -- effective changes lock ordering and should be done consistently with the
  -- other pgflow functions.
  run_lock as (
    select * from pgflow.runs
    where pgflow.runs.run_id = complete_task.run_id
    for update
  ),
  step_lock as (
    select * from pgflow.step_states
    where pgflow.step_states.run_id = complete_task.run_id
      and pgflow.step_states.step_slug = complete_task.step_slug
    for update
  ),
  -- ---------- Task completion ----------
  -- Only a task that is currently 'started' is completed
  task as (
    update pgflow.step_tasks
    set
      status = 'completed',
      completed_at = now(),
      output = complete_task.output
    where pgflow.step_tasks.run_id = complete_task.run_id
      and pgflow.step_tasks.step_slug = complete_task.step_slug
      and pgflow.step_tasks.task_index = complete_task.task_index
      and pgflow.step_tasks.status = 'started'
    returning *
  ),
  -- ---------- Step state update ----------
  -- Joined to "task", so nothing changes when no task was completed above.
  -- remaining_tasks = 1 before the decrement means this was the last task.
  step_state as (
    update pgflow.step_states
    set
      status = case
        when pgflow.step_states.remaining_tasks = 1 then 'completed'
        else 'started'
      end,
      completed_at = case
        when pgflow.step_states.remaining_tasks = 1 then now()
      end,
      remaining_tasks = pgflow.step_states.remaining_tasks - 1
    from task
    where pgflow.step_states.run_id = complete_task.run_id
      and pgflow.step_states.step_slug = complete_task.step_slug
    returning pgflow.step_states.*
  ),
  -- ---------- Dependency resolution ----------
  -- Children of the completing step; empty unless the step just completed
  child_steps as (
    select deps.step_slug as child_step_slug
    from pgflow.deps deps
    join step_state parent_state
      on parent_state.status = 'completed'
     and deps.flow_slug = parent_state.flow_slug
    where deps.dep_slug = complete_task.step_slug  -- dep_slug = parent, step_slug = child
    order by deps.step_slug
  ),
  -- ---------- Lock child steps ----------
  -- See the NOTE(review) on run_lock above
  child_steps_lock as (
    select * from pgflow.step_states
    where pgflow.step_states.run_id = complete_task.run_id
      and pgflow.step_states.step_slug in (select child_step_slug from child_steps)
    for update
  ),
  -- ---------- Update child steps ----------
  -- One dependency fewer for every child; child map steps whose
  -- initial_tasks is still NULL learn their task count from the parent.
  child_steps_update as (
    update pgflow.step_states child_state
    set
      remaining_deps = child_state.remaining_deps - 1,
      initial_tasks = case
        -- Map -> map: number of parent tasks. The task completed by this very
        -- statement is not visible as 'completed' yet, hence the + 1.
        -- TODO: Refactor to use future column step_states.total_tasks
        when child_step.step_type = 'map'
         and child_state.initial_tasks is null
         and parent_step.step_type = 'map' then
          (select count(*)::int + 1
           from pgflow.step_tasks parent_tasks
           where parent_tasks.run_id = complete_task.run_id
             and parent_tasks.step_slug = complete_task.step_slug
             and parent_tasks.status = 'completed'
             and parent_tasks.task_index <> complete_task.task_index)
        -- Single -> map: length of the output array, NULL when the output is
        -- not an array
        when child_step.step_type = 'map'
         and child_state.initial_tasks is null then
          case
            when jsonb_typeof(complete_task.output) = 'array' then
              jsonb_array_length(complete_task.output)
          end
        -- Anything else keeps its current value (including NULL)
        else child_state.initial_tasks
      end
    from child_steps children
    join pgflow.steps child_step
      on child_step.flow_slug = (select r.flow_slug from pgflow.runs r where r.run_id = complete_task.run_id)
     and child_step.step_slug = children.child_step_slug
    join pgflow.steps parent_step
      on parent_step.flow_slug = (select r.flow_slug from pgflow.runs r where r.run_id = complete_task.run_id)
     and parent_step.step_slug = complete_task.step_slug
    where child_state.run_id = complete_task.run_id
      and child_state.step_slug = children.child_step_slug
  )
  -- ---------- Update run remaining_steps ----------
  -- Only when the step itself just completed
  update pgflow.runs
  set remaining_steps = pgflow.runs.remaining_steps - 1
  from step_state
  where pgflow.runs.run_id = complete_task.run_id
    and step_state.status = 'completed';

  -- ==========================================
  -- POST-COMPLETION ACTIONS
  -- ==========================================

  -- ---------- Re-read the step state for broadcasting ----------
  select * into v_state
  from pgflow.step_states
  where pgflow.step_states.run_id = complete_task.run_id
    and pgflow.step_states.step_slug = complete_task.step_slug;

  -- ---------- Handle step completion ----------
  if v_state.status = 'completed' then
    -- Cascade complete any taskless steps that are now ready
    perform pgflow.cascade_complete_taskless_steps(complete_task.run_id);

    -- Broadcast step:completed. A map step reports the array of all its
    -- completed task outputs, any other step reports this task's output.
    perform realtime.send(
      jsonb_build_object(
        'event_type', 'step:completed',
        'run_id', complete_task.run_id,
        'step_slug', complete_task.step_slug,
        'status', 'completed',
        'output', case
          when (select s.step_type
                from pgflow.steps s
                where s.flow_slug = v_state.flow_slug
                  and s.step_slug = complete_task.step_slug) = 'map' then
            (select coalesce(jsonb_agg(st.output order by st.task_index), '[]'::jsonb)
             from pgflow.step_tasks st
             where st.run_id = complete_task.run_id
               and st.step_slug = complete_task.step_slug
               and st.status = 'completed')
          else
            complete_task.output
        end,
        'completed_at', v_state.completed_at
      ),
      concat('step:', complete_task.step_slug, ':completed'),
      concat('pgflow:run:', complete_task.run_id),
      false
    );
  end if;

  -- ---------- Archive completed task message ----------
  -- Hands the message of the (now completed) task to pgmq.archive; when the
  -- task is not completed the sub-select is empty and nothing is archived.
  perform (
    with completed_tasks as (
      select r.flow_slug, st.message_id
      from pgflow.step_tasks st
      join pgflow.runs r on st.run_id = r.run_id
      where st.run_id = complete_task.run_id
        and st.step_slug = complete_task.step_slug
        and st.task_index = complete_task.task_index
        and st.status = 'completed'
    )
    select pgmq.archive(ct.flow_slug, ct.message_id)
    from completed_tasks ct
  );

  -- ---------- Trigger next steps ----------
  -- Start any steps that are now ready (deps satisfied)
  perform pgflow.start_ready_steps(complete_task.run_id);

  -- Check if the entire run is complete
  perform pgflow.maybe_complete_run(complete_task.run_id);

  -- ---------- Return completed task ----------
  return query
    select *
    from pgflow.step_tasks as step_task
    where step_task.run_id = complete_task.run_id
      and step_task.step_slug = complete_task.step_slug
      and step_task.task_index = complete_task.task_index;

end;
$$;
-- Modify "start_tasks" function
--
-- Claims the queued step tasks that belong to the given queue messages:
-- marks them 'started' for the calling worker, extends the visibility
-- timeout of their messages and returns one pgflow.step_task_record per
-- claimed task, including the input the handler should receive.
-- Tasks of runs whose status is 'failed' are never started.
--
-- Parameters:
--   flow_slug  flow whose tasks are started; also the first argument handed
--              to pgflow.set_vt_batch
--   msg_ids    ids of the messages the worker wants to start
--   worker_id  recorded as last_worker_id on every started task
--
-- Returns (flow_slug, run_id, step_slug, input, msg_id) for each message that
-- still had a startable 'queued' task; other messages produce no row.
CREATE OR REPLACE FUNCTION "pgflow"."start_tasks" ("flow_slug" text, "msg_ids" bigint[], "worker_id" uuid) RETURNS SETOF "pgflow"."step_task_record" LANGUAGE sql SET "search_path" = '' AS $$
  with tasks as (
    -- Queued tasks addressed by the requested messages.
    -- Parameters are qualified with the function name so that a column of the
    -- same name can never shadow them.
    select
      task.flow_slug,
      task.run_id,
      task.step_slug,
      task.task_index,
      task.message_id
    from pgflow.step_tasks as task
    join pgflow.runs r on r.run_id = task.run_id
    where task.flow_slug = start_tasks.flow_slug
      and task.message_id = any(start_tasks.msg_ids)
      and task.status = 'queued'
      -- MVP: Don't start tasks on failed runs
      and r.status != 'failed'
  ),
  -- Data-modifying CTE: executed even though nothing reads its output
  start_tasks_update as (
    update pgflow.step_tasks
    set
      attempts_count = attempts_count + 1,
      status = 'started',
      started_at = now(),
      last_worker_id = start_tasks.worker_id
    from tasks
    where step_tasks.message_id = tasks.message_id
      and step_tasks.flow_slug = tasks.flow_slug
      and step_tasks.status = 'queued'
  ),
  runs as (
    select
      r.run_id,
      r.input
    from pgflow.runs r
    where r.run_id in (select run_id from tasks)
  ),
  -- One row per (task, dependency) with the dependency's output
  deps as (
    select
      st.run_id,
      st.step_slug,
      dep.dep_slug,
      -- Aggregate map outputs or use single output
      CASE
        WHEN dep_step.step_type = 'map' THEN
          -- Aggregate all task outputs ordered by task_index
          -- Use COALESCE to return empty array if no tasks
          (SELECT COALESCE(jsonb_agg(dt.output ORDER BY dt.task_index), '[]'::jsonb)
           FROM pgflow.step_tasks dt
           WHERE dt.run_id = st.run_id
             AND dt.step_slug = dep.dep_slug
             AND dt.status = 'completed')
        ELSE
          -- Single step: use the single task output
          dep_task.output
      END as dep_output
    from tasks st
    join pgflow.deps dep on dep.flow_slug = st.flow_slug and dep.step_slug = st.step_slug
    join pgflow.steps dep_step on dep_step.flow_slug = dep.flow_slug and dep_step.step_slug = dep.dep_slug
    -- The step_type condition lives in ON (not WHERE) so that map
    -- dependencies keep exactly one row with a NULL dep_task
    left join pgflow.step_tasks dep_task on
      dep_task.run_id = st.run_id and
      dep_task.step_slug = dep.dep_slug and
      dep_task.status = 'completed'
      and dep_step.step_type = 'single'  -- Only join for single steps
  ),
  -- {"dep_slug": output, ...} per task
  deps_outputs as (
    select
      d.run_id,
      d.step_slug,
      jsonb_object_agg(d.dep_slug, d.dep_output) as deps_output
    from deps d
    group by d.run_id, d.step_slug
  ),
  -- Visibility timeout per message: step timeout (falling back to the flow
  -- timeout) plus 2
  timeouts as (
    select
      task.message_id,
      task.flow_slug,
      coalesce(step.opt_timeout, flow.opt_timeout) + 2 as vt_delay
    from tasks task
    join pgflow.flows flow on flow.flow_slug = task.flow_slug
    join pgflow.steps step on step.flow_slug = task.flow_slug and step.step_slug = task.step_slug
  ),
  -- Batch update visibility timeouts for all messages.
  -- This is a plain SELECT CTE, so PostgreSQL only runs it when the primary
  -- query reads it; the WHERE clause of the primary query does that.
  -- HAVING keeps the helper from being called with NULL arrays when no task
  -- was claimed.
  set_vt_batch as (
    select pgflow.set_vt_batch(
      start_tasks.flow_slug,
      array_agg(t.message_id order by t.message_id),
      array_agg(t.vt_delay order by t.message_id)
    )
    from timeouts t
    having count(*) > 0
  )
  select
    st.flow_slug,
    st.run_id,
    st.step_slug,
    -- ==========================================
    -- INPUT CONSTRUCTION LOGIC
    -- ==========================================
    -- This nested CASE statement determines how to construct the input
    -- for each task based on the step type (map vs non-map).
    --
    -- The fundamental difference:
    -- - Map steps: Receive RAW array elements (e.g., just 42 or "hello")
    -- - Non-map steps: Receive structured objects with named keys
    --   (e.g., {"run": {...}, "dependency1": {...}})
    -- ==========================================
    CASE
      -- -------------------- MAP STEPS --------------------
      -- Map steps process arrays element-by-element.
      -- Each task receives ONE element from the array at its task_index position.
      WHEN step.step_type = 'map' THEN
        -- Map steps get raw array elements without any wrapper object
        CASE
          -- ROOT MAP: Gets array from run input
          -- Example: run input = [1, 2, 3]
          --   task 0 gets: 1
          --   task 1 gets: 2
          --   task 2 gets: 3
          WHEN step.deps_count = 0 THEN
            -- Root map (deps_count = 0): no dependencies, reads from run input.
            -- Extract the element at task_index from the run's input array.
            -- Note: If run input is not an array, this will return NULL
            -- and the flow will fail (validated in start_flow).
            jsonb_array_element(r.input, st.task_index)

          -- DEPENDENT MAP: Gets array from its single dependency
          -- Example: dependency output = ["a", "b", "c"]
          --   task 0 gets: "a"
          --   task 1 gets: "b"
          --   task 2 gets: "c"
          ELSE
            -- Has dependencies (should be exactly 1 for map steps).
            -- Extract the element at task_index from the dependency's output array.
            --
            -- Why the subquery with jsonb_each?
            -- - The dependency outputs a raw array: [1, 2, 3]
            -- - deps_outputs aggregates it into: {"dep_name": [1, 2, 3]}
            -- - We need to unwrap and get just the array value
            -- - Map steps have exactly 1 dependency (enforced by add_step)
            -- - So jsonb_each will return exactly 1 row
            -- - We extract the 'value' which is the raw array [1, 2, 3]
            -- - Then get the element at task_index from that array
            (SELECT jsonb_array_element(value, st.task_index)
             FROM jsonb_each(dep_out.deps_output)
             LIMIT 1)
        END

      -- -------------------- NON-MAP STEPS --------------------
      -- Regular (non-map) steps receive ALL inputs as a structured object.
      -- This includes the original run input plus all dependency outputs.
      ELSE
        -- Non-map steps get structured input with named keys
        -- Example output: {
        --   "run": {"original": "input"},
        --   "step1": {"output": "from_step1"},
        --   "step2": {"output": "from_step2"}
        -- }
        --
        -- Build object with 'run' key containing original input
        jsonb_build_object('run', r.input) ||
        -- Merge with deps_output which already has dependency outputs
        -- deps_output format: {"dep1": output1, "dep2": output2, ...}
        -- If no dependencies, defaults to empty object
        coalesce(dep_out.deps_output, '{}'::jsonb)
    END as input,
    st.message_id as msg_id
  from tasks st
  join runs r on st.run_id = r.run_id
  join pgflow.steps step on
    step.flow_slug = st.flow_slug and
    step.step_slug = st.step_slug
  left join deps_outputs dep_out on
    dep_out.run_id = st.run_id and
    dep_out.step_slug = st.step_slug
  -- Always true. Its only purpose is to read the set_vt_batch CTE so that the
  -- visibility-timeout update is actually executed (see the CTE comment).
  where (select count(*) from set_vt_batch) >= 0
$$;