@pgflow/core 0.0.0-array-map-steps-302d00a8-20250925065142 → 0.0.0-array-map-steps-b956f8f9-20251006084236
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +148 -72
- package/dist/CHANGELOG.md +17 -15
- package/dist/README.md +148 -72
- package/dist/package.json +1 -1
- package/dist/supabase/migrations/{20250919101802_pgflow_temp_orphaned_messages_index.sql → 20251006073122_pgflow_add_map_step_type.sql} +533 -1
- package/dist/types.d.ts +1 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +2 -2
- package/dist/PLAN_race_condition_testing.md +0 -176
- package/dist/supabase/migrations/20250912075001_pgflow_temp_pr1_schema.sql +0 -185
- package/dist/supabase/migrations/20250912080800_pgflow_temp_pr2_root_maps.sql +0 -95
- package/dist/supabase/migrations/20250912125339_pgflow_TEMP_task_spawning_optimization.sql +0 -146
- package/dist/supabase/migrations/20250916093518_pgflow_temp_add_cascade_complete.sql +0 -321
- package/dist/supabase/migrations/20250916142327_pgflow_temp_make_initial_tasks_nullable.sql +0 -624
- package/dist/supabase/migrations/20250916203905_pgflow_temp_handle_arrays_in_start_tasks.sql +0 -157
- package/dist/supabase/migrations/20250918042753_pgflow_temp_handle_map_output_aggregation.sql +0 -489
- package/dist/supabase/migrations/20250919135211_pgflow_temp_return_task_index_in_start_tasks.sql +0 -178
package/dist/supabase/migrations/20250916203905_pgflow_temp_handle_arrays_in_start_tasks.sql
DELETED
|
@@ -1,157 +0,0 @@
|
|
|
1
|
-
-- Modify "start_tasks" function
--
-- Claims the queued tasks that belong to the given queue messages and returns,
-- for each claimed task, the input its handler should receive.
--
-- Parameters:
--   flow_slug - only tasks of this flow are considered
--   msg_ids   - matched against step_tasks.message_id
--   worker_id - written to step_tasks.last_worker_id of every claimed task
--
-- Result rows are built from: flow_slug, run_id, step_slug, input, msg_id.
--
-- Side effects:
--   * claimed tasks move from 'queued' to 'started' (attempts_count + 1,
--     started_at = now())
--   * pgflow.set_vt_batch is called once with all claimed message ids and a
--     per-message delay of (step or flow opt_timeout) + 2
CREATE OR REPLACE FUNCTION "pgflow"."start_tasks" ("flow_slug" text, "msg_ids" bigint[], "worker_id" uuid) RETURNS SETOF "pgflow"."step_task_record" LANGUAGE sql SET "search_path" = '' AS $$
  -- Queued tasks of this flow whose message id was handed to us.
  with tasks as (
    select
      task.flow_slug,
      task.run_id,
      task.step_slug,
      task.task_index,
      task.message_id
    from pgflow.step_tasks as task
    where task.flow_slug = start_tasks.flow_slug
      -- parameters are qualified so a same-named column can never shadow them
      and task.message_id = any(start_tasks.msg_ids)
      and task.status = 'queued'
  ),
  -- Data-modifying CTE: always executed, even though nothing reads from it.
  start_tasks_update as (
    update pgflow.step_tasks
    set
      attempts_count = attempts_count + 1,
      status = 'started',
      started_at = now(),
      last_worker_id = start_tasks.worker_id
    from tasks
    where step_tasks.message_id = tasks.message_id
      and step_tasks.flow_slug = tasks.flow_slug
      and step_tasks.status = 'queued'
  ),
  -- Input of every run that owns at least one claimed task.
  runs as (
    select
      r.run_id,
      r.input
    from pgflow.runs r
    where r.run_id in (select run_id from tasks)
  ),
  -- One row per (claimed task, completed dependency task).
  deps as (
    select
      st.run_id,
      st.step_slug,
      dep.dep_slug,
      dep_task.output as dep_output
    from tasks st
    join pgflow.deps dep on dep.flow_slug = st.flow_slug and dep.step_slug = st.step_slug
    join pgflow.step_tasks dep_task on
      dep_task.run_id = st.run_id and
      dep_task.step_slug = dep.dep_slug and
      dep_task.status = 'completed'
  ),
  -- Dependency outputs folded into {"dep_slug": output, ...} per step.
  deps_outputs as (
    select
      d.run_id,
      d.step_slug,
      jsonb_object_agg(d.dep_slug, d.dep_output) as deps_output,
      count(*) as dep_count
    from deps d
    group by d.run_id, d.step_slug
  ),
  -- Visibility-timeout delay per message: step timeout, else flow timeout, plus 2.
  timeouts as (
    select
      task.message_id,
      task.flow_slug,
      coalesce(step.opt_timeout, flow.opt_timeout) + 2 as vt_delay
    from tasks task
    join pgflow.flows flow on flow.flow_slug = task.flow_slug
    join pgflow.steps step on step.flow_slug = task.flow_slug and step.step_slug = task.step_slug
  ),
  -- Batch update visibility timeouts for all messages.
  -- HAVING skips the call entirely when there is nothing to update, so the
  -- helper is never invoked with NULL arrays.
  set_vt_batch as (
    select pgflow.set_vt_batch(
      start_tasks.flow_slug,
      array_agg(t.message_id order by t.message_id),
      array_agg(t.vt_delay order by t.message_id)
    )
    from timeouts t
    having count(*) > 0
  ),
  -- PostgreSQL does not evaluate a SELECT CTE that is never referenced, so the
  -- batch update above must be consumed somewhere. Counting its rows forces a
  -- full evaluation and always yields exactly one row, which keeps the
  -- cardinality of the final result unchanged when it is cross joined below.
  vt_applied as (
    select count(*) as updated_messages
    from set_vt_batch
  )
  select
    st.flow_slug,
    st.run_id,
    st.step_slug,
    -- ==========================================
    -- INPUT CONSTRUCTION LOGIC
    -- ==========================================
    -- Map steps receive the RAW array element at their task_index
    -- (e.g. just 42 or "hello"); non-map steps receive a structured object
    -- (e.g. {"run": {...}, "dependency1": {...}}).
    CASE
      -- -------------------- MAP STEPS --------------------
      WHEN step.step_type = 'map' THEN
        CASE
          -- ROOT MAP (deps_count = 0): element comes from the run input.
          -- Example: run input = [1, 2, 3] -> task 0 gets 1, task 1 gets 2, ...
          -- Original note: a non-array run input yields NULL here; the
          -- original comments state that start_flow validates this.
          WHEN step.deps_count = 0 THEN
            jsonb_array_element(r.input, st.task_index)

          -- DEPENDENT MAP: element comes from the dependency's output array.
          -- deps_output has the shape {"dep_name": [..]}; jsonb_each unwraps
          -- it and LIMIT 1 picks the entry (the original comments state that
          -- add_step restricts map steps to exactly one dependency).
          ELSE
            (SELECT jsonb_array_element(value, st.task_index)
             FROM jsonb_each(dep_out.deps_output)
             LIMIT 1)
        END

      -- -------------------- NON-MAP STEPS --------------------
      -- {"run": <run input>} merged with {"dep1": output1, ...};
      -- a step without dependency outputs only gets the "run" key.
      ELSE
        jsonb_build_object('run', r.input) ||
        coalesce(dep_out.deps_output, '{}'::jsonb)
    END as input,
    st.message_id as msg_id
  from tasks st
  join runs r on st.run_id = r.run_id
  join pgflow.steps step on
    step.flow_slug = st.flow_slug and
    step.step_slug = st.step_slug
  left join deps_outputs dep_out on
    dep_out.run_id = st.run_id and
    dep_out.step_slug = st.step_slug
  -- single-row join whose only purpose is to make set_vt_batch execute
  cross join vt_applied
$$;
|
package/dist/supabase/migrations/20250918042753_pgflow_temp_handle_map_output_aggregation.sql
DELETED
|
@@ -1,489 +0,0 @@
|
|
|
1
|
-
-- Modify "maybe_complete_run" function
--
-- Completes a run once it has no remaining steps.
--
-- When the run identified by run_id has remaining_steps = 0 and is not yet
-- 'completed', the run is marked completed, its output is set to an object
-- keyed by leaf step slug, and a 'run:completed' event is sent through
-- realtime.send. Otherwise nothing happens.
CREATE OR REPLACE FUNCTION "pgflow"."maybe_complete_run" ("run_id" uuid) RETURNS void LANGUAGE plpgsql SET "search_path" = '' AS $$
declare
  v_finished_run pgflow.runs%ROWTYPE;
begin
  -- Flip the run to 'completed'. The output subquery sits inside the UPDATE,
  -- so it is only needed for a run that actually matches the WHERE clause.
  update pgflow.runs
  set
    status = 'completed',
    completed_at = now(),
    output = (
      -- {"<leaf step slug>": <output>} for every completed leaf step
      select jsonb_object_agg(
        leaf.step_slug,
        case
          when leaf.step_type = 'map' then leaf.aggregated_output
          else leaf.single_output
        end
      )
      from (
        select distinct
          sst.step_slug,
          stp.step_type,
          -- map step: all completed task outputs as an array ordered by
          -- task_index ('[]' when there are none)
          case when stp.step_type = 'map' then
            (select coalesce(jsonb_agg(tsk.output order by tsk.task_index), '[]'::jsonb)
             from pgflow.step_tasks tsk
             where tsk.run_id = sst.run_id
               and tsk.step_slug = sst.step_slug
               and tsk.status = 'completed')
          end as aggregated_output,
          -- single step: output of its completed task
          case when stp.step_type = 'single' then
            (select tsk.output
             from pgflow.step_tasks tsk
             where tsk.run_id = sst.run_id
               and tsk.step_slug = sst.step_slug
               and tsk.status = 'completed'
             limit 1)
          end as single_output
        from pgflow.step_states sst
        join pgflow.steps stp
          on stp.flow_slug = sst.flow_slug
         and stp.step_slug = sst.step_slug
        where sst.run_id = maybe_complete_run.run_id
          and sst.status = 'completed'
          -- leaf step = no other step lists it as a dependency
          and not exists (
            select 1
            from pgflow.deps edge
            where edge.flow_slug = sst.flow_slug
              and edge.dep_slug = sst.step_slug
          )
      ) leaf
    )
  where pgflow.runs.run_id = maybe_complete_run.run_id
    and pgflow.runs.remaining_steps = 0
    and pgflow.runs.status <> 'completed'
  returning * into v_finished_run;

  -- v_finished_run stays NULL when the UPDATE matched no row; only a run
  -- completed by this call is broadcast.
  if v_finished_run.run_id is not null then
    perform realtime.send(
      jsonb_build_object(
        'event_type', 'run:completed',
        'run_id', v_finished_run.run_id,
        'flow_slug', v_finished_run.flow_slug,
        'status', 'completed',
        'output', v_finished_run.output,
        'completed_at', v_finished_run.completed_at
      ),
      'run:completed',
      concat('pgflow:run:', v_finished_run.run_id),
      false
    );
  end if;
end;
$$;
|
|
85
|
-
-- Modify "complete_task" function
--
-- Stores the output of one started task and propagates the consequences.
--
-- Parameters:
--   run_id, step_slug, task_index - identify the task
--   output                        - value stored as the task output
--
-- Steps performed, in order:
--   1. Reject a non-array output when a single step feeds a map step whose
--      initial_tasks is still NULL (raises an exception).
--   2. In one statement: mark the task completed (only if it is 'started'),
--      decrement the step's remaining_tasks, update child step states and
--      decrement the run's remaining_steps when the step completed.
--   3. If the step is completed: cascade taskless steps and send a
--      'step:completed' event.
--   4. Archive the task's queue message, start ready steps, and check whether
--      the run is complete.
--
-- Returns the step_tasks row of the addressed task.
CREATE OR REPLACE FUNCTION "pgflow"."complete_task" ("run_id" uuid, "step_slug" text, "task_index" integer, "output" jsonb) RETURNS SETOF "pgflow"."step_tasks" LANGUAGE plpgsql SET "search_path" = '' AS $$
declare
  v_state_after pgflow.step_states%ROWTYPE;
  v_offending_map_slug text;
begin

  -- ------------------------------------------------------------------
  -- 1. Validation: a single step feeding a map step must output an array
  -- ------------------------------------------------------------------
  -- Runs before the main statement so a bad output fails early. Map parents
  -- are not checked here because their task outputs are aggregated.
  select map_step.step_slug into v_offending_map_slug
  from pgflow.deps edge
  join pgflow.steps map_step
    on map_step.flow_slug = edge.flow_slug
   and map_step.step_slug = edge.step_slug
  join pgflow.steps producer
    on producer.flow_slug = edge.flow_slug
   and producer.step_slug = edge.dep_slug
  join pgflow.step_states map_state
    on map_state.flow_slug = map_step.flow_slug
   and map_state.step_slug = map_step.step_slug
  where edge.dep_slug = complete_task.step_slug  -- the completing step is the parent
    and edge.flow_slug = (select r.flow_slug from pgflow.runs r where r.run_id = complete_task.run_id)
    and producer.step_type = 'single'
    and map_step.step_type = 'map'
    and map_state.run_id = complete_task.run_id
    and map_state.initial_tasks is null
    and (complete_task.output is null or jsonb_typeof(complete_task.output) != 'array')
  limit 1;

  if v_offending_map_slug is not null then
    raise exception 'Map step % expects array input but dependency % produced % (output: %)',
      v_offending_map_slug,
      complete_task.step_slug,
      case when complete_task.output is null then 'null' else jsonb_typeof(complete_task.output) end,
      complete_task.output;
  end if;

  -- ------------------------------------------------------------------
  -- 2. Main statement: complete the task and propagate counters
  -- ------------------------------------------------------------------
  with
  -- Intended lock order: run first, then step.
  -- NOTE(review): run_lock, step_lock and child_steps_lock are plain SELECT
  -- CTEs that no later part of this statement references. PostgreSQL
  -- documents that SELECT CTEs are only evaluated as far as the primary
  -- query demands their output - confirm these locks are actually taken.
  run_lock as (
    select * from pgflow.runs
    where pgflow.runs.run_id = complete_task.run_id
    for update
  ),
  step_lock as (
    select * from pgflow.step_states
    where pgflow.step_states.run_id = complete_task.run_id
      and pgflow.step_states.step_slug = complete_task.step_slug
    for update
  ),
  -- Store status/output on the task; only a task in 'started' state matches.
  task as (
    update pgflow.step_tasks
    set
      status = 'completed',
      completed_at = now(),
      output = complete_task.output
    where pgflow.step_tasks.run_id = complete_task.run_id
      and pgflow.step_tasks.step_slug = complete_task.step_slug
      and pgflow.step_tasks.task_index = complete_task.task_index
      and pgflow.step_tasks.status = 'started'
    returning *
  ),
  -- Decrement remaining_tasks. The CASE expressions see the value from
  -- before the decrement, so "= 1" means "this was the last task".
  step_state as (
    update pgflow.step_states
    set
      status = case
        when pgflow.step_states.remaining_tasks = 1 then 'completed'
        else 'started'
      end,
      completed_at = case
        when pgflow.step_states.remaining_tasks = 1 then now()
        else null
      end,
      remaining_tasks = pgflow.step_states.remaining_tasks - 1
    from task
    where pgflow.step_states.run_id = complete_task.run_id
      and pgflow.step_states.step_slug = complete_task.step_slug
    returning pgflow.step_states.*
  ),
  -- Children of the completing step; empty unless the step just completed.
  child_steps as (
    select edge.step_slug as child_step_slug
    from pgflow.deps edge
    join step_state done_state
      on done_state.status = 'completed'
     and edge.flow_slug = done_state.flow_slug
    where edge.dep_slug = complete_task.step_slug  -- dep_slug = parent, step_slug = child
    order by edge.step_slug
  ),
  -- Row locks on the child step states (see NOTE(review) above).
  child_steps_lock as (
    select * from pgflow.step_states
    where pgflow.step_states.run_id = complete_task.run_id
      and pgflow.step_states.step_slug in (select child_step_slug from child_steps)
    for update
  ),
  -- One dependency fewer for every child; child map steps with NULL
  -- initial_tasks also learn their task count here.
  child_steps_update as (
    update pgflow.step_states child_state
    set remaining_deps = child_state.remaining_deps - 1,
        initial_tasks = case
          when kid_step.step_type = 'map' and child_state.initial_tasks is null then
            case
              when this_step.step_type = 'map' then
                -- map -> map: number of parent tasks. Already-completed
                -- siblings are counted and 1 is added for the current task,
                -- whose new status is not yet visible to this subquery.
                -- TODO (from original): replace with a step_states.total_tasks
                -- column once it exists.
                (select count(*)::int + 1
                 from pgflow.step_tasks sibling
                 where sibling.run_id = complete_task.run_id
                   and sibling.step_slug = complete_task.step_slug
                   and sibling.status = 'completed'
                   and sibling.task_index != complete_task.task_index)
              else
                -- single -> map: length of the output array, NULL otherwise
                case
                  when complete_task.output is not null
                    and jsonb_typeof(complete_task.output) = 'array' then
                    jsonb_array_length(complete_task.output)
                  else null
                end
            end
          else child_state.initial_tasks  -- keep the existing value (may be NULL)
        end
    from child_steps kids
    join pgflow.steps kid_step
      on kid_step.flow_slug = (select r.flow_slug from pgflow.runs r where r.run_id = complete_task.run_id)
     and kid_step.step_slug = kids.child_step_slug
    join pgflow.steps this_step
      on this_step.flow_slug = (select r.flow_slug from pgflow.runs r where r.run_id = complete_task.run_id)
     and this_step.step_slug = complete_task.step_slug
    where child_state.run_id = complete_task.run_id
      and child_state.step_slug = kids.child_step_slug
  )
  -- Primary statement: one step fewer on the run when the step completed.
  update pgflow.runs
  set remaining_steps = pgflow.runs.remaining_steps - 1
  from step_state
  where pgflow.runs.run_id = complete_task.run_id
    and step_state.status = 'completed';

  -- ------------------------------------------------------------------
  -- 3. Follow-up work for a completed step
  -- ------------------------------------------------------------------
  -- Re-read the step state as it is stored now.
  select * into v_state_after
  from pgflow.step_states
  where pgflow.step_states.run_id = complete_task.run_id
    and pgflow.step_states.step_slug = complete_task.step_slug;

  if v_state_after.status = 'completed' then
    perform pgflow.cascade_complete_taskless_steps(complete_task.run_id);

    -- step:completed event; map steps report all task outputs as an array,
    -- other steps report this task's output.
    perform realtime.send(
      jsonb_build_object(
        'event_type', 'step:completed',
        'run_id', complete_task.run_id,
        'step_slug', complete_task.step_slug,
        'status', 'completed',
        'output', case
          when (select s.step_type
                from pgflow.steps s
                where s.flow_slug = v_state_after.flow_slug
                  and s.step_slug = complete_task.step_slug) = 'map' then
            (select coalesce(jsonb_agg(st.output order by st.task_index), '[]'::jsonb)
             from pgflow.step_tasks st
             where st.run_id = complete_task.run_id
               and st.step_slug = complete_task.step_slug
               and st.status = 'completed')
          else
            complete_task.output
        end,
        'completed_at', v_state_after.completed_at
      ),
      concat('step:', complete_task.step_slug, ':completed'),
      concat('pgflow:run:', complete_task.run_id),
      false
    );
  end if;

  -- ------------------------------------------------------------------
  -- 4. Queue housekeeping and scheduling
  -- ------------------------------------------------------------------
  -- Archive the message of this task if the task is stored as completed.
  perform (
    with completed_tasks as (
      select r.flow_slug, st.message_id
      from pgflow.step_tasks st
      join pgflow.runs r on st.run_id = r.run_id
      where st.run_id = complete_task.run_id
        and st.step_slug = complete_task.step_slug
        and st.task_index = complete_task.task_index
        and st.status = 'completed'
    )
    select pgmq.archive(ct.flow_slug, ct.message_id)
    from completed_tasks ct
    where exists (select 1 from completed_tasks)
  );

  -- Start whatever became ready, then see whether the whole run is done.
  perform pgflow.start_ready_steps(complete_task.run_id);
  perform pgflow.maybe_complete_run(complete_task.run_id);

  -- Hand back the task row as it is stored now.
  return query
    select *
    from pgflow.step_tasks as step_task
    where step_task.run_id = complete_task.run_id
      and step_task.step_slug = complete_task.step_slug
      and step_task.task_index = complete_task.task_index;

end;
$$;
|
|
315
|
-
-- Modify "start_tasks" function
--
-- Claims the queued tasks that belong to the given queue messages and returns,
-- for each claimed task, the input its handler should receive. Tasks of runs
-- whose status is 'failed' are not claimed. Outputs of map dependencies are
-- delivered as one array ordered by task_index.
--
-- Parameters:
--   flow_slug - only tasks of this flow are considered
--   msg_ids   - matched against step_tasks.message_id
--   worker_id - written to step_tasks.last_worker_id of every claimed task
--
-- Result rows are built from: flow_slug, run_id, step_slug, input, msg_id.
--
-- Side effects:
--   * claimed tasks move from 'queued' to 'started' (attempts_count + 1,
--     started_at = now())
--   * pgflow.set_vt_batch is called once with all claimed message ids and a
--     per-message delay of (step or flow opt_timeout) + 2
CREATE OR REPLACE FUNCTION "pgflow"."start_tasks" ("flow_slug" text, "msg_ids" bigint[], "worker_id" uuid) RETURNS SETOF "pgflow"."step_task_record" LANGUAGE sql SET "search_path" = '' AS $$
  -- Queued tasks of this flow whose message id was handed to us.
  with tasks as (
    select
      task.flow_slug,
      task.run_id,
      task.step_slug,
      task.task_index,
      task.message_id
    from pgflow.step_tasks as task
    join pgflow.runs r on r.run_id = task.run_id
    where task.flow_slug = start_tasks.flow_slug
      -- parameters are qualified so a same-named column can never shadow them
      and task.message_id = any(start_tasks.msg_ids)
      and task.status = 'queued'
      -- MVP: Don't start tasks on failed runs
      and r.status != 'failed'
  ),
  -- Data-modifying CTE: always executed, even though nothing reads from it.
  start_tasks_update as (
    update pgflow.step_tasks
    set
      attempts_count = attempts_count + 1,
      status = 'started',
      started_at = now(),
      last_worker_id = start_tasks.worker_id
    from tasks
    where step_tasks.message_id = tasks.message_id
      and step_tasks.flow_slug = tasks.flow_slug
      and step_tasks.status = 'queued'
  ),
  -- Input of every run that owns at least one claimed task.
  runs as (
    select
      r.run_id,
      r.input
    from pgflow.runs r
    where r.run_id in (select run_id from tasks)
  ),
  -- One row per (claimed task, dependency) carrying that dependency's output.
  deps as (
    select
      st.run_id,
      st.step_slug,
      dep.dep_slug,
      CASE
        WHEN dep_step.step_type = 'map' THEN
          -- Map dependency: all completed task outputs as an array ordered by
          -- task_index; '[]' when there are no completed tasks.
          (SELECT COALESCE(jsonb_agg(dt.output ORDER BY dt.task_index), '[]'::jsonb)
           FROM pgflow.step_tasks dt
           WHERE dt.run_id = st.run_id
             AND dt.step_slug = dep.dep_slug
             AND dt.status = 'completed')
        ELSE
          -- Single dependency: output of its completed task (left join below).
          dep_task.output
      END as dep_output
    from tasks st
    join pgflow.deps dep on dep.flow_slug = st.flow_slug and dep.step_slug = st.step_slug
    join pgflow.steps dep_step on dep_step.flow_slug = dep.flow_slug and dep_step.step_slug = dep.dep_slug
    -- The step_type condition is part of the join so map dependencies are not
    -- multiplied by their task rows.
    left join pgflow.step_tasks dep_task on
      dep_task.run_id = st.run_id and
      dep_task.step_slug = dep.dep_slug and
      dep_task.status = 'completed'
      and dep_step.step_type = 'single'
  ),
  -- Dependency outputs folded into {"dep_slug": output, ...} per step.
  deps_outputs as (
    select
      d.run_id,
      d.step_slug,
      jsonb_object_agg(d.dep_slug, d.dep_output) as deps_output,
      count(*) as dep_count
    from deps d
    group by d.run_id, d.step_slug
  ),
  -- Visibility-timeout delay per message: step timeout, else flow timeout, plus 2.
  timeouts as (
    select
      task.message_id,
      task.flow_slug,
      coalesce(step.opt_timeout, flow.opt_timeout) + 2 as vt_delay
    from tasks task
    join pgflow.flows flow on flow.flow_slug = task.flow_slug
    join pgflow.steps step on step.flow_slug = task.flow_slug and step.step_slug = task.step_slug
  ),
  -- Batch update visibility timeouts for all messages.
  -- HAVING skips the call entirely when there is nothing to update, so the
  -- helper is never invoked with NULL arrays.
  set_vt_batch as (
    select pgflow.set_vt_batch(
      start_tasks.flow_slug,
      array_agg(t.message_id order by t.message_id),
      array_agg(t.vt_delay order by t.message_id)
    )
    from timeouts t
    having count(*) > 0
  ),
  -- PostgreSQL does not evaluate a SELECT CTE that is never referenced, so the
  -- batch update above must be consumed somewhere. Counting its rows forces a
  -- full evaluation and always yields exactly one row, which keeps the
  -- cardinality of the final result unchanged when it is cross joined below.
  vt_applied as (
    select count(*) as updated_messages
    from set_vt_batch
  )
  select
    st.flow_slug,
    st.run_id,
    st.step_slug,
    -- ==========================================
    -- INPUT CONSTRUCTION LOGIC
    -- ==========================================
    -- Map steps receive the RAW array element at their task_index
    -- (e.g. just 42 or "hello"); non-map steps receive a structured object
    -- (e.g. {"run": {...}, "dependency1": {...}}).
    CASE
      -- -------------------- MAP STEPS --------------------
      WHEN step.step_type = 'map' THEN
        CASE
          -- ROOT MAP (deps_count = 0): element comes from the run input.
          -- Example: run input = [1, 2, 3] -> task 0 gets 1, task 1 gets 2, ...
          -- Original note: a non-array run input yields NULL here; the
          -- original comments state that start_flow validates this.
          WHEN step.deps_count = 0 THEN
            jsonb_array_element(r.input, st.task_index)

          -- DEPENDENT MAP: element comes from the dependency's output array.
          -- deps_output has the shape {"dep_name": [..]}; jsonb_each unwraps
          -- it and LIMIT 1 picks the entry (the original comments state that
          -- add_step restricts map steps to exactly one dependency).
          ELSE
            (SELECT jsonb_array_element(value, st.task_index)
             FROM jsonb_each(dep_out.deps_output)
             LIMIT 1)
        END

      -- -------------------- NON-MAP STEPS --------------------
      -- {"run": <run input>} merged with {"dep1": output1, ...};
      -- a step without dependency outputs only gets the "run" key.
      ELSE
        jsonb_build_object('run', r.input) ||
        coalesce(dep_out.deps_output, '{}'::jsonb)
    END as input,
    st.message_id as msg_id
  from tasks st
  join runs r on st.run_id = r.run_id
  join pgflow.steps step on
    step.flow_slug = st.flow_slug and
    step.step_slug = st.step_slug
  left join deps_outputs dep_out on
    dep_out.run_id = st.run_id and
    dep_out.step_slug = st.step_slug
  -- single-row join whose only purpose is to make set_vt_batch execute
  cross join vt_applied
$$;
|