@pgflow/core 0.0.0-array-map-steps-302d00a8-20250922101336 → 0.0.0-array-map-steps-b956f8f9-20251006084236

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,178 +0,0 @@
-- Extend the "step_task_record" composite type with an integer
-- "task_index" attribute.
alter type pgflow.step_task_record
  add attribute task_index integer;
-- Modify "start_tasks" function
--
-- Marks the queued step tasks behind the given queue messages as started and
-- returns one step_task_record per started task, including the input payload
-- the task should be executed with.
--
-- Parameters:
--   flow_slug  flow whose tasks are being started
--   msg_ids    queue message ids to look up in pgflow.step_tasks
--   worker_id  stored in last_worker_id on every task that gets started
--
-- Returns (per task): flow_slug, run_id, step_slug, input, msg_id, task_index.
--
-- Side effects:
--   * pgflow.step_tasks rows move from 'queued' to 'started'
--     (attempts_count incremented, started_at = now()).
--   * pgflow.set_vt_batch is called once with all started message ids and
--     their per-task delays.
create or replace function pgflow.start_tasks(
  flow_slug text,
  msg_ids bigint[],
  worker_id uuid
)
returns setof pgflow.step_task_record
language sql
set search_path = ''
as $$
  with tasks as (
    -- Queued tasks of this flow that match the supplied message ids
    select
      task.flow_slug,
      task.run_id,
      task.step_slug,
      task.task_index,
      task.message_id
    from pgflow.step_tasks as task
    join pgflow.runs r on r.run_id = task.run_id
    where task.flow_slug = start_tasks.flow_slug
      and task.message_id = any(msg_ids)
      and task.status = 'queued'
      -- MVP: Don't start tasks on failed runs
      and r.status != 'failed'
  ),
  -- Data-modifying CTE: PostgreSQL always runs it to completion, even though
  -- the primary query never reads from it.
  start_tasks_update as (
    update pgflow.step_tasks
    set
      attempts_count = attempts_count + 1,
      status = 'started',
      started_at = now(),
      last_worker_id = worker_id
    from tasks
    where step_tasks.message_id = tasks.message_id
      and step_tasks.flow_slug = tasks.flow_slug
      and step_tasks.status = 'queued'
  ),
  runs as (
    select
      r.run_id,
      r.input
    from pgflow.runs r
    where r.run_id in (select run_id from tasks)
  ),
  -- One row per (task, dependency) with the dependency's output
  deps as (
    select
      st.run_id,
      st.step_slug,
      dep.dep_slug,
      -- Aggregate map outputs or use single output
      case
        when dep_step.step_type = 'map' then
          -- Aggregate all completed task outputs ordered by task_index.
          -- coalesce returns an empty array if there are no tasks.
          (select coalesce(jsonb_agg(dt.output order by dt.task_index), '[]'::jsonb)
           from pgflow.step_tasks dt
           where dt.run_id = st.run_id
             and dt.step_slug = dep.dep_slug
             and dt.status = 'completed')
        else
          -- Single step: use the single task output
          dep_task.output
      end as dep_output
    from tasks st
    join pgflow.deps dep
      on dep.flow_slug = st.flow_slug
     and dep.step_slug = st.step_slug
    join pgflow.steps dep_step
      on dep_step.flow_slug = dep.flow_slug
     and dep_step.step_slug = dep.dep_slug
    left join pgflow.step_tasks dep_task
      on dep_task.run_id = st.run_id
     and dep_task.step_slug = dep.dep_slug
     and dep_task.status = 'completed'
     and dep_step.step_type = 'single' -- Only join for single steps
  ),
  -- Collapse dependency rows into one {"dep_slug": output, ...} object per task
  deps_outputs as (
    select
      d.run_id,
      d.step_slug,
      jsonb_object_agg(d.dep_slug, d.dep_output) as deps_output,
      count(*) as dep_count
    from deps d
    group by d.run_id, d.step_slug
  ),
  -- Per-message visibility delay: step-level timeout, falling back to the
  -- flow-level one, plus 2 (presumably seconds of slack -- confirm the unit
  -- against pgflow.set_vt_batch).
  timeouts as (
    select
      task.message_id,
      task.flow_slug,
      coalesce(step.opt_timeout, flow.opt_timeout) + 2 as vt_delay
    from tasks task
    join pgflow.flows flow on flow.flow_slug = task.flow_slug
    join pgflow.steps step
      on step.flow_slug = task.flow_slug
     and step.step_slug = task.step_slug
  ),
  -- Batch update visibility timeouts for all messages.
  -- The HAVING guard skips the call entirely when no task qualified, so the
  -- helper is never invoked with NULL arrays.
  set_vt_batch as (
    select pgflow.set_vt_batch(
      start_tasks.flow_slug,
      array_agg(t.message_id order by t.message_id),
      array_agg(t.vt_delay order by t.message_id)
    )
    from timeouts t
    having count(*) > 0
  )
  select
    st.flow_slug,
    st.run_id,
    st.step_slug,
    -- ==========================================
    -- INPUT CONSTRUCTION LOGIC
    -- ==========================================
    -- This nested CASE determines how to construct the input for each task
    -- based on the step type (map vs non-map).
    --
    -- The fundamental difference:
    -- - Map steps: receive RAW array elements (e.g., just 42 or "hello")
    -- - Non-map steps: receive structured objects with named keys
    --   (e.g., {"run": {...}, "dependency1": {...}})
    -- ==========================================
    case
      -- -------------------- MAP STEPS --------------------
      -- Each task receives ONE element of the array, the one at its
      -- task_index position, without any wrapper object.
      when step.step_type = 'map' then
        case
          -- ROOT MAP (deps_count = 0): array comes from the run input.
          -- Example: run input = [1, 2, 3]
          --   task 0 gets: 1
          --   task 1 gets: 2
          --   task 2 gets: 3
          -- jsonb_array_element yields NULL if the run input is not an array.
          when step.deps_count = 0 then
            jsonb_array_element(r.input, st.task_index)

          -- DEPENDENT MAP: array comes from the dependency output.
          -- Example: dependency output = ["a", "b", "c"]
          --   task 0 gets: "a"
          --   task 1 gets: "b"
          --   task 2 gets: "c"
          else
            -- deps_outputs wrapped the dependency output as
            -- {"dep_name": [...]}; jsonb_each unwraps it again. Map steps are
            -- expected to have exactly one dependency, so LIMIT 1 picks that
            -- single entry before the element at task_index is extracted.
            (select jsonb_array_element(value, st.task_index)
             from jsonb_each(dep_out.deps_output)
             limit 1)
        end

      -- -------------------- NON-MAP STEPS --------------------
      -- Regular steps receive ALL inputs as one structured object:
      --   {
      --     "run":   {"original": "input"},
      --     "step1": {"output": "from_step1"},
      --     "step2": {"output": "from_step2"}
      --   }
      else
        -- 'run' key holds the original run input ...
        jsonb_build_object('run', r.input) ||
        -- ... merged with {"dep1": output1, "dep2": output2, ...};
        -- defaults to an empty object when the step has no dependencies.
        coalesce(dep_out.deps_output, '{}'::jsonb)
    end as input,
    st.message_id as msg_id,
    st.task_index as task_index
  from tasks st
  join runs r on st.run_id = r.run_id
  join pgflow.steps step
    on step.flow_slug = st.flow_slug
   and step.step_slug = st.step_slug
  left join deps_outputs dep_out
    on dep_out.run_id = st.run_id
   and dep_out.step_slug = st.step_slug
  -- A SELECT inside WITH is only evaluated as far as the primary query
  -- demands its output, so an unreferenced set_vt_batch CTE would never run.
  -- Reading its row count forces the call; count(*) always yields exactly one
  -- row, so this join cannot multiply or drop result rows.
  cross join (
    select count(*) as vt_rows
    from set_vt_batch
  ) as vt_applied
$$;