@pgflow/core 0.0.0-array-map-steps-cd94242a-20251008042921 → 0.0.0-condition-4354fcb6-20260108134756

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,185 @@
-- Extend the pgflow.step_task_record composite type with a jsonb
-- "flow_input" attribute. pgflow.start_tasks returns rows of this type and
-- fills the attribute with the run's original input for root non-map steps
-- (NULL for every other step).
alter type pgflow.step_task_record
  add attribute flow_input jsonb;
-- Modify "start_tasks" function
--
-- pgflow.start_tasks(flow_slug, msg_ids, worker_id)
--
-- Purpose:
--   Claims queued step tasks for a worker and returns the payload each task
--   needs in order to run.
--
-- Parameters:
--   flow_slug  - flow whose tasks are being started
--   msg_ids    - queue message ids to start; only tasks whose message_id is
--                in this array are considered
--   worker_id  - stored as last_worker_id on every task that gets started
--
-- Effects:
--   * step_tasks rows that are 'queued', belong to flow_slug, match msg_ids
--     and whose run is not 'failed' get attempts_count + 1,
--     status = 'started', started_at = now(), last_worker_id = worker_id.
--   * pgflow.set_vt_batch is called once with all selected message ids and a
--     per-message delay of coalesce(step.opt_timeout, flow.opt_timeout) + 2.
--
-- Returns:
--   One pgflow.step_task_record per selected task:
--   flow_slug, run_id, step_slug, input, msg_id, task_index, flow_input.
CREATE OR REPLACE FUNCTION "pgflow"."start_tasks" ("flow_slug" text, "msg_ids" bigint[], "worker_id" uuid) RETURNS SETOF "pgflow"."step_task_record" LANGUAGE sql SET "search_path" = '' AS $$
  -- Tasks eligible to be started by this call.
  with tasks as (
    select
      task.flow_slug,
      task.run_id,
      task.step_slug,
      task.task_index,
      task.message_id
    from pgflow.step_tasks as task
    join pgflow.runs r on r.run_id = task.run_id
    where task.flow_slug = start_tasks.flow_slug
      -- Parameters are qualified with the function name so that a column
      -- with the same name can never take precedence over them.
      and task.message_id = any(start_tasks.msg_ids)
      and task.status = 'queued'
      -- MVP: Don't start tasks on failed runs
      and r.status != 'failed'
  ),
  -- Data-modifying CTE: PostgreSQL always runs it to completion, even though
  -- the primary query never reads from it.
  start_tasks_update as (
    update pgflow.step_tasks
    set
      attempts_count = attempts_count + 1,
      status = 'started',
      started_at = now(),
      last_worker_id = start_tasks.worker_id
    from tasks
    where step_tasks.message_id = tasks.message_id
      and step_tasks.flow_slug = tasks.flow_slug
      and step_tasks.status = 'queued'
  ),
  -- Original input of every run that owns a selected task.
  runs as (
    select
      r.run_id,
      r.input
    from pgflow.runs r
    where r.run_id in (select run_id from tasks)
  ),
  -- One row per (selected task, dependency) with that dependency's output.
  deps as (
    select
      st.run_id,
      st.step_slug,
      dep.dep_slug,
      -- Aggregate map outputs or use single output
      CASE
        WHEN dep_step.step_type = 'map' THEN
          -- Aggregate all completed task outputs ordered by task_index.
          -- COALESCE turns "no completed tasks" into an empty array.
          (SELECT COALESCE(jsonb_agg(dt.output ORDER BY dt.task_index), '[]'::jsonb)
           FROM pgflow.step_tasks dt
           WHERE dt.run_id = st.run_id
             AND dt.step_slug = dep.dep_slug
             AND dt.status = 'completed')
        ELSE
          -- Single step: use the single task output
          dep_task.output
      END as dep_output
    from tasks st
    join pgflow.deps dep on dep.flow_slug = st.flow_slug and dep.step_slug = st.step_slug
    join pgflow.steps dep_step on dep_step.flow_slug = dep.flow_slug and dep_step.step_slug = dep.dep_slug
    left join pgflow.step_tasks dep_task on
      dep_task.run_id = st.run_id and
      dep_task.step_slug = dep.dep_slug and
      dep_task.status = 'completed'
      and dep_step.step_type = 'single'  -- Only join for single steps
  ),
  -- Dependency outputs folded into one {"dep_slug": output, ...} object per
  -- selected step.
  deps_outputs as (
    select
      d.run_id,
      d.step_slug,
      jsonb_object_agg(d.dep_slug, d.dep_output) as deps_output,
      count(*) as dep_count
    from deps d
    group by d.run_id, d.step_slug
  ),
  -- Visibility-timeout delay per message: the step-level timeout when set,
  -- otherwise the flow-level one, plus a margin of 2
  -- (presumably seconds - TODO confirm against the opt_timeout definition).
  timeouts as (
    select
      task.message_id,
      task.flow_slug,
      coalesce(step.opt_timeout, flow.opt_timeout) + 2 as vt_delay
    from tasks task
    join pgflow.flows flow on flow.flow_slug = task.flow_slug
    join pgflow.steps step on step.flow_slug = task.flow_slug and step.step_slug = task.step_slug
  ),
  -- Batch update visibility timeouts for all messages.
  -- HAVING drops the single aggregate row when no task was selected, so
  -- pgflow.set_vt_batch is never invoked with NULL arrays.
  set_vt_batch as (
    select pgflow.set_vt_batch(
      start_tasks.flow_slug,
      array_agg(t.message_id order by t.message_id),
      array_agg(t.vt_delay order by t.message_id)
    ) as result
    from timeouts t
    having count(*) > 0
  )
  select
    st.flow_slug,
    st.run_id,
    st.step_slug,
    -- ==========================================
    -- INPUT CONSTRUCTION LOGIC
    -- ==========================================
    -- This nested CASE statement determines how to construct the input
    -- for each task based on the step type (map vs non-map).
    --
    -- The fundamental difference:
    -- - Map steps: Receive RAW array elements (e.g., just 42 or "hello")
    -- - Non-map steps: Receive structured objects keyed by dependency slug
    --   (e.g., {"dependency1": {...}, "dependency2": {...}})
    -- ==========================================
    CASE
      -- -------------------- MAP STEPS --------------------
      -- Map steps process arrays element-by-element.
      -- Each task receives ONE element from the array at its task_index position.
      WHEN step.step_type = 'map' THEN
        -- Map steps get raw array elements without any wrapper object
        CASE
          -- ROOT MAP: Gets array from run input
          -- Example: run input = [1, 2, 3]
          --          task 0 gets: 1
          --          task 1 gets: 2
          --          task 2 gets: 3
          WHEN step.deps_count = 0 THEN
            -- Root map (deps_count = 0): no dependencies, reads from run input.
            -- Extract the element at task_index from the run's input array.
            -- Note: If run input is not an array, this will return NULL
            -- and the flow will fail (validated in start_flow).
            jsonb_array_element(r.input, st.task_index)

          -- DEPENDENT MAP: Gets array from its single dependency
          -- Example: dependency output = ["a", "b", "c"]
          --          task 0 gets: "a"
          --          task 1 gets: "b"
          --          task 2 gets: "c"
          ELSE
            -- Has dependencies (should be exactly 1 for map steps).
            -- Extract the element at task_index from the dependency's output array.
            --
            -- Why the subquery with jsonb_each?
            -- - The dependency outputs a raw array: [1, 2, 3]
            -- - deps_outputs aggregates it into: {"dep_name": [1, 2, 3]}
            -- - We need to unwrap and get just the array value
            -- - Map steps have exactly 1 dependency (enforced by add_step)
            -- - So jsonb_each will return exactly 1 row
            -- - We extract the 'value' which is the raw array [1, 2, 3]
            -- - Then get the element at task_index from that array
            (SELECT jsonb_array_element(value, st.task_index)
             FROM jsonb_each(dep_out.deps_output)
             LIMIT 1)
        END

      -- -------------------- NON-MAP STEPS --------------------
      -- Regular (non-map) steps receive dependency outputs as a structured object.
      -- Root steps (no dependencies) get empty object - they access flowInput via context.
      -- Dependent steps get only their dependency outputs.
      ELSE
        -- Non-map steps get structured input with dependency keys only
        -- Example for dependent step: {
        --   "step1": {"output": "from_step1"},
        --   "step2": {"output": "from_step2"}
        -- }
        -- Example for root step: {}
        --
        -- Note: flow_input is available separately in the returned record
        -- for workers to access via context.flowInput
        coalesce(dep_out.deps_output, '{}'::jsonb)
    END as input,
    st.message_id as msg_id,
    st.task_index as task_index,
    -- flow_input: Original run input for worker context
    -- Only included for root non-map steps to avoid data duplication.
    -- Root map steps: flowInput IS the array, useless to include
    -- Dependent steps: lazy load via ctx.flowInput when needed
    CASE
      WHEN step.step_type != 'map' AND step.deps_count = 0
      THEN r.input
      ELSE NULL
    END as flow_input
  from tasks st
  join runs r on st.run_id = r.run_id
  join pgflow.steps step on
    step.flow_slug = st.flow_slug and
    step.step_slug = st.step_slug
  left join deps_outputs dep_out on
    dep_out.run_id = st.run_id and
    dep_out.step_slug = st.step_slug
  -- PostgreSQL skips a SELECT CTE that the primary query never references
  -- (only data-modifying CTEs run unconditionally). Reading set_vt_batch here
  -- is what makes the visibility-timeout update actually happen. The
  -- comparison is always true, so it never filters a task row; counting the
  -- output column keeps the function call from being optimised away.
  where (select count(vt.result) from set_vt_batch vt) >= 0
$$;