@pgflow/core 0.0.0-array-map-steps-302d00a8-20250925065142 → 0.0.0-array-map-steps-f18b09ac-20251006160811
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +148 -72
- package/dist/CHANGELOG.md +17 -15
- package/dist/README.md +148 -72
- package/dist/package.json +1 -1
- package/dist/supabase/migrations/{20250919101802_pgflow_temp_orphaned_messages_index.sql → 20251006073122_pgflow_add_map_step_type.sql} +533 -1
- package/dist/types.d.ts +1 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +2 -2
- package/dist/PLAN_race_condition_testing.md +0 -176
- package/dist/supabase/migrations/20250912075001_pgflow_temp_pr1_schema.sql +0 -185
- package/dist/supabase/migrations/20250912080800_pgflow_temp_pr2_root_maps.sql +0 -95
- package/dist/supabase/migrations/20250912125339_pgflow_TEMP_task_spawning_optimization.sql +0 -146
- package/dist/supabase/migrations/20250916093518_pgflow_temp_add_cascade_complete.sql +0 -321
- package/dist/supabase/migrations/20250916142327_pgflow_temp_make_initial_tasks_nullable.sql +0 -624
- package/dist/supabase/migrations/20250916203905_pgflow_temp_handle_arrays_in_start_tasks.sql +0 -157
- package/dist/supabase/migrations/20250918042753_pgflow_temp_handle_map_output_aggregation.sql +0 -489
- package/dist/supabase/migrations/20250919135211_pgflow_temp_return_task_index_in_start_tasks.sql +0 -178
|
@@ -1,176 +0,0 @@
|
|
|
1
|
-
# PLAN: Race Condition Testing for Type Violations
|
|
2
|
-
|
|
3
|
-
## Background
|
|
4
|
-
|
|
5
|
-
When a type violation occurs (e.g., single step produces non-array for dependent map), the system must archive ALL active messages to prevent orphaned messages that cycle through workers indefinitely.
|
|
6
|
-
|
|
7
|
-
## Current Issue
|
|
8
|
-
|
|
9
|
-
The fix archives both `'queued'` AND `'started'` tasks, but existing tests don't properly validate the race condition scenarios.
|
|
10
|
-
|
|
11
|
-
## Test Scenarios Needed
|
|
12
|
-
|
|
13
|
-
### 1. Basic Type Violation (✅ Already Covered)
|
|
14
|
-
**Scenario**: Single task causes type violation
|
|
15
|
-
```
|
|
16
|
-
step1 (single) → step2 (single) → map_step
|
|
17
|
-
```
|
|
18
|
-
- Worker completes step2 with non-array
|
|
19
|
-
- Verify run fails and current task's message is archived
|
|
20
|
-
- **Coverage**: `non_array_to_map_should_fail.test.sql`
|
|
21
|
-
|
|
22
|
-
### 2. Concurrent Started Tasks (❌ Not Covered)
|
|
23
|
-
**Scenario**: Multiple workers have tasks in 'started' state when violation occurs
|
|
24
|
-
```
|
|
25
|
-
producer (single) → map_consumer (map, expects array)
|
|
26
|
-
producer (single) → parallel_task1 (single)
|
|
27
|
-
producer (single) → parallel_task2 (single)
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
**Test Flow**:
|
|
31
|
-
1. Complete producer with `[1, 2, 3]` (spawns 3 map tasks + 2 parallel tasks)
|
|
32
|
-
2. Worker A starts `map_consumer[0]`
|
|
33
|
-
3. Worker B starts `map_consumer[1]`
|
|
34
|
-
4. Worker C starts `parallel_task1`
|
|
35
|
-
5. Worker D starts `parallel_task2`
|
|
36
|
-
6. Worker C completes `parallel_task1` with non-array (violates some other map dependency)
|
|
37
|
-
7. **Verify**: ALL started tasks (map_consumer[0], map_consumer[1], parallel_task2) get archived
|
|
38
|
-
|
|
39
|
-
### 3. Mixed Queue States (❌ Not Covered)
|
|
40
|
-
**Scenario**: Mix of queued and started tasks across different steps
|
|
41
|
-
```
|
|
42
|
-
step1 → step2 → step3 → map_step
|
|
43
|
-
↘ step4 → step5
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
**Test Flow**:
|
|
47
|
-
1. Complete step1
|
|
48
|
-
2. Worker A starts step2
|
|
49
|
-
3. Worker B starts step4
|
|
50
|
-
4. Step3 and step5 remain queued
|
|
51
|
-
5. Worker A completes step2 with type violation
|
|
52
|
-
6. **Verify**: Both started (step4) AND queued (step3, step5) messages archived
|
|
53
|
-
|
|
54
|
-
### 4. Map Task Partial Processing (❌ Not Covered)
|
|
55
|
-
**Scenario**: Some map tasks started, others queued when violation occurs
|
|
56
|
-
```
|
|
57
|
-
producer → large_map (100 elements)
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
**Test Flow**:
|
|
61
|
-
1. Producer outputs array of 100 elements
|
|
62
|
-
2. Workers start processing first 10 tasks
|
|
63
|
-
3. 90 tasks remain queued
|
|
64
|
-
4. One of the started tasks detects downstream type violation
|
|
65
|
-
5. **Verify**: All 100 messages (10 started + 90 queued) get archived
|
|
66
|
-
|
|
67
|
-
### 5. Visibility Timeout Verification (❌ Not Covered)
|
|
68
|
-
**Scenario**: Ensure orphaned messages don't reappear after timeout
|
|
69
|
-
```
|
|
70
|
-
step1 → step2 → map_step
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
**Test Flow**:
|
|
74
|
-
1. Worker starts step2 (30s visibility timeout)
|
|
75
|
-
2. Type violation occurs but message NOT archived (simulate bug)
|
|
76
|
-
3. Wait 31 seconds
|
|
77
|
-
4. **Verify**: Message reappears in queue (demonstrates the bug)
|
|
78
|
-
5. Apply fix and verify message doesn't reappear
|
|
79
|
-
|
|
80
|
-
### 6. Nested Map Chains (❌ Not Covered)
|
|
81
|
-
**Scenario**: Type violation in middle of map chain
|
|
82
|
-
```
|
|
83
|
-
map1 (produces arrays) → map2 (expects arrays) → map3
|
|
84
|
-
```
|
|
85
|
-
|
|
86
|
-
**Test Flow**:
|
|
87
|
-
1. map1 task completes with non-array (violates map2 expectation)
|
|
88
|
-
2. Other map1 tasks are in various states (started/queued)
|
|
89
|
-
3. **Verify**: All map1 tasks archived, map2 never starts
|
|
90
|
-
|
|
91
|
-
### 7. Race During Archival (❌ Not Covered)
|
|
92
|
-
**Scenario**: Worker tries to complete task while archival is happening
|
|
93
|
-
```
|
|
94
|
-
step1 → step2 → map_step
|
|
95
|
-
```
|
|
96
|
-
|
|
97
|
-
**Test Flow**:
|
|
98
|
-
1. Worker A detects type violation, begins archiving
|
|
99
|
-
2. Worker B tries to complete its task during archival
|
|
100
|
-
3. **Verify**: Worker B's completion is rejected (guard clause)
|
|
101
|
-
4. **Verify**: No duplicate archival attempts
|
|
102
|
-
|
|
103
|
-
## Implementation Strategy
|
|
104
|
-
|
|
105
|
-
### Test Utilities Needed
|
|
106
|
-
|
|
107
|
-
1. **Multi-worker simulator**:
|
|
108
|
-
```sql
|
|
109
|
-
CREATE FUNCTION pgflow_tests.simulate_worker(
|
|
110
|
-
worker_id uuid,
|
|
111
|
-
flow_slug text
|
|
112
|
-
) RETURNS TABLE(...);
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
2. **Queue state inspector**:
|
|
116
|
-
```sql
|
|
117
|
-
CREATE FUNCTION pgflow_tests.inspect_queue_state(
|
|
118
|
-
flow_slug text
|
|
119
|
-
) RETURNS TABLE(
|
|
120
|
-
message_id bigint,
|
|
121
|
-
task_status text,
|
|
122
|
-
visibility_timeout timestamptz
|
|
123
|
-
);
|
|
124
|
-
```
|
|
125
|
-
|
|
126
|
-
3. **Time manipulation** (for visibility timeout tests):
|
|
127
|
-
```sql
|
|
128
|
-
-- May need to mock pgmq visibility behavior
|
|
129
|
-
```
|
|
130
|
-
|
|
131
|
-
### Test File Organization
|
|
132
|
-
|
|
133
|
-
```
|
|
134
|
-
supabase/tests/type_violations/
|
|
135
|
-
├── basic_violation.test.sql # Existing coverage
|
|
136
|
-
├── concurrent_started_tasks.test.sql # NEW: Scenario 2
|
|
137
|
-
├── mixed_queue_states.test.sql # NEW: Scenario 3
|
|
138
|
-
├── map_partial_processing.test.sql # NEW: Scenario 4
|
|
139
|
-
├── visibility_timeout_recovery.test.sql # NEW: Scenario 5
|
|
140
|
-
├── nested_map_chains.test.sql # NEW: Scenario 6
|
|
141
|
-
└── race_during_archival.test.sql # NEW: Scenario 7
|
|
142
|
-
```
|
|
143
|
-
|
|
144
|
-
## Success Criteria
|
|
145
|
-
|
|
146
|
-
1. **No orphaned messages**: Queue must be empty after type violation
|
|
147
|
-
2. **No message resurrection**: Archived messages don't reappear after timeout
|
|
148
|
-
3. **Complete cleanup**: ALL tasks (queued + started) for the run are handled
|
|
149
|
-
4. **Atomic operation**: Archival happens in single transaction
|
|
150
|
-
5. **Guard effectiveness**: No operations on failed runs
|
|
151
|
-
|
|
152
|
-
## Performance Considerations
|
|
153
|
-
|
|
154
|
-
- Test with large numbers of tasks (1000+) to verify batch archival performance
|
|
155
|
-
- Ensure archival doesn't lock tables for extended periods
|
|
156
|
-
- Verify index usage on `(run_id, status, message_id)`
|
|
157
|
-
|
|
158
|
-
## Current Gap Analysis
|
|
159
|
-
|
|
160
|
-
**What we have**:
|
|
161
|
-
- Basic type violation detection ✅
|
|
162
|
-
- Single task archival ✅
|
|
163
|
-
- Run failure on violation ✅
|
|
164
|
-
|
|
165
|
-
**What we need**:
|
|
166
|
-
- True concurrent worker simulation ❌
|
|
167
|
-
- Multi-task race condition validation ❌
|
|
168
|
-
- Visibility timeout verification ❌
|
|
169
|
-
- Performance under load testing ❌
|
|
170
|
-
|
|
171
|
-
## Priority
|
|
172
|
-
|
|
173
|
-
1. **HIGH**: Concurrent started tasks (Scenario 2) - Most common real-world case
|
|
174
|
-
2. **HIGH**: Map partial processing (Scenario 4) - Critical for large arrays
|
|
175
|
-
3. **MEDIUM**: Mixed queue states (Scenario 3) - Complex flows
|
|
176
|
-
4. **LOW**: Other scenarios - Edge cases but important for robustness
|
|
@@ -1,185 +0,0 @@
|
|
|
1
|
-
-- Modify "step_states" table
|
|
2
|
-
ALTER TABLE "pgflow"."step_states" DROP CONSTRAINT "step_states_remaining_tasks_check", ADD CONSTRAINT "remaining_tasks_state_consistency" CHECK ((remaining_tasks IS NULL) OR (status <> 'created'::text)), ADD CONSTRAINT "step_states_initial_tasks_check" CHECK (initial_tasks >= 0), ALTER COLUMN "remaining_tasks" DROP NOT NULL, ALTER COLUMN "remaining_tasks" DROP DEFAULT, ADD COLUMN "initial_tasks" integer NULL DEFAULT 1;
|
|
3
|
-
-- Modify "step_tasks" table
|
|
4
|
-
ALTER TABLE "pgflow"."step_tasks" DROP CONSTRAINT "only_single_task_per_step";
|
|
5
|
-
-- Modify "steps" table
|
|
6
|
-
ALTER TABLE "pgflow"."steps" DROP CONSTRAINT "steps_step_type_check", ADD CONSTRAINT "steps_step_type_check" CHECK (step_type = ANY (ARRAY['single'::text, 'map'::text]));
|
|
7
|
-
-- Modify "start_ready_steps" function
|
|
8
|
-
CREATE OR REPLACE FUNCTION "pgflow"."start_ready_steps" ("run_id" uuid) RETURNS void LANGUAGE sql SET "search_path" = '' AS $$
|
|
9
|
-
WITH ready_steps AS (
|
|
10
|
-
SELECT *
|
|
11
|
-
FROM pgflow.step_states AS step_state
|
|
12
|
-
WHERE step_state.run_id = start_ready_steps.run_id
|
|
13
|
-
AND step_state.status = 'created'
|
|
14
|
-
AND step_state.remaining_deps = 0
|
|
15
|
-
ORDER BY step_state.step_slug
|
|
16
|
-
FOR UPDATE
|
|
17
|
-
),
|
|
18
|
-
started_step_states AS (
|
|
19
|
-
UPDATE pgflow.step_states
|
|
20
|
-
SET status = 'started',
|
|
21
|
-
started_at = now(),
|
|
22
|
-
remaining_tasks = ready_steps.initial_tasks -- Copy initial_tasks to remaining_tasks when starting
|
|
23
|
-
FROM ready_steps
|
|
24
|
-
WHERE pgflow.step_states.run_id = start_ready_steps.run_id
|
|
25
|
-
AND pgflow.step_states.step_slug = ready_steps.step_slug
|
|
26
|
-
RETURNING pgflow.step_states.*
|
|
27
|
-
),
|
|
28
|
-
sent_messages AS (
|
|
29
|
-
SELECT
|
|
30
|
-
started_step.flow_slug,
|
|
31
|
-
started_step.run_id,
|
|
32
|
-
started_step.step_slug,
|
|
33
|
-
pgmq.send(
|
|
34
|
-
started_step.flow_slug,
|
|
35
|
-
jsonb_build_object(
|
|
36
|
-
'flow_slug', started_step.flow_slug,
|
|
37
|
-
'run_id', started_step.run_id,
|
|
38
|
-
'step_slug', started_step.step_slug,
|
|
39
|
-
'task_index', 0
|
|
40
|
-
),
|
|
41
|
-
COALESCE(step.opt_start_delay, 0)
|
|
42
|
-
) AS msg_id
|
|
43
|
-
FROM started_step_states AS started_step
|
|
44
|
-
JOIN pgflow.steps AS step
|
|
45
|
-
ON step.flow_slug = started_step.flow_slug
|
|
46
|
-
AND step.step_slug = started_step.step_slug
|
|
47
|
-
),
|
|
48
|
-
broadcast_events AS (
|
|
49
|
-
SELECT
|
|
50
|
-
realtime.send(
|
|
51
|
-
jsonb_build_object(
|
|
52
|
-
'event_type', 'step:started',
|
|
53
|
-
'run_id', started_step.run_id,
|
|
54
|
-
'step_slug', started_step.step_slug,
|
|
55
|
-
'status', 'started',
|
|
56
|
-
'started_at', started_step.started_at,
|
|
57
|
-
'remaining_tasks', started_step.remaining_tasks,
|
|
58
|
-
'remaining_deps', started_step.remaining_deps
|
|
59
|
-
),
|
|
60
|
-
concat('step:', started_step.step_slug, ':started'),
|
|
61
|
-
concat('pgflow:run:', started_step.run_id),
|
|
62
|
-
false
|
|
63
|
-
)
|
|
64
|
-
FROM started_step_states AS started_step
|
|
65
|
-
)
|
|
66
|
-
INSERT INTO pgflow.step_tasks (flow_slug, run_id, step_slug, message_id)
|
|
67
|
-
SELECT
|
|
68
|
-
sent_messages.flow_slug,
|
|
69
|
-
sent_messages.run_id,
|
|
70
|
-
sent_messages.step_slug,
|
|
71
|
-
sent_messages.msg_id
|
|
72
|
-
FROM sent_messages;
|
|
73
|
-
$$;
|
|
74
|
-
-- Modify "start_flow" function
|
|
75
|
-
CREATE OR REPLACE FUNCTION "pgflow"."start_flow" ("flow_slug" text, "input" jsonb, "run_id" uuid DEFAULT NULL::uuid) RETURNS SETOF "pgflow"."runs" LANGUAGE plpgsql SET "search_path" = '' AS $$
|
|
76
|
-
declare
|
|
77
|
-
v_created_run pgflow.runs%ROWTYPE;
|
|
78
|
-
begin
|
|
79
|
-
|
|
80
|
-
WITH
|
|
81
|
-
flow_steps AS (
|
|
82
|
-
SELECT steps.flow_slug, steps.step_slug, steps.deps_count
|
|
83
|
-
FROM pgflow.steps
|
|
84
|
-
WHERE steps.flow_slug = start_flow.flow_slug
|
|
85
|
-
),
|
|
86
|
-
created_run AS (
|
|
87
|
-
INSERT INTO pgflow.runs (run_id, flow_slug, input, remaining_steps)
|
|
88
|
-
VALUES (
|
|
89
|
-
COALESCE(start_flow.run_id, gen_random_uuid()),
|
|
90
|
-
start_flow.flow_slug,
|
|
91
|
-
start_flow.input,
|
|
92
|
-
(SELECT count(*) FROM flow_steps)
|
|
93
|
-
)
|
|
94
|
-
RETURNING *
|
|
95
|
-
),
|
|
96
|
-
created_step_states AS (
|
|
97
|
-
INSERT INTO pgflow.step_states (flow_slug, run_id, step_slug, remaining_deps, initial_tasks)
|
|
98
|
-
SELECT
|
|
99
|
-
fs.flow_slug,
|
|
100
|
-
(SELECT created_run.run_id FROM created_run),
|
|
101
|
-
fs.step_slug,
|
|
102
|
-
fs.deps_count,
|
|
103
|
-
1 -- For now, all steps get initial_tasks = 1 (single steps)
|
|
104
|
-
FROM flow_steps fs
|
|
105
|
-
)
|
|
106
|
-
SELECT * FROM created_run INTO v_created_run;
|
|
107
|
-
|
|
108
|
-
-- Send broadcast event for run started
|
|
109
|
-
PERFORM realtime.send(
|
|
110
|
-
jsonb_build_object(
|
|
111
|
-
'event_type', 'run:started',
|
|
112
|
-
'run_id', v_created_run.run_id,
|
|
113
|
-
'flow_slug', v_created_run.flow_slug,
|
|
114
|
-
'input', v_created_run.input,
|
|
115
|
-
'status', 'started',
|
|
116
|
-
'remaining_steps', v_created_run.remaining_steps,
|
|
117
|
-
'started_at', v_created_run.started_at
|
|
118
|
-
),
|
|
119
|
-
'run:started',
|
|
120
|
-
concat('pgflow:run:', v_created_run.run_id),
|
|
121
|
-
false
|
|
122
|
-
);
|
|
123
|
-
|
|
124
|
-
PERFORM pgflow.start_ready_steps(v_created_run.run_id);
|
|
125
|
-
|
|
126
|
-
RETURN QUERY SELECT * FROM pgflow.runs where pgflow.runs.run_id = v_created_run.run_id;
|
|
127
|
-
|
|
128
|
-
end;
|
|
129
|
-
$$;
|
|
130
|
-
-- Create "add_step" function
|
|
131
|
-
CREATE FUNCTION "pgflow"."add_step" ("flow_slug" text, "step_slug" text, "deps_slugs" text[] DEFAULT '{}', "max_attempts" integer DEFAULT NULL::integer, "base_delay" integer DEFAULT NULL::integer, "timeout" integer DEFAULT NULL::integer, "start_delay" integer DEFAULT NULL::integer, "step_type" text DEFAULT 'single') RETURNS "pgflow"."steps" LANGUAGE plpgsql SET "search_path" = '' AS $$
|
|
132
|
-
DECLARE
|
|
133
|
-
result_step pgflow.steps;
|
|
134
|
-
next_idx int;
|
|
135
|
-
BEGIN
|
|
136
|
-
-- Validate map step constraints
|
|
137
|
-
-- Map steps can have either:
|
|
138
|
-
-- 0 dependencies (root map - maps over flow input array)
|
|
139
|
-
-- 1 dependency (dependent map - maps over dependency output array)
|
|
140
|
-
IF COALESCE(add_step.step_type, 'single') = 'map' AND COALESCE(array_length(add_step.deps_slugs, 1), 0) > 1 THEN
|
|
141
|
-
RAISE EXCEPTION 'Map step "%" can have at most one dependency, but % were provided: %',
|
|
142
|
-
add_step.step_slug,
|
|
143
|
-
COALESCE(array_length(add_step.deps_slugs, 1), 0),
|
|
144
|
-
array_to_string(add_step.deps_slugs, ', ');
|
|
145
|
-
END IF;
|
|
146
|
-
|
|
147
|
-
-- Get next step index
|
|
148
|
-
SELECT COALESCE(MAX(s.step_index) + 1, 0) INTO next_idx
|
|
149
|
-
FROM pgflow.steps s
|
|
150
|
-
WHERE s.flow_slug = add_step.flow_slug;
|
|
151
|
-
|
|
152
|
-
-- Create the step
|
|
153
|
-
INSERT INTO pgflow.steps (
|
|
154
|
-
flow_slug, step_slug, step_type, step_index, deps_count,
|
|
155
|
-
opt_max_attempts, opt_base_delay, opt_timeout, opt_start_delay
|
|
156
|
-
)
|
|
157
|
-
VALUES (
|
|
158
|
-
add_step.flow_slug,
|
|
159
|
-
add_step.step_slug,
|
|
160
|
-
COALESCE(add_step.step_type, 'single'),
|
|
161
|
-
next_idx,
|
|
162
|
-
COALESCE(array_length(add_step.deps_slugs, 1), 0),
|
|
163
|
-
add_step.max_attempts,
|
|
164
|
-
add_step.base_delay,
|
|
165
|
-
add_step.timeout,
|
|
166
|
-
add_step.start_delay
|
|
167
|
-
)
|
|
168
|
-
ON CONFLICT ON CONSTRAINT steps_pkey
|
|
169
|
-
DO UPDATE SET step_slug = EXCLUDED.step_slug
|
|
170
|
-
RETURNING * INTO result_step;
|
|
171
|
-
|
|
172
|
-
-- Insert dependencies
|
|
173
|
-
INSERT INTO pgflow.deps (flow_slug, dep_slug, step_slug)
|
|
174
|
-
SELECT add_step.flow_slug, d.dep_slug, add_step.step_slug
|
|
175
|
-
FROM unnest(COALESCE(add_step.deps_slugs, '{}')) AS d(dep_slug)
|
|
176
|
-
WHERE add_step.deps_slugs IS NOT NULL AND array_length(add_step.deps_slugs, 1) > 0
|
|
177
|
-
ON CONFLICT ON CONSTRAINT deps_pkey DO NOTHING;
|
|
178
|
-
|
|
179
|
-
RETURN result_step;
|
|
180
|
-
END;
|
|
181
|
-
$$;
|
|
182
|
-
-- Drop "add_step" function
|
|
183
|
-
DROP FUNCTION "pgflow"."add_step" (text, text, integer, integer, integer, integer);
|
|
184
|
-
-- Drop "add_step" function
|
|
185
|
-
DROP FUNCTION "pgflow"."add_step" (text, text, text[], integer, integer, integer, integer);
|
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
-- Modify "step_states" table
|
|
2
|
-
ALTER TABLE "pgflow"."step_states" ALTER COLUMN "initial_tasks" SET NOT NULL;
|
|
3
|
-
-- Modify "start_flow" function
|
|
4
|
-
CREATE OR REPLACE FUNCTION "pgflow"."start_flow" ("flow_slug" text, "input" jsonb, "run_id" uuid DEFAULT NULL::uuid) RETURNS SETOF "pgflow"."runs" LANGUAGE plpgsql SET "search_path" = '' AS $$
|
|
5
|
-
declare
|
|
6
|
-
v_created_run pgflow.runs%ROWTYPE;
|
|
7
|
-
v_root_map_count int;
|
|
8
|
-
begin
|
|
9
|
-
|
|
10
|
-
-- Check for root map steps and validate input
|
|
11
|
-
WITH root_maps AS (
|
|
12
|
-
SELECT step_slug
|
|
13
|
-
FROM pgflow.steps
|
|
14
|
-
WHERE steps.flow_slug = start_flow.flow_slug
|
|
15
|
-
AND steps.step_type = 'map'
|
|
16
|
-
AND steps.deps_count = 0
|
|
17
|
-
)
|
|
18
|
-
SELECT COUNT(*) INTO v_root_map_count FROM root_maps;
|
|
19
|
-
|
|
20
|
-
-- If we have root map steps, validate that input is an array
|
|
21
|
-
IF v_root_map_count > 0 THEN
|
|
22
|
-
-- First check for NULL (should be caught by NOT NULL constraint, but be defensive)
|
|
23
|
-
IF start_flow.input IS NULL THEN
|
|
24
|
-
RAISE EXCEPTION 'Flow % has root map steps but input is NULL', start_flow.flow_slug;
|
|
25
|
-
END IF;
|
|
26
|
-
|
|
27
|
-
-- Then check if it's not an array
|
|
28
|
-
IF jsonb_typeof(start_flow.input) != 'array' THEN
|
|
29
|
-
RAISE EXCEPTION 'Flow % has root map steps but input is not an array (got %)',
|
|
30
|
-
start_flow.flow_slug, jsonb_typeof(start_flow.input);
|
|
31
|
-
END IF;
|
|
32
|
-
END IF;
|
|
33
|
-
|
|
34
|
-
WITH
|
|
35
|
-
flow_steps AS (
|
|
36
|
-
SELECT steps.flow_slug, steps.step_slug, steps.step_type, steps.deps_count
|
|
37
|
-
FROM pgflow.steps
|
|
38
|
-
WHERE steps.flow_slug = start_flow.flow_slug
|
|
39
|
-
),
|
|
40
|
-
created_run AS (
|
|
41
|
-
INSERT INTO pgflow.runs (run_id, flow_slug, input, remaining_steps)
|
|
42
|
-
VALUES (
|
|
43
|
-
COALESCE(start_flow.run_id, gen_random_uuid()),
|
|
44
|
-
start_flow.flow_slug,
|
|
45
|
-
start_flow.input,
|
|
46
|
-
(SELECT count(*) FROM flow_steps)
|
|
47
|
-
)
|
|
48
|
-
RETURNING *
|
|
49
|
-
),
|
|
50
|
-
created_step_states AS (
|
|
51
|
-
INSERT INTO pgflow.step_states (flow_slug, run_id, step_slug, remaining_deps, initial_tasks)
|
|
52
|
-
SELECT
|
|
53
|
-
fs.flow_slug,
|
|
54
|
-
(SELECT created_run.run_id FROM created_run),
|
|
55
|
-
fs.step_slug,
|
|
56
|
-
fs.deps_count,
|
|
57
|
-
-- For root map steps (map with no deps), set initial_tasks to array length
|
|
58
|
-
-- For all other steps, set initial_tasks to 1
|
|
59
|
-
CASE
|
|
60
|
-
WHEN fs.step_type = 'map' AND fs.deps_count = 0 THEN
|
|
61
|
-
CASE
|
|
62
|
-
WHEN jsonb_typeof(start_flow.input) = 'array' THEN
|
|
63
|
-
jsonb_array_length(start_flow.input)
|
|
64
|
-
ELSE
|
|
65
|
-
1
|
|
66
|
-
END
|
|
67
|
-
ELSE
|
|
68
|
-
1
|
|
69
|
-
END
|
|
70
|
-
FROM flow_steps fs
|
|
71
|
-
)
|
|
72
|
-
SELECT * FROM created_run INTO v_created_run;
|
|
73
|
-
|
|
74
|
-
-- Send broadcast event for run started
|
|
75
|
-
PERFORM realtime.send(
|
|
76
|
-
jsonb_build_object(
|
|
77
|
-
'event_type', 'run:started',
|
|
78
|
-
'run_id', v_created_run.run_id,
|
|
79
|
-
'flow_slug', v_created_run.flow_slug,
|
|
80
|
-
'input', v_created_run.input,
|
|
81
|
-
'status', 'started',
|
|
82
|
-
'remaining_steps', v_created_run.remaining_steps,
|
|
83
|
-
'started_at', v_created_run.started_at
|
|
84
|
-
),
|
|
85
|
-
'run:started',
|
|
86
|
-
concat('pgflow:run:', v_created_run.run_id),
|
|
87
|
-
false
|
|
88
|
-
);
|
|
89
|
-
|
|
90
|
-
PERFORM pgflow.start_ready_steps(v_created_run.run_id);
|
|
91
|
-
|
|
92
|
-
RETURN QUERY SELECT * FROM pgflow.runs where pgflow.runs.run_id = v_created_run.run_id;
|
|
93
|
-
|
|
94
|
-
end;
|
|
95
|
-
$$;
|
|
@@ -1,146 +0,0 @@
|
|
|
1
|
-
-- Modify "start_ready_steps" function
|
|
2
|
-
CREATE OR REPLACE FUNCTION "pgflow"."start_ready_steps" ("run_id" uuid) RETURNS void LANGUAGE sql SET "search_path" = '' AS $$
|
|
3
|
-
-- First handle empty array map steps (initial_tasks = 0) - direct transition to completed
|
|
4
|
-
WITH empty_map_steps AS (
|
|
5
|
-
SELECT step_state.*
|
|
6
|
-
FROM pgflow.step_states AS step_state
|
|
7
|
-
JOIN pgflow.steps AS step
|
|
8
|
-
ON step.flow_slug = step_state.flow_slug
|
|
9
|
-
AND step.step_slug = step_state.step_slug
|
|
10
|
-
WHERE step_state.run_id = start_ready_steps.run_id
|
|
11
|
-
AND step_state.status = 'created'
|
|
12
|
-
AND step_state.remaining_deps = 0
|
|
13
|
-
AND step.step_type = 'map'
|
|
14
|
-
AND step_state.initial_tasks = 0
|
|
15
|
-
ORDER BY step_state.step_slug
|
|
16
|
-
FOR UPDATE OF step_state
|
|
17
|
-
),
|
|
18
|
-
completed_empty_steps AS (
|
|
19
|
-
UPDATE pgflow.step_states
|
|
20
|
-
SET status = 'completed',
|
|
21
|
-
started_at = now(),
|
|
22
|
-
completed_at = now(),
|
|
23
|
-
remaining_tasks = 0
|
|
24
|
-
FROM empty_map_steps
|
|
25
|
-
WHERE pgflow.step_states.run_id = start_ready_steps.run_id
|
|
26
|
-
AND pgflow.step_states.step_slug = empty_map_steps.step_slug
|
|
27
|
-
RETURNING pgflow.step_states.*
|
|
28
|
-
),
|
|
29
|
-
broadcast_empty_completed AS (
|
|
30
|
-
SELECT
|
|
31
|
-
realtime.send(
|
|
32
|
-
jsonb_build_object(
|
|
33
|
-
'event_type', 'step:completed',
|
|
34
|
-
'run_id', completed_step.run_id,
|
|
35
|
-
'step_slug', completed_step.step_slug,
|
|
36
|
-
'status', 'completed',
|
|
37
|
-
'started_at', completed_step.started_at,
|
|
38
|
-
'completed_at', completed_step.completed_at,
|
|
39
|
-
'remaining_tasks', 0,
|
|
40
|
-
'remaining_deps', 0,
|
|
41
|
-
'output', '[]'::jsonb
|
|
42
|
-
),
|
|
43
|
-
concat('step:', completed_step.step_slug, ':completed'),
|
|
44
|
-
concat('pgflow:run:', completed_step.run_id),
|
|
45
|
-
false
|
|
46
|
-
)
|
|
47
|
-
FROM completed_empty_steps AS completed_step
|
|
48
|
-
),
|
|
49
|
-
|
|
50
|
-
-- Now handle non-empty steps (both single and map with initial_tasks > 0)
|
|
51
|
-
ready_steps AS (
|
|
52
|
-
SELECT *
|
|
53
|
-
FROM pgflow.step_states AS step_state
|
|
54
|
-
WHERE step_state.run_id = start_ready_steps.run_id
|
|
55
|
-
AND step_state.status = 'created'
|
|
56
|
-
AND step_state.remaining_deps = 0
|
|
57
|
-
-- Exclude empty map steps already handled
|
|
58
|
-
AND NOT EXISTS (
|
|
59
|
-
SELECT 1 FROM empty_map_steps
|
|
60
|
-
WHERE empty_map_steps.run_id = step_state.run_id
|
|
61
|
-
AND empty_map_steps.step_slug = step_state.step_slug
|
|
62
|
-
)
|
|
63
|
-
ORDER BY step_state.step_slug
|
|
64
|
-
FOR UPDATE
|
|
65
|
-
),
|
|
66
|
-
started_step_states AS (
|
|
67
|
-
UPDATE pgflow.step_states
|
|
68
|
-
SET status = 'started',
|
|
69
|
-
started_at = now(),
|
|
70
|
-
remaining_tasks = ready_steps.initial_tasks -- Copy initial_tasks to remaining_tasks when starting
|
|
71
|
-
FROM ready_steps
|
|
72
|
-
WHERE pgflow.step_states.run_id = start_ready_steps.run_id
|
|
73
|
-
AND pgflow.step_states.step_slug = ready_steps.step_slug
|
|
74
|
-
RETURNING pgflow.step_states.*
|
|
75
|
-
),
|
|
76
|
-
|
|
77
|
-
-- Generate tasks based on initial_tasks count
|
|
78
|
-
-- For single steps: initial_tasks = 1, so generate_series(0, 0) = single task with index 0
|
|
79
|
-
-- For map steps: initial_tasks = N, so generate_series(0, N-1) = N tasks with indices 0..N-1
|
|
80
|
-
-- Group messages by step for batch sending
|
|
81
|
-
message_batches AS (
|
|
82
|
-
SELECT
|
|
83
|
-
started_step.flow_slug,
|
|
84
|
-
started_step.run_id,
|
|
85
|
-
started_step.step_slug,
|
|
86
|
-
COALESCE(step.opt_start_delay, 0) as delay,
|
|
87
|
-
array_agg(
|
|
88
|
-
jsonb_build_object(
|
|
89
|
-
'flow_slug', started_step.flow_slug,
|
|
90
|
-
'run_id', started_step.run_id,
|
|
91
|
-
'step_slug', started_step.step_slug,
|
|
92
|
-
'task_index', task_idx.task_index
|
|
93
|
-
) ORDER BY task_idx.task_index
|
|
94
|
-
) AS messages,
|
|
95
|
-
array_agg(task_idx.task_index ORDER BY task_idx.task_index) AS task_indices
|
|
96
|
-
FROM started_step_states AS started_step
|
|
97
|
-
JOIN pgflow.steps AS step
|
|
98
|
-
ON step.flow_slug = started_step.flow_slug
|
|
99
|
-
AND step.step_slug = started_step.step_slug
|
|
100
|
-
-- Generate task indices from 0 to initial_tasks-1
|
|
101
|
-
CROSS JOIN LATERAL generate_series(0, started_step.initial_tasks - 1) AS task_idx(task_index)
|
|
102
|
-
GROUP BY started_step.flow_slug, started_step.run_id, started_step.step_slug, step.opt_start_delay
|
|
103
|
-
),
|
|
104
|
-
-- Send messages in batch for better performance with large arrays
|
|
105
|
-
sent_messages AS (
|
|
106
|
-
SELECT
|
|
107
|
-
mb.flow_slug,
|
|
108
|
-
mb.run_id,
|
|
109
|
-
mb.step_slug,
|
|
110
|
-
task_indices.task_index,
|
|
111
|
-
msg_ids.msg_id
|
|
112
|
-
FROM message_batches mb
|
|
113
|
-
CROSS JOIN LATERAL unnest(mb.task_indices) WITH ORDINALITY AS task_indices(task_index, idx_ord)
|
|
114
|
-
CROSS JOIN LATERAL pgmq.send_batch(mb.flow_slug, mb.messages, mb.delay) WITH ORDINALITY AS msg_ids(msg_id, msg_ord)
|
|
115
|
-
WHERE task_indices.idx_ord = msg_ids.msg_ord
|
|
116
|
-
),
|
|
117
|
-
|
|
118
|
-
broadcast_events AS (
|
|
119
|
-
SELECT
|
|
120
|
-
realtime.send(
|
|
121
|
-
jsonb_build_object(
|
|
122
|
-
'event_type', 'step:started',
|
|
123
|
-
'run_id', started_step.run_id,
|
|
124
|
-
'step_slug', started_step.step_slug,
|
|
125
|
-
'status', 'started',
|
|
126
|
-
'started_at', started_step.started_at,
|
|
127
|
-
'remaining_tasks', started_step.remaining_tasks,
|
|
128
|
-
'remaining_deps', started_step.remaining_deps
|
|
129
|
-
),
|
|
130
|
-
concat('step:', started_step.step_slug, ':started'),
|
|
131
|
-
concat('pgflow:run:', started_step.run_id),
|
|
132
|
-
false
|
|
133
|
-
)
|
|
134
|
-
FROM started_step_states AS started_step
|
|
135
|
-
)
|
|
136
|
-
|
|
137
|
-
-- Insert all generated tasks with their respective task_index values
|
|
138
|
-
INSERT INTO pgflow.step_tasks (flow_slug, run_id, step_slug, task_index, message_id)
|
|
139
|
-
SELECT
|
|
140
|
-
sent_messages.flow_slug,
|
|
141
|
-
sent_messages.run_id,
|
|
142
|
-
sent_messages.step_slug,
|
|
143
|
-
sent_messages.task_index,
|
|
144
|
-
sent_messages.msg_id
|
|
145
|
-
FROM sent_messages;
|
|
146
|
-
$$;
|