@cascade-flow/backend-postgres 0.2.15 → 0.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/db.d.ts +42 -8
- package/dist/db.d.ts.map +1 -1
- package/dist/index.d.ts +20 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +404 -42
- package/dist/index.js.map +5 -5
- package/dist/migrations.d.ts.map +1 -1
- package/package.json +3 -3
package/dist/index.js (CHANGED)
```diff
@@ -4849,6 +4849,9 @@ class DatabaseClient {
     let attemptNumber = null;
     let availableAtUs = null;
     let exportOutput = null;
+    let priority = null;
+    let slotIndex = null;
+    let workerConcurrency = null;
     let errorNameHash = "";
     let errorMessageHash = "";
     let errorStackExactHash = "";
@@ -4856,12 +4859,15 @@ class DatabaseClient {
     let errorStackPortableHash = "";
     if (se.type === "StepStarted" || se.type === "StepHeartbeat") {
       workerId = se.workerId;
+      slotIndex = se.slotIndex ?? null;
+      workerConcurrency = se.workerConcurrency ?? null;
     }
     if ("attemptNumber" in se) {
       attemptNumber = se.attemptNumber;
     }
     if (se.type === "StepScheduled") {
       availableAtUs = se.availableAtUs;
+      priority = se.priority ?? null;
     }
     if (se.type === "StepCompleted") {
       exportOutput = se.exportOutput;
@@ -4884,9 +4890,10 @@ class DatabaseClient {
       event_id, workflow_slug, run_id, step_id, timestamp_us, category, type, event_data,
       worker_id, attempt_number, available_at_us, export_output,
       error_name_hash, error_message_hash, error_stack_exact_hash,
-      error_stack_normalized_hash, error_stack_portable_hash, version_id
+      error_stack_normalized_hash, error_stack_portable_hash, version_id, priority,
+      slot_index, worker_concurrency
     )
-    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)`, [
+    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21)`, [
     se.eventId,
     se.workflowSlug,
     se.runId,
@@ -4904,7 +4911,10 @@ class DatabaseClient {
     errorStackExactHash,
     errorStackNormalizedHash,
     errorStackPortableHash,
-    versionId
+    versionId,
+    priority,
+    slotIndex,
+    workerConcurrency
     ]);
   }
 } finally {
```
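The hunks above thread the new priority, slot_index, and worker_concurrency fields through the event writer: three new locals, population from StepStarted/StepHeartbeat/StepScheduled events, and an INSERT widened from 18 to 21 positional parameters. A sketch of one way to keep a column list and its $n placeholders in lockstep; the insertSql helper is illustrative only, not part of this package:

```ts
// Hypothetical helper (not in @cascade-flow/backend-postgres): derive
// "$1..$n" placeholders from the column list so the INSERT arity cannot
// drift from the number of bound values.
function insertSql(schema: string, table: string, columns: string[]): string {
  const placeholders = columns.map((_, i) => `$${i + 1}`).join(", ");
  return `INSERT INTO ${schema}.${table} (${columns.join(", ")}) VALUES (${placeholders})`;
}

const columns = [
  "event_id", "workflow_slug", "run_id", "step_id", "timestamp_us",
  "category", "type", "event_data", "worker_id", "attempt_number",
  "available_at_us", "export_output", "error_name_hash", "error_message_hash",
  "error_stack_exact_hash", "error_stack_normalized_hash",
  "error_stack_portable_hash", "version_id", "priority", "slot_index",
  "worker_concurrency",
];
// columns.length === 21, matching VALUES ($1 ... $21) in the hunk above.
console.log(insertSql("cascadeflow", "step_events", columns));
```

Generating the placeholders from the column list makes an arity mismatch like $18 vs. $21 impossible by construction.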
```diff
@@ -4966,6 +4976,9 @@ class DatabaseClient {
     let attemptNumber = null;
     let availableAtUs = null;
     let exportOutput = null;
+    let priority = null;
+    let slotIndex = null;
+    let workerConcurrency = null;
     let errorNameHash = "";
     let errorMessageHash = "";
     let errorStackExactHash = "";
@@ -4973,12 +4986,15 @@ class DatabaseClient {
     let errorStackPortableHash = "";
     if (se.type === "StepStarted" || se.type === "StepHeartbeat") {
       workerId = se.workerId;
+      slotIndex = se.slotIndex ?? null;
+      workerConcurrency = se.workerConcurrency ?? null;
     }
     if ("attemptNumber" in se) {
       attemptNumber = se.attemptNumber;
     }
     if (se.type === "StepScheduled") {
       availableAtUs = se.availableAtUs;
+      priority = se.priority ?? null;
     }
     if (se.type === "StepCompleted") {
       exportOutput = se.exportOutput;
@@ -5001,9 +5017,10 @@ class DatabaseClient {
       event_id, workflow_slug, run_id, step_id, timestamp_us, category, type, event_data,
       worker_id, attempt_number, available_at_us, export_output,
       error_name_hash, error_message_hash, error_stack_exact_hash,
-      error_stack_normalized_hash, error_stack_portable_hash, version_id
+      error_stack_normalized_hash, error_stack_portable_hash, version_id, priority,
+      slot_index, worker_concurrency
     )
-    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)`, [
+    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21)`, [
     se.eventId,
     se.workflowSlug,
     se.runId,
@@ -5021,7 +5038,10 @@ class DatabaseClient {
     errorStackExactHash,
     errorStackNormalizedHash,
     errorStackPortableHash,
-    versionId
+    versionId,
+    priority,
+    slotIndex,
+    workerConcurrency
     ]);
   }
 }
@@ -5128,6 +5148,8 @@ class DatabaseClient {
   }
   let workerId2 = null;
   let attemptNumber = null;
+  let slotIndex = null;
+  let workerConcurrency = null;
   let errorNameHash = "";
   let errorMessageHash = "";
   let errorStackExactHash = "";
@@ -5136,6 +5158,8 @@ class DatabaseClient {
   if (eventToWrite.type === "StepStarted") {
     workerId2 = eventToWrite.workerId;
     attemptNumber = eventToWrite.attemptNumber;
+    slotIndex = eventToWrite.slotIndex ?? null;
+    workerConcurrency = eventToWrite.workerConcurrency ?? null;
   }
   if (eventToWrite.type === "StepFailed") {
     errorNameHash = eventToWrite.errorFingerprints.nameHash;
@@ -5155,9 +5179,10 @@ class DatabaseClient {
     event_id, workflow_slug, run_id, step_id, timestamp_us, category, type, event_data,
     worker_id, attempt_number, available_at_us, export_output,
     error_name_hash, error_message_hash, error_stack_exact_hash,
-    error_stack_normalized_hash, error_stack_portable_hash, version_id
+    error_stack_normalized_hash, error_stack_portable_hash, version_id,
+    slot_index, worker_concurrency
   )
-  VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)`, [
+  VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20)`, [
   eventToWrite.eventId,
   eventToWrite.workflowSlug,
   eventToWrite.runId,
@@ -5175,7 +5200,9 @@ class DatabaseClient {
   errorStackExactHash,
   errorStackNormalizedHash,
   errorStackPortableHash,
-  versionId
+  versionId,
+  slotIndex,
+  workerConcurrency
   ]);
   await client.query("COMMIT");
   return true;
@@ -5194,7 +5221,7 @@ class DatabaseClient {
   let query = `
     WITH latest_step_events AS (
       SELECT DISTINCT ON (workflow_slug, run_id, step_id)
-        workflow_slug, run_id, step_id, type, available_at_us
+        workflow_slug, run_id, step_id, type, available_at_us, priority
       FROM ${this.schema}.step_events
       ${options?.workflowSlugs ? "WHERE workflow_slug = ANY($1)" : ""}
       ORDER BY workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC
@@ -5203,6 +5230,7 @@ class DatabaseClient {
     FROM latest_step_events
     WHERE type = ANY($${options?.workflowSlugs ? "2" : "1"})
       AND (available_at_us IS NULL OR available_at_us <= $${options?.workflowSlugs ? "3" : "2"})
+    ORDER BY priority DESC NULLS LAST, available_at_us ASC
     ${options?.limit ? `LIMIT $${options?.workflowSlugs ? "4" : "3"}` : ""}
   `;
   const params = [];
```
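The scheduled-step query now carries priority through the latest_step_events CTE and orders claims by `priority DESC NULLS LAST, available_at_us ASC`: prioritized steps first, higher numbers winning, unprioritized steps last, ties broken by earliest availability. A minimal TypeScript comparator expressing the same ordering (illustrative only, not package code):

```ts
// In-memory equivalent of "ORDER BY priority DESC NULLS LAST, available_at_us ASC".
interface ScheduledStep {
  priority: number | null;
  availableAtUs: number;
}

function compareQueueOrder(a: ScheduledStep, b: ScheduledStep): number {
  if (a.priority !== b.priority) {
    if (a.priority === null) return 1;  // NULLS LAST: unprioritized sorts after
    if (b.priority === null) return -1;
    return b.priority - a.priority;     // DESC: larger priority first
  }
  return a.availableAtUs - b.availableAtUs; // ASC tie-break on availability
}

// e.g. steps.sort(compareQueueOrder);
```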
```diff
@@ -5609,23 +5637,30 @@ class DatabaseClient {
     ls.run_id,
     ls.status,
     rs.created_at,
-    rs.tags
+    rs.tags,
+    COUNT(*) OVER() AS total_count
   FROM latest_status ls
   JOIN run_submitted rs ON ls.workflow_slug = rs.workflow_slug AND ls.run_id = rs.run_id
   WHERE ($2::text[] IS NULL OR ls.type = ANY($2))
   ORDER BY rs.created_at DESC
-  LIMIT $3`, [
+  LIMIT $3
+  OFFSET $4`, [
   options?.workflowSlug ?? null,
   eventTypeFilter,
-  options?.limit ?? null
+  options?.limit ?? null,
+  options?.offset ?? 0
   ]);
-
-
-
-
-
-
-
+  const total = result.rows.length > 0 ? parseInt(result.rows[0].total_count, 10) : 0;
+  return {
+    runs: result.rows.map((row) => ({
+      workflowSlug: row.workflow_slug,
+      runId: row.run_id,
+      status: row.status,
+      createdAt: parseInt(row.created_at, 10),
+      tags: row.tags
+    })),
+    total
+  };
 } finally {
   client.release();
 }
```
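listRunsFiltered switches to window-function pagination: `COUNT(*) OVER()` attaches the pre-LIMIT total to every returned row, so a single round trip yields both the page and the overall count, and the method now resolves to `{ runs, total }`. A self-contained sketch of the same pattern, assuming node-postgres; the `runs` table and columns are illustrative:

```ts
import { Pool } from "pg";

// COUNT(*) OVER() pagination: the window function sees the full filtered
// result set, so every row of the LIMITed page carries the grand total.
async function listPage(pool: Pool, limit: number, offset: number) {
  const result = await pool.query(
    `SELECT run_id, created_at, COUNT(*) OVER() AS total_count
       FROM runs
      ORDER BY created_at DESC
      LIMIT $1 OFFSET $2`,
    [limit, offset]
  );
  // Same caveat as the diff above: an empty page has no row to carry the
  // count, so total degrades to 0 when offset points past the last row.
  const total = result.rows.length > 0 ? parseInt(result.rows[0].total_count, 10) : 0;
  return { rows: result.rows, total };
}
```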
```diff
@@ -5732,6 +5767,222 @@ class DatabaseClient {
       client.release();
     }
   }
+  async getActiveWorkersAggregation(options) {
+    const client = await this.pool.connect();
+    try {
+      const nowUs = Date.now() * 1000;
+      const staleThresholdUs = options?.staleThresholdUs ?? 30 * 1000 * 1000;
+      const includeInactive = options?.includeInactive ?? false;
+      const staleTimestamp = nowUs - staleThresholdUs;
+      const workerStatsResult = await client.query(`
+        WITH worker_activity AS (
+          -- All worker activity from StepStarted and StepHeartbeat events
+          SELECT
+            worker_id,
+            MAX(timestamp_us) AS last_seen_us
+          FROM ${this.schema}.step_events
+          WHERE worker_id IS NOT NULL
+            AND type IN ('StepStarted', 'StepHeartbeat')
+            ${options?.timeRange ? `AND timestamp_us >= $1 AND timestamp_us <= $2` : ""}
+          GROUP BY worker_id
+        ),
+        -- Steps started by each worker
+        steps_started AS (
+          SELECT DISTINCT
+            se.worker_id,
+            se.workflow_slug,
+            se.run_id,
+            se.step_id,
+            se.attempt_number
+          FROM ${this.schema}.step_events se
+          WHERE se.type = 'StepStarted'
+            AND se.worker_id IS NOT NULL
+        ),
+        -- Steps completed
+        completed_steps AS (
+          SELECT
+            ss.worker_id,
+            COUNT(*) AS completed_count
+          FROM steps_started ss
+          INNER JOIN ${this.schema}.step_events se
+            ON se.workflow_slug = ss.workflow_slug
+            AND se.run_id = ss.run_id
+            AND se.step_id = ss.step_id
+            AND se.attempt_number = ss.attempt_number
+            AND se.type = 'StepCompleted'
+          GROUP BY ss.worker_id
+        ),
+        -- Steps failed
+        failed_steps AS (
+          SELECT
+            ss.worker_id,
+            COUNT(*) AS failed_count
+          FROM steps_started ss
+          INNER JOIN ${this.schema}.step_events se
+            ON se.workflow_slug = ss.workflow_slug
+            AND se.run_id = ss.run_id
+            AND se.step_id = ss.step_id
+            AND se.attempt_number = ss.attempt_number
+            AND se.type = 'StepFailed'
+          GROUP BY ss.worker_id
+        ),
+        -- Reclamation counts (times this worker's steps were reclaimed)
+        reclaimed_counts AS (
+          SELECT
+            (event_data->>'originalWorkerId') AS worker_id,
+            COUNT(*) AS reclaimed_count
+          FROM ${this.schema}.step_events
+          WHERE type = 'StepReclaimed'
+          GROUP BY (event_data->>'originalWorkerId')
+        )
+        SELECT
+          wa.worker_id,
+          wa.last_seen_us,
+          COALESCE(cs.completed_count, 0) AS total_steps_processed,
+          COALESCE(fs.failed_count, 0) AS failed_steps,
+          COALESCE(rc.reclaimed_count, 0) AS reclaimed_from_count
+        FROM worker_activity wa
+        LEFT JOIN completed_steps cs ON wa.worker_id = cs.worker_id
+        LEFT JOIN failed_steps fs ON wa.worker_id = fs.worker_id
+        LEFT JOIN reclaimed_counts rc ON wa.worker_id = rc.worker_id
+        ${!includeInactive ? `WHERE wa.last_seen_us >= ${staleTimestamp}` : ""}
+        ORDER BY wa.last_seen_us DESC
+      `, options?.timeRange ? [options.timeRange.startUs, options.timeRange.endUs] : []);
+      const runningStepsResult = await client.query(`
+        WITH latest_step_events AS (
+          -- Get the latest event per step (excluding LogEntry)
+          SELECT DISTINCT ON (workflow_slug, run_id, step_id)
+            workflow_slug,
+            run_id,
+            step_id,
+            type,
+            worker_id,
+            timestamp_us,
+            slot_index,
+            worker_concurrency
+          FROM ${this.schema}.step_events
+          WHERE type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
+          ORDER BY workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC
+        ),
+        -- Steps currently running (latest event is StepStarted or StepHeartbeat)
+        running_steps AS (
+          SELECT
+            workflow_slug,
+            run_id,
+            step_id,
+            worker_id,
+            slot_index,
+            worker_concurrency
+          FROM latest_step_events
+          WHERE type IN ('StepStarted', 'StepHeartbeat')
+            AND worker_id IS NOT NULL
+        ),
+        -- Get start time and last heartbeat for each running step
+        step_times AS (
+          SELECT
+            rs.workflow_slug,
+            rs.run_id,
+            rs.step_id,
+            rs.worker_id,
+            rs.slot_index,
+            rs.worker_concurrency,
+            MIN(se.timestamp_us) FILTER (WHERE se.type = 'StepStarted') AS started_at_us,
+            MAX(se.timestamp_us) FILTER (WHERE se.type IN ('StepStarted', 'StepHeartbeat')) AS last_heartbeat_us
+          FROM running_steps rs
+          INNER JOIN ${this.schema}.step_events se
+            ON se.workflow_slug = rs.workflow_slug
+            AND se.run_id = rs.run_id
+            AND se.step_id = rs.step_id
+            AND se.type IN ('StepStarted', 'StepHeartbeat')
+          GROUP BY rs.workflow_slug, rs.run_id, rs.step_id, rs.worker_id, rs.slot_index, rs.worker_concurrency
+        )
+        SELECT
+          worker_id,
+          workflow_slug,
+          run_id,
+          step_id,
+          started_at_us,
+          last_heartbeat_us,
+          slot_index,
+          worker_concurrency
+        FROM step_times
+        ORDER BY worker_id, last_heartbeat_us DESC
+      `);
+      const workerConcurrencyResult = await client.query(`
+        SELECT DISTINCT ON (worker_id)
+          worker_id,
+          worker_concurrency
+        FROM ${this.schema}.step_events
+        WHERE worker_id IS NOT NULL
+          AND worker_concurrency IS NOT NULL
+          AND type IN ('StepStarted', 'StepHeartbeat')
+        ORDER BY worker_id, timestamp_us DESC
+      `);
+      const workerConcurrencyMap = new Map;
+      for (const row of workerConcurrencyResult.rows) {
+        workerConcurrencyMap.set(row.worker_id, row.worker_concurrency);
+      }
+      const workerMap = new Map;
+      for (const row of workerStatsResult.rows) {
+        workerMap.set(row.worker_id, {
+          workerId: row.worker_id,
+          lastSeenUs: parseInt(row.last_seen_us, 10),
+          totalStepsProcessed: parseInt(row.total_steps_processed, 10),
+          failedSteps: parseInt(row.failed_steps, 10),
+          reclaimedFromCount: parseInt(row.reclaimed_from_count, 10),
+          activeSteps: []
+        });
+      }
+      for (const row of runningStepsResult.rows) {
+        let worker = workerMap.get(row.worker_id);
+        if (!worker) {
+          worker = {
+            workerId: row.worker_id,
+            lastSeenUs: parseInt(row.last_heartbeat_us, 10),
+            totalStepsProcessed: 0,
+            failedSteps: 0,
+            reclaimedFromCount: 0,
+            activeSteps: []
+          };
+          workerMap.set(row.worker_id, worker);
+        }
+        if (row.worker_concurrency != null && worker.workerConcurrency == null) {
+          worker.workerConcurrency = row.worker_concurrency;
+        }
+        worker.activeSteps.push({
+          workflowSlug: row.workflow_slug,
+          runId: row.run_id,
+          stepId: row.step_id,
+          startedAtUs: parseInt(row.started_at_us, 10),
+          lastHeartbeatUs: parseInt(row.last_heartbeat_us, 10),
+          slotIndex: row.slot_index ?? undefined
+        });
+      }
+      for (const worker of workerMap.values()) {
+        if (worker.workerConcurrency == null) {
+          const historicalConcurrency = workerConcurrencyMap.get(worker.workerId);
+          if (historicalConcurrency != null) {
+            worker.workerConcurrency = historicalConcurrency;
+          }
+        }
+      }
+      const workers = Array.from(workerMap.values()).sort((a, b) => {
+        if (b.activeSteps.length !== a.activeSteps.length) {
+          return b.activeSteps.length - a.activeSteps.length;
+        }
+        return b.lastSeenUs - a.lastSeenUs;
+      });
+      const totalRunningSteps = workers.reduce((sum, w) => sum + w.activeSteps.length, 0);
+      const totalActiveWorkers = workers.filter((w) => w.activeSteps.length > 0).length;
+      return {
+        workers,
+        totalActiveWorkers,
+        totalRunningSteps
+      };
+    } finally {
+      client.release();
+    }
+  }
 }
 function createPool(connectionString) {
   return new Pool2({ connectionString });
```
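The new getActiveWorkersAggregation derives worker liveness entirely from the event log: last_seen_us from StepStarted/StepHeartbeat, per-worker completed/failed counts joined through steps_started, reclaim counts from StepReclaimed payloads, and currently running steps from the latest per-step event. A hedged usage sketch; the DatabaseClient type is internal and its construction is not shown in this diff, so the parameter is typed structurally:

```ts
// Consuming the aggregation. Option and field names come from the hunk
// above; everything else here is an assumption for illustration.
async function printWorkerSummary(db: {
  getActiveWorkersAggregation(options?: {
    staleThresholdUs?: number;
    includeInactive?: boolean;
    timeRange?: { startUs: number; endUs: number };
  }): Promise<{
    workers: Array<{ workerId: string; lastSeenUs: number; activeSteps: unknown[] }>;
    totalActiveWorkers: number;
    totalRunningSteps: number;
  }>;
}): Promise<void> {
  const summary = await db.getActiveWorkersAggregation({
    staleThresholdUs: 30_000_000, // 30 s in microseconds, the method's default
    includeInactive: false,
  });
  for (const w of summary.workers) {
    console.log(w.workerId, `${w.activeSteps.length} running step(s)`);
  }
  console.log(`${summary.totalActiveWorkers} active workers, ${summary.totalRunningSteps} running steps`);
}
```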
```diff
@@ -6079,6 +6330,60 @@ async function migration008_addRunIdIndex(pool, schema) {
     client.release();
   }
 }
+async function migration009_addStepPriority(pool, schema) {
+  const client = await pool.connect();
+  try {
+    await client.query(`
+      ALTER TABLE ${schema}.step_events
+      ADD COLUMN IF NOT EXISTS priority INTEGER
+    `);
+    await client.query(`
+      CREATE INDEX IF NOT EXISTS idx_step_events_priority_queue
+      ON ${schema}.step_events (priority DESC NULLS LAST, available_at_us ASC, timestamp_us ASC)
+      WHERE type IN ('StepScheduled', 'StepReclaimed', 'StepRetrying')
+    `);
+    console.log("[Migration 009] Step priority column and index added successfully");
+  } catch (error) {
+    console.error("[Migration 009] Error adding step priority:", error);
+    throw error;
+  } finally {
+    client.release();
+  }
+}
+async function migration010_addSlotTracking(pool, schema) {
+  const client = await pool.connect();
+  try {
+    await client.query(`
+      ALTER TABLE ${schema}.step_events
+      ADD COLUMN IF NOT EXISTS slot_index INTEGER,
+      ADD COLUMN IF NOT EXISTS worker_concurrency INTEGER
+    `);
+    console.log("[Migration 010] Slot tracking columns added successfully");
+  } catch (error) {
+    console.error("[Migration 010] Error adding slot tracking columns:", error);
+    throw error;
+  } finally {
+    client.release();
+  }
+}
+async function migration011_addWorkerConcurrencyIndex(pool, schema) {
+  const client = await pool.connect();
+  try {
+    await client.query(`
+      CREATE INDEX IF NOT EXISTS idx_step_events_worker_concurrency
+      ON ${schema}.step_events (worker_id, timestamp_us DESC)
+      WHERE worker_id IS NOT NULL
+        AND worker_concurrency IS NOT NULL
+        AND type IN ('StepStarted', 'StepHeartbeat')
+    `);
+    console.log("[Migration 011] Worker concurrency index added successfully");
+  } catch (error) {
+    console.error("[Migration 011] Error adding worker concurrency index:", error);
+    throw error;
+  } finally {
+    client.release();
+  }
+}
 async function runMigrations(pool, schema = "cascadeflow") {
   console.log(`[Migrations] Starting database migrations in schema '${schema}'...`);
   try {
```
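Migrations 009 through 011 are idempotent (ADD COLUMN IF NOT EXISTS, CREATE INDEX IF NOT EXISTS), so re-running them against an already-migrated 0.2.15 database is safe. The partial index from 009 mirrors the claim query exactly: same type predicate, same priority/availability key order. A hedged verification sketch, not from the package, assuming node-postgres and the default cascadeflow schema:

```ts
import { Pool } from "pg";

// After migration 009, a scheduled-step scan with the matching predicate
// and ORDER BY should be eligible for idx_step_events_priority_queue;
// EXPLAIN lets you confirm what the planner actually chooses.
async function explainPriorityScan(pool: Pool): Promise<void> {
  const plan = await pool.query(`
    EXPLAIN
    SELECT step_id
      FROM cascadeflow.step_events
     WHERE type IN ('StepScheduled', 'StepReclaimed', 'StepRetrying')
     ORDER BY priority DESC NULLS LAST, available_at_us ASC, timestamp_us ASC
     LIMIT 10
  `);
  for (const row of plan.rows) console.log(row["QUERY PLAN"]);
}
```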
```diff
@@ -6091,6 +6396,9 @@ async function runMigrations(pool, schema = "cascadeflow") {
     await migration006_addDescIndexes(pool, schema);
     await migration007_addWorkerIndexes(pool, schema);
     await migration008_addRunIdIndex(pool, schema);
+    await migration009_addStepPriority(pool, schema);
+    await migration010_addSlotTracking(pool, schema);
+    await migration011_addWorkerConcurrencyIndex(pool, schema);
     console.log("[Migrations] All migrations completed successfully");
   } catch (error) {
     console.error("[Migrations] Migration failed:", error);
@@ -6401,7 +6709,8 @@ class PostgresBackend extends Backend {
     availableAtUs: metadata.availableAt,
     reason: metadata.reason,
     attemptNumber: metadata.attemptNumber,
-    retryDelayMs: metadata.retryDelayMs
+    retryDelayMs: metadata.retryDelayMs,
+    priority: metadata.priority
     };
     await this.db.appendEvent("step_events", event);
   }
@@ -6419,7 +6728,9 @@ class PostgresBackend extends Backend {
     stepId,
     attemptNumber,
     workerId,
-    dependencies: metadata.dependencies
+    dependencies: metadata.dependencies,
+    slotIndex: metadata.slotIndex,
+    workerConcurrency: metadata.workerConcurrency
     };
     await this.db.appendEvent("step_events", event);
   }
@@ -6538,6 +6849,7 @@ class PostgresBackend extends Backend {
     reason: "retry",
     attemptNumber: scheduleMetadata.nextAttemptNumber,
     retryDelayMs: scheduleMetadata.retryDelayMs,
+    priority: scheduleMetadata.priority,
     policyIndex: scheduleMetadata.policyIndex,
     attemptInPolicy: scheduleMetadata.attemptInPolicy
     };
@@ -6607,7 +6919,7 @@ class PostgresBackend extends Backend {
     };
     await this.db.appendEvent("step_events", event);
   }
-  async saveStepHeartbeat(workflowSlug, runId, stepId, workerId, attemptNumber) {
+  async saveStepHeartbeat(workflowSlug, runId, stepId, workerId, attemptNumber, slotInfo) {
     const now = getMicrosecondTimestamp();
     const event = {
       category: "step",
@@ -6618,7 +6930,9 @@ class PostgresBackend extends Backend {
       runId,
       stepId,
       workerId,
-      attemptNumber
+      attemptNumber,
+      slotIndex: slotInfo?.slotIndex,
+      workerConcurrency: slotInfo?.workerConcurrency
     };
     await this.db.appendEvent("step_events", event);
   }
```
```diff
@@ -6816,39 +7130,62 @@ class PostgresBackend extends Backend {
     return { runId, isNew: true };
   }
   async listRuns(options) {
-    const filteredRuns = await this.db.listRunsFiltered({
-      workflowSlug: options?.workflowSlug,
-      status: options?.status,
-      limit: options?.tags?.length ? undefined : options?.limit
-    });
-    let runsToLoad = filteredRuns;
     if (options?.tags && options.tags.length > 0) {
-
+      const { runs: filteredRuns2, total: unfilteredTotal } = await this.db.listRunsFiltered({
+        workflowSlug: options?.workflowSlug,
+        status: options?.status
+      });
+      const tagFilteredRuns = filteredRuns2.filter((run) => {
       const runTags = run.tags || [];
       return options.tags.every((tag) => runTags.includes(tag));
       });
-
-
+      const total2 = tagFilteredRuns.length;
+      const offset = options?.offset ?? 0;
+      const limit = options?.limit;
+      const paginatedRuns = limit ? tagFilteredRuns.slice(offset, offset + limit) : tagFilteredRuns.slice(offset);
+      if (paginatedRuns.length === 0) {
+        return { runs: [], total: total2 };
+      }
+      const eventsByRun2 = await this.db.loadWorkflowEventsForRuns(paginatedRuns.map((r) => ({ workflowSlug: r.workflowSlug, runId: r.runId })));
+      const runs2 = [];
+      for (const run of paginatedRuns) {
+        const key = `${run.workflowSlug}:${run.runId}`;
+        const events = eventsByRun2.get(key);
+        if (!events || events.length === 0)
+          continue;
+        try {
+          const state = projectRunStateFromEvents(events, run.workflowSlug);
+          runs2.push(state);
+        } catch {
+          continue;
+        }
       }
+      return { runs: runs2, total: total2 };
     }
-
-
+    const { runs: filteredRuns, total } = await this.db.listRunsFiltered({
+      workflowSlug: options?.workflowSlug,
+      status: options?.status,
+      limit: options?.limit,
+      offset: options?.offset
+    });
+    if (filteredRuns.length === 0) {
+      return { runs: [], total };
     }
-    const eventsByRun = await this.db.loadWorkflowEventsForRuns(
-    const
-    for (const run of
+    const eventsByRun = await this.db.loadWorkflowEventsForRuns(filteredRuns.map((r) => ({ workflowSlug: r.workflowSlug, runId: r.runId })));
+    const runs = [];
+    for (const run of filteredRuns) {
     const key = `${run.workflowSlug}:${run.runId}`;
     const events = eventsByRun.get(key);
     if (!events || events.length === 0)
       continue;
     try {
       const state = projectRunStateFromEvents(events, run.workflowSlug);
-
+      runs.push(state);
     } catch {
       continue;
     }
     }
-    return
+    return { runs, total };
   }
   async cancelRun(runId, reason) {
     const workflowSlug = await this.db.getRunWorkflowSlug(runId);
```
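This is a breaking shape change to listRuns: 0.2.15 returned a bare array of run states, while this version resolves to `{ runs, total }` and honors offset, with tag filtering paginated in memory after the SQL filter. A caller-side sketch; the backend instance and the workflow slug are assumptions for illustration, while option and result field names come from the hunk above:

```ts
// Driving a pager against the reshaped API.
interface ListRunsResult { runs: unknown[]; total: number }
interface RunLister {
  listRuns(options?: { workflowSlug?: string; limit?: number; offset?: number }): Promise<ListRunsResult>;
}

async function showPage(backend: RunLister, page: number, pageSize = 25): Promise<void> {
  const { runs, total } = await backend.listRuns({
    workflowSlug: "nightly-etl", // illustrative slug
    limit: pageSize,
    offset: page * pageSize,
  });
  console.log(`page ${page + 1}: ${runs.length} of ${total} runs`);
}
```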
```diff
@@ -6948,7 +7285,9 @@ class PostgresBackend extends Backend {
     stepId,
     workerId,
     dependencies: metadata.dependencies,
-    attemptNumber
+    attemptNumber,
+    slotIndex: metadata.slotIndex,
+    workerConcurrency: metadata.workerConcurrency
     };
     const claimed = await this.db.claimScheduledStep(workflowSlug, runId, stepId, workerId, event);
     return claimed ? { attemptNumber } : null;
@@ -7392,9 +7731,32 @@ class PostgresBackend extends Backend {
     successRate
     };
   }
+  async getActiveWorkers(options) {
+    const staleThresholdUs = options?.staleThresholdUs ?? 30 * 1000 * 1000;
+    const result = await this.db.getActiveWorkersAggregation({
+      staleThresholdUs,
+      includeInactive: options?.includeInactive,
+      timeRange: options?.timeRange
+    });
+    return {
+      workers: result.workers.map((w) => ({
+        workerId: w.workerId,
+        lastSeenUs: w.lastSeenUs,
+        currentlyRunningSteps: w.activeSteps.length,
+        totalStepsProcessed: w.totalStepsProcessed,
+        failedSteps: w.failedSteps,
+        reclaimedFromCount: w.reclaimedFromCount,
+        workerConcurrency: w.workerConcurrency,
+        activeSteps: w.activeSteps
+      })),
+      totalActiveWorkers: result.totalActiveWorkers,
+      totalRunningSteps: result.totalRunningSteps,
+      staleThresholdUs
+    };
+  }
 }
 export {
   PostgresBackend
 };
 
-//# debugId=
+//# debugId=9C62728A85A6463664756E2164756E21
```
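The public getActiveWorkers wrapper closes the loop: it exposes per-worker activeSteps alongside the recorded workerConcurrency, which makes slot utilization computable. A hedged closing sketch; field names mirror the hunk above and the rest is assumed:

```ts
// Slot utilization from the new getActiveWorkers surface.
interface ActiveWorker {
  workerId: string;
  currentlyRunningSteps: number;
  workerConcurrency?: number;
}

function utilization(w: ActiveWorker): string {
  if (w.workerConcurrency == null || w.workerConcurrency === 0) {
    return "n/a"; // concurrency unknown for events predating migration 010
  }
  return `${Math.round((w.currentlyRunningSteps / w.workerConcurrency) * 100)}%`;
}

// e.g. const { workers } = await backend.getActiveWorkers();
// workers.forEach((w) => console.log(w.workerId, utilization(w)));
```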
|