@cascade-flow/backend-postgres 0.2.16 → 0.2.17
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries, and is provided for informational purposes only.
- package/dist/db.d.ts +30 -0
- package/dist/db.d.ts.map +1 -1
- package/dist/index.d.ts +15 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +348 -16
- package/dist/index.js.map +5 -5
- package/dist/migrations.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
@@ -4849,6 +4849,9 @@ class DatabaseClient {
     let attemptNumber = null;
     let availableAtUs = null;
     let exportOutput = null;
+    let priority = null;
+    let slotIndex = null;
+    let workerConcurrency = null;
     let errorNameHash = "";
     let errorMessageHash = "";
     let errorStackExactHash = "";
@@ -4856,12 +4859,15 @@ class DatabaseClient {
     let errorStackPortableHash = "";
     if (se.type === "StepStarted" || se.type === "StepHeartbeat") {
       workerId = se.workerId;
+      slotIndex = se.slotIndex ?? null;
+      workerConcurrency = se.workerConcurrency ?? null;
     }
     if ("attemptNumber" in se) {
       attemptNumber = se.attemptNumber;
     }
     if (se.type === "StepScheduled") {
       availableAtUs = se.availableAtUs;
+      priority = se.priority ?? null;
     }
     if (se.type === "StepCompleted") {
       exportOutput = se.exportOutput;
@@ -4884,9 +4890,10 @@ class DatabaseClient {
         event_id, workflow_slug, run_id, step_id, timestamp_us, category, type, event_data,
         worker_id, attempt_number, available_at_us, export_output,
         error_name_hash, error_message_hash, error_stack_exact_hash,
-        error_stack_normalized_hash, error_stack_portable_hash, version_id
+        error_stack_normalized_hash, error_stack_portable_hash, version_id, priority,
+        slot_index, worker_concurrency
       )
-      VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)`, [
+      VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21)`, [
       se.eventId,
       se.workflowSlug,
       se.runId,
@@ -4904,7 +4911,10 @@ class DatabaseClient {
       errorStackExactHash,
       errorStackNormalizedHash,
       errorStackPortableHash,
-      versionId
+      versionId,
+      priority,
+      slotIndex,
+      workerConcurrency
     ]);
   }
 } finally {
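The hunks above widen the per-event INSERT from 18 to 21 parameters (priority, slot_index, worker_concurrency). A minimal sketch of an event as this writer now consumes it, with hypothetical values and field names taken from the diff (the full event type may carry more fields):

    const event = {
      category: "step",
      type: "StepStarted",
      workflowSlug: "send-emails",  // hypothetical value
      runId, stepId, workerId,
      attemptNumber: 1,
      slotIndex: 3,                 // new: which concurrency slot is running the step
      workerConcurrency: 8          // new: the worker's configured concurrency
    };
    await db.appendEvent("step_events", event); // omitted fields fall back to NULL via `?? null`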
@@ -4966,6 +4976,9 @@ class DatabaseClient {
     let attemptNumber = null;
     let availableAtUs = null;
     let exportOutput = null;
+    let priority = null;
+    let slotIndex = null;
+    let workerConcurrency = null;
     let errorNameHash = "";
     let errorMessageHash = "";
     let errorStackExactHash = "";
@@ -4973,12 +4986,15 @@ class DatabaseClient {
     let errorStackPortableHash = "";
     if (se.type === "StepStarted" || se.type === "StepHeartbeat") {
       workerId = se.workerId;
+      slotIndex = se.slotIndex ?? null;
+      workerConcurrency = se.workerConcurrency ?? null;
     }
     if ("attemptNumber" in se) {
       attemptNumber = se.attemptNumber;
     }
     if (se.type === "StepScheduled") {
       availableAtUs = se.availableAtUs;
+      priority = se.priority ?? null;
     }
     if (se.type === "StepCompleted") {
       exportOutput = se.exportOutput;
@@ -5001,9 +5017,10 @@ class DatabaseClient {
         event_id, workflow_slug, run_id, step_id, timestamp_us, category, type, event_data,
         worker_id, attempt_number, available_at_us, export_output,
         error_name_hash, error_message_hash, error_stack_exact_hash,
-        error_stack_normalized_hash, error_stack_portable_hash, version_id
+        error_stack_normalized_hash, error_stack_portable_hash, version_id, priority,
+        slot_index, worker_concurrency
       )
-      VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)`, [
+      VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21)`, [
       se.eventId,
       se.workflowSlug,
       se.runId,
@@ -5021,7 +5038,10 @@ class DatabaseClient {
       errorStackExactHash,
       errorStackNormalizedHash,
       errorStackPortableHash,
-      versionId
+      versionId,
+      priority,
+      slotIndex,
+      workerConcurrency
     ]);
   }
 }
@@ -5128,6 +5148,8 @@ class DatabaseClient {
   }
   let workerId2 = null;
   let attemptNumber = null;
+  let slotIndex = null;
+  let workerConcurrency = null;
   let errorNameHash = "";
   let errorMessageHash = "";
   let errorStackExactHash = "";
@@ -5136,6 +5158,8 @@ class DatabaseClient {
   if (eventToWrite.type === "StepStarted") {
     workerId2 = eventToWrite.workerId;
     attemptNumber = eventToWrite.attemptNumber;
+    slotIndex = eventToWrite.slotIndex ?? null;
+    workerConcurrency = eventToWrite.workerConcurrency ?? null;
   }
   if (eventToWrite.type === "StepFailed") {
     errorNameHash = eventToWrite.errorFingerprints.nameHash;
@@ -5155,9 +5179,10 @@ class DatabaseClient {
       event_id, workflow_slug, run_id, step_id, timestamp_us, category, type, event_data,
       worker_id, attempt_number, available_at_us, export_output,
       error_name_hash, error_message_hash, error_stack_exact_hash,
-      error_stack_normalized_hash, error_stack_portable_hash, version_id
+      error_stack_normalized_hash, error_stack_portable_hash, version_id,
+      slot_index, worker_concurrency
     )
-    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)`, [
+    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20)`, [
     eventToWrite.eventId,
     eventToWrite.workflowSlug,
     eventToWrite.runId,
@@ -5175,7 +5200,9 @@ class DatabaseClient {
     errorStackExactHash,
     errorStackNormalizedHash,
     errorStackPortableHash,
-    versionId
+    versionId,
+    slotIndex,
+    workerConcurrency
   ]);
   await client.query("COMMIT");
   return true;
@@ -5194,7 +5221,7 @@ class DatabaseClient {
     let query = `
       WITH latest_step_events AS (
         SELECT DISTINCT ON (workflow_slug, run_id, step_id)
-          workflow_slug, run_id, step_id, type, available_at_us
+          workflow_slug, run_id, step_id, type, available_at_us, priority
         FROM ${this.schema}.step_events
         ${options?.workflowSlugs ? "WHERE workflow_slug = ANY($1)" : ""}
         ORDER BY workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC
@@ -5203,6 +5230,7 @@ class DatabaseClient {
       FROM latest_step_events
       WHERE type = ANY($${options?.workflowSlugs ? "2" : "1"})
         AND (available_at_us IS NULL OR available_at_us <= $${options?.workflowSlugs ? "3" : "2"})
+      ORDER BY priority DESC NULLS LAST, available_at_us ASC
       ${options?.limit ? `LIMIT $${options?.workflowSlugs ? "4" : "3"}` : ""}
     `;
     const params = [];
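The new ORDER BY makes the scheduled-step lookup dequeue higher-priority steps first, breaking ties by earliest available_at_us. An in-memory JavaScript equivalent of the same ordering, for illustration only:

    steps.sort((a, b) => {
      // priority DESC NULLS LAST
      if ((a.priority == null) !== (b.priority == null)) return a.priority == null ? 1 : -1;
      if (a.priority !== b.priority) return b.priority - a.priority;
      // then available_at_us ASC
      return a.availableAtUs - b.availableAtUs;
    });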
@@ -5739,6 +5767,222 @@ class DatabaseClient {
       client.release();
     }
   }
+  async getActiveWorkersAggregation(options) {
+    const client = await this.pool.connect();
+    try {
+      const nowUs = Date.now() * 1000;
+      const staleThresholdUs = options?.staleThresholdUs ?? 30 * 1000 * 1000;
+      const includeInactive = options?.includeInactive ?? false;
+      const staleTimestamp = nowUs - staleThresholdUs;
+      const workerStatsResult = await client.query(`
+        WITH worker_activity AS (
+          -- All worker activity from StepStarted and StepHeartbeat events
+          SELECT
+            worker_id,
+            MAX(timestamp_us) AS last_seen_us
+          FROM ${this.schema}.step_events
+          WHERE worker_id IS NOT NULL
+            AND type IN ('StepStarted', 'StepHeartbeat')
+            ${options?.timeRange ? `AND timestamp_us >= $1 AND timestamp_us <= $2` : ""}
+          GROUP BY worker_id
+        ),
+        -- Steps started by each worker
+        steps_started AS (
+          SELECT DISTINCT
+            se.worker_id,
+            se.workflow_slug,
+            se.run_id,
+            se.step_id,
+            se.attempt_number
+          FROM ${this.schema}.step_events se
+          WHERE se.type = 'StepStarted'
+            AND se.worker_id IS NOT NULL
+        ),
+        -- Steps completed
+        completed_steps AS (
+          SELECT
+            ss.worker_id,
+            COUNT(*) AS completed_count
+          FROM steps_started ss
+          INNER JOIN ${this.schema}.step_events se
+            ON se.workflow_slug = ss.workflow_slug
+            AND se.run_id = ss.run_id
+            AND se.step_id = ss.step_id
+            AND se.attempt_number = ss.attempt_number
+            AND se.type = 'StepCompleted'
+          GROUP BY ss.worker_id
+        ),
+        -- Steps failed
+        failed_steps AS (
+          SELECT
+            ss.worker_id,
+            COUNT(*) AS failed_count
+          FROM steps_started ss
+          INNER JOIN ${this.schema}.step_events se
+            ON se.workflow_slug = ss.workflow_slug
+            AND se.run_id = ss.run_id
+            AND se.step_id = ss.step_id
+            AND se.attempt_number = ss.attempt_number
+            AND se.type = 'StepFailed'
+          GROUP BY ss.worker_id
+        ),
+        -- Reclamation counts (times this worker's steps were reclaimed)
+        reclaimed_counts AS (
+          SELECT
+            (event_data->>'originalWorkerId') AS worker_id,
+            COUNT(*) AS reclaimed_count
+          FROM ${this.schema}.step_events
+          WHERE type = 'StepReclaimed'
+          GROUP BY (event_data->>'originalWorkerId')
+        )
+        SELECT
+          wa.worker_id,
+          wa.last_seen_us,
+          COALESCE(cs.completed_count, 0) AS total_steps_processed,
+          COALESCE(fs.failed_count, 0) AS failed_steps,
+          COALESCE(rc.reclaimed_count, 0) AS reclaimed_from_count
+        FROM worker_activity wa
+        LEFT JOIN completed_steps cs ON wa.worker_id = cs.worker_id
+        LEFT JOIN failed_steps fs ON wa.worker_id = fs.worker_id
+        LEFT JOIN reclaimed_counts rc ON wa.worker_id = rc.worker_id
+        ${!includeInactive ? `WHERE wa.last_seen_us >= ${staleTimestamp}` : ""}
+        ORDER BY wa.last_seen_us DESC
+      `, options?.timeRange ? [options.timeRange.startUs, options.timeRange.endUs] : []);
+      const runningStepsResult = await client.query(`
+        WITH latest_step_events AS (
+          -- Get the latest event per step (excluding LogEntry)
+          SELECT DISTINCT ON (workflow_slug, run_id, step_id)
+            workflow_slug,
+            run_id,
+            step_id,
+            type,
+            worker_id,
+            timestamp_us,
+            slot_index,
+            worker_concurrency
+          FROM ${this.schema}.step_events
+          WHERE type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
+          ORDER BY workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC
+        ),
+        -- Steps currently running (latest event is StepStarted or StepHeartbeat)
+        running_steps AS (
+          SELECT
+            workflow_slug,
+            run_id,
+            step_id,
+            worker_id,
+            slot_index,
+            worker_concurrency
+          FROM latest_step_events
+          WHERE type IN ('StepStarted', 'StepHeartbeat')
+            AND worker_id IS NOT NULL
+        ),
+        -- Get start time and last heartbeat for each running step
+        step_times AS (
+          SELECT
+            rs.workflow_slug,
+            rs.run_id,
+            rs.step_id,
+            rs.worker_id,
+            rs.slot_index,
+            rs.worker_concurrency,
+            MIN(se.timestamp_us) FILTER (WHERE se.type = 'StepStarted') AS started_at_us,
+            MAX(se.timestamp_us) FILTER (WHERE se.type IN ('StepStarted', 'StepHeartbeat')) AS last_heartbeat_us
+          FROM running_steps rs
+          INNER JOIN ${this.schema}.step_events se
+            ON se.workflow_slug = rs.workflow_slug
+            AND se.run_id = rs.run_id
+            AND se.step_id = rs.step_id
+            AND se.type IN ('StepStarted', 'StepHeartbeat')
+          GROUP BY rs.workflow_slug, rs.run_id, rs.step_id, rs.worker_id, rs.slot_index, rs.worker_concurrency
+        )
+        SELECT
+          worker_id,
+          workflow_slug,
+          run_id,
+          step_id,
+          started_at_us,
+          last_heartbeat_us,
+          slot_index,
+          worker_concurrency
+        FROM step_times
+        ORDER BY worker_id, last_heartbeat_us DESC
+      `);
+      const workerConcurrencyResult = await client.query(`
+        SELECT DISTINCT ON (worker_id)
+          worker_id,
+          worker_concurrency
+        FROM ${this.schema}.step_events
+        WHERE worker_id IS NOT NULL
+          AND worker_concurrency IS NOT NULL
+          AND type IN ('StepStarted', 'StepHeartbeat')
+        ORDER BY worker_id, timestamp_us DESC
+      `);
+      const workerConcurrencyMap = new Map;
+      for (const row of workerConcurrencyResult.rows) {
+        workerConcurrencyMap.set(row.worker_id, row.worker_concurrency);
+      }
+      const workerMap = new Map;
+      for (const row of workerStatsResult.rows) {
+        workerMap.set(row.worker_id, {
+          workerId: row.worker_id,
+          lastSeenUs: parseInt(row.last_seen_us, 10),
+          totalStepsProcessed: parseInt(row.total_steps_processed, 10),
+          failedSteps: parseInt(row.failed_steps, 10),
+          reclaimedFromCount: parseInt(row.reclaimed_from_count, 10),
+          activeSteps: []
+        });
+      }
+      for (const row of runningStepsResult.rows) {
+        let worker = workerMap.get(row.worker_id);
+        if (!worker) {
+          worker = {
+            workerId: row.worker_id,
+            lastSeenUs: parseInt(row.last_heartbeat_us, 10),
+            totalStepsProcessed: 0,
+            failedSteps: 0,
+            reclaimedFromCount: 0,
+            activeSteps: []
+          };
+          workerMap.set(row.worker_id, worker);
+        }
+        if (row.worker_concurrency != null && worker.workerConcurrency == null) {
+          worker.workerConcurrency = row.worker_concurrency;
+        }
+        worker.activeSteps.push({
+          workflowSlug: row.workflow_slug,
+          runId: row.run_id,
+          stepId: row.step_id,
+          startedAtUs: parseInt(row.started_at_us, 10),
+          lastHeartbeatUs: parseInt(row.last_heartbeat_us, 10),
+          slotIndex: row.slot_index ?? undefined
+        });
+      }
+      for (const worker of workerMap.values()) {
+        if (worker.workerConcurrency == null) {
+          const historicalConcurrency = workerConcurrencyMap.get(worker.workerId);
+          if (historicalConcurrency != null) {
+            worker.workerConcurrency = historicalConcurrency;
+          }
+        }
+      }
+      const workers = Array.from(workerMap.values()).sort((a, b) => {
+        if (b.activeSteps.length !== a.activeSteps.length) {
+          return b.activeSteps.length - a.activeSteps.length;
+        }
+        return b.lastSeenUs - a.lastSeenUs;
+      });
+      const totalRunningSteps = workers.reduce((sum, w) => sum + w.activeSteps.length, 0);
+      const totalActiveWorkers = workers.filter((w) => w.activeSteps.length > 0).length;
+      return {
+        workers,
+        totalActiveWorkers,
+        totalRunningSteps
+      };
+    } finally {
+      client.release();
+    }
+  }
 }
 function createPool(connectionString) {
   return new Pool2({ connectionString });
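A hedged usage sketch of the new aggregation; option and result names are taken from the code above, and `db` stands for a connected DatabaseClient:

    const agg = await db.getActiveWorkersAggregation({
      staleThresholdUs: 30 * 1000 * 1000, // default: 30 s, in microseconds
      includeInactive: false,             // hide workers not seen within the threshold
      timeRange: { startUs, endUs }       // optional window over step_events
    });
    // agg.workers[i] => { workerId, lastSeenUs, totalStepsProcessed, failedSteps,
    //                     reclaimedFromCount, workerConcurrency?, activeSteps: [...] }
    // plus agg.totalActiveWorkers and agg.totalRunningSteps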
@@ -6086,6 +6330,60 @@ async function migration008_addRunIdIndex(pool, schema) {
     client.release();
   }
 }
+async function migration009_addStepPriority(pool, schema) {
+  const client = await pool.connect();
+  try {
+    await client.query(`
+      ALTER TABLE ${schema}.step_events
+      ADD COLUMN IF NOT EXISTS priority INTEGER
+    `);
+    await client.query(`
+      CREATE INDEX IF NOT EXISTS idx_step_events_priority_queue
+      ON ${schema}.step_events (priority DESC NULLS LAST, available_at_us ASC, timestamp_us ASC)
+      WHERE type IN ('StepScheduled', 'StepReclaimed', 'StepRetrying')
+    `);
+    console.log("[Migration 009] Step priority column and index added successfully");
+  } catch (error) {
+    console.error("[Migration 009] Error adding step priority:", error);
+    throw error;
+  } finally {
+    client.release();
+  }
+}
+async function migration010_addSlotTracking(pool, schema) {
+  const client = await pool.connect();
+  try {
+    await client.query(`
+      ALTER TABLE ${schema}.step_events
+      ADD COLUMN IF NOT EXISTS slot_index INTEGER,
+      ADD COLUMN IF NOT EXISTS worker_concurrency INTEGER
+    `);
+    console.log("[Migration 010] Slot tracking columns added successfully");
+  } catch (error) {
+    console.error("[Migration 010] Error adding slot tracking columns:", error);
+    throw error;
+  } finally {
+    client.release();
+  }
+}
+async function migration011_addWorkerConcurrencyIndex(pool, schema) {
+  const client = await pool.connect();
+  try {
+    await client.query(`
+      CREATE INDEX IF NOT EXISTS idx_step_events_worker_concurrency
+      ON ${schema}.step_events (worker_id, timestamp_us DESC)
+      WHERE worker_id IS NOT NULL
+        AND worker_concurrency IS NOT NULL
+        AND type IN ('StepStarted', 'StepHeartbeat')
+    `);
+    console.log("[Migration 011] Worker concurrency index added successfully");
+  } catch (error) {
+    console.error("[Migration 011] Error adding worker concurrency index:", error);
+    throw error;
+  } finally {
+    client.release();
+  }
+}
 async function runMigrations(pool, schema = "cascadeflow") {
   console.log(`[Migrations] Starting database migrations in schema '${schema}'...`);
   try {
@@ -6098,6 +6396,9 @@ async function runMigrations(pool, schema = "cascadeflow") {
   await migration006_addDescIndexes(pool, schema);
   await migration007_addWorkerIndexes(pool, schema);
   await migration008_addRunIdIndex(pool, schema);
+  await migration009_addStepPriority(pool, schema);
+  await migration010_addSlotTracking(pool, schema);
+  await migration011_addWorkerConcurrencyIndex(pool, schema);
   console.log("[Migrations] All migrations completed successfully");
 } catch (error) {
   console.error("[Migrations] Migration failed:", error);
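Migrations 009–011 only use ADD COLUMN IF NOT EXISTS / CREATE INDEX IF NOT EXISTS, so re-running them against an existing 0.2.16 schema is safe. A sketch of a standalone invocation, assuming runMigrations and createPool are reachable from your build (this file only shows PostgresBackend being exported):

    const pool = createPool(process.env.DATABASE_URL);
    await runMigrations(pool, "cascadeflow"); // runs migrations 001-011 in order; throws on failure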
@@ -6408,7 +6709,8 @@ class PostgresBackend extends Backend {
     availableAtUs: metadata.availableAt,
     reason: metadata.reason,
     attemptNumber: metadata.attemptNumber,
-    retryDelayMs: metadata.retryDelayMs
+    retryDelayMs: metadata.retryDelayMs,
+    priority: metadata.priority
   };
   await this.db.appendEvent("step_events", event);
 }
@@ -6426,7 +6728,9 @@ class PostgresBackend extends Backend {
     stepId,
     attemptNumber,
     workerId,
-    dependencies: metadata.dependencies
+    dependencies: metadata.dependencies,
+    slotIndex: metadata.slotIndex,
+    workerConcurrency: metadata.workerConcurrency
   };
   await this.db.appendEvent("step_events", event);
 }
@@ -6545,6 +6849,7 @@ class PostgresBackend extends Backend {
     reason: "retry",
     attemptNumber: scheduleMetadata.nextAttemptNumber,
     retryDelayMs: scheduleMetadata.retryDelayMs,
+    priority: scheduleMetadata.priority,
     policyIndex: scheduleMetadata.policyIndex,
     attemptInPolicy: scheduleMetadata.attemptInPolicy
   };
@@ -6614,7 +6919,7 @@ class PostgresBackend extends Backend {
   };
   await this.db.appendEvent("step_events", event);
 }
-async saveStepHeartbeat(workflowSlug, runId, stepId, workerId, attemptNumber) {
+async saveStepHeartbeat(workflowSlug, runId, stepId, workerId, attemptNumber, slotInfo) {
   const now = getMicrosecondTimestamp();
   const event = {
     category: "step",
@@ -6625,7 +6930,9 @@ class PostgresBackend extends Backend {
     runId,
     stepId,
     workerId,
-    attemptNumber
+    attemptNumber,
+    slotIndex: slotInfo?.slotIndex,
+    workerConcurrency: slotInfo?.workerConcurrency
   };
   await this.db.appendEvent("step_events", event);
 }
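saveStepHeartbeat gains an optional trailing slotInfo argument; five-argument callers keep working and simply record NULL slot columns. A sketch:

    await backend.saveStepHeartbeat(workflowSlug, runId, stepId, workerId, attemptNumber, {
      slotIndex: 2,        // which of the worker's slots is running the step
      workerConcurrency: 8 // total slots configured on the worker
    });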
@@ -6978,7 +7285,9 @@ class PostgresBackend extends Backend {
     stepId,
     workerId,
     dependencies: metadata.dependencies,
-    attemptNumber
+    attemptNumber,
+    slotIndex: metadata.slotIndex,
+    workerConcurrency: metadata.workerConcurrency
   };
   const claimed = await this.db.claimScheduledStep(workflowSlug, runId, stepId, workerId, event);
   return claimed ? { attemptNumber } : null;
@@ -7422,9 +7731,32 @@ class PostgresBackend extends Backend {
     successRate
   };
 }
+async getActiveWorkers(options) {
+  const staleThresholdUs = options?.staleThresholdUs ?? 30 * 1000 * 1000;
+  const result = await this.db.getActiveWorkersAggregation({
+    staleThresholdUs,
+    includeInactive: options?.includeInactive,
+    timeRange: options?.timeRange
+  });
+  return {
+    workers: result.workers.map((w) => ({
+      workerId: w.workerId,
+      lastSeenUs: w.lastSeenUs,
+      currentlyRunningSteps: w.activeSteps.length,
+      totalStepsProcessed: w.totalStepsProcessed,
+      failedSteps: w.failedSteps,
+      reclaimedFromCount: w.reclaimedFromCount,
+      workerConcurrency: w.workerConcurrency,
+      activeSteps: w.activeSteps
+    })),
+    totalActiveWorkers: result.totalActiveWorkers,
+    totalRunningSteps: result.totalRunningSteps,
+    staleThresholdUs
+  };
+}
 }
 export {
   PostgresBackend
 };
-//# debugId=
+//# debugId=9C62728A85A6463664756E2164756E21