@cascade-flow/backend-postgres 0.2.17 → 0.2.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5125,13 +5125,15 @@ class DatabaseClient {
5125
5125
  client.release();
5126
5126
  }
5127
5127
  }
5128
- async claimScheduledStep(workflowSlug, runId, stepId, workerId, eventToWrite) {
5128
+ async claimScheduledStep(workflowSlug, runId, stepId, eventToWrite) {
5129
5129
  const client = await this.pool.connect();
5130
5130
  try {
5131
5131
  await client.query("BEGIN");
5132
+ const currentTimeUs = Date.now() * 1000;
5132
5133
  const checkQuery = `
5133
- SELECT event_data FROM ${this.schema}.step_events
5134
+ SELECT event_data, attempt_number, available_at_us FROM ${this.schema}.step_events
5134
5135
  WHERE workflow_slug = $1 AND run_id = $2 AND step_id = $3
5136
+ AND type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
5135
5137
  ORDER BY timestamp_us DESC, event_id DESC
5136
5138
  LIMIT 1
5137
5139
  FOR UPDATE SKIP LOCKED
@@ -5139,14 +5141,22 @@ class DatabaseClient {
5139
5141
  const checkResult = await client.query(checkQuery, [workflowSlug, runId, stepId]);
5140
5142
  if (checkResult.rows.length === 0) {
5141
5143
  await client.query("ROLLBACK");
5142
- return false;
5144
+ return null;
5143
5145
  }
5144
- const latestEvent = checkResult.rows[0].event_data;
5146
+ const latestRow = checkResult.rows[0];
5147
+ const latestEvent = latestRow.event_data;
5145
5148
  if (latestEvent.type !== "StepScheduled" && latestEvent.type !== "StepReclaimed" && latestEvent.type !== "StepRetrying") {
5146
5149
  await client.query("ROLLBACK");
5147
- return false;
5150
+ return null;
5151
+ }
5152
+ const availableAtUs = latestRow.available_at_us ?? latestEvent.availableAtUs ?? null;
5153
+ if (availableAtUs !== null && availableAtUs > currentTimeUs) {
5154
+ await client.query("ROLLBACK");
5155
+ return null;
5148
5156
  }
5149
- let workerId2 = null;
5157
+ const resolvedAttemptNumber = latestRow.attempt_number ?? latestEvent.attemptNumber ?? (eventToWrite.type === "StepStarted" ? eventToWrite.attemptNumber : null) ?? 1;
5158
+ const eventPayload = eventToWrite.type === "StepStarted" ? { ...eventToWrite, attemptNumber: resolvedAttemptNumber } : eventToWrite;
5159
+ let eventWorkerId = null;
5150
5160
  let attemptNumber = null;
5151
5161
  let slotIndex = null;
5152
5162
  let workerConcurrency = null;
@@ -5155,18 +5165,18 @@ class DatabaseClient {
5155
5165
  let errorStackExactHash = "";
5156
5166
  let errorStackNormalizedHash = "";
5157
5167
  let errorStackPortableHash = "";
5158
- if (eventToWrite.type === "StepStarted") {
5159
- workerId2 = eventToWrite.workerId;
5160
- attemptNumber = eventToWrite.attemptNumber;
5161
- slotIndex = eventToWrite.slotIndex ?? null;
5162
- workerConcurrency = eventToWrite.workerConcurrency ?? null;
5163
- }
5164
- if (eventToWrite.type === "StepFailed") {
5165
- errorNameHash = eventToWrite.errorFingerprints.nameHash;
5166
- errorMessageHash = eventToWrite.errorFingerprints.messageHash;
5167
- errorStackExactHash = eventToWrite.errorFingerprints.stackExactHash;
5168
- errorStackNormalizedHash = eventToWrite.errorFingerprints.stackNormalizedHash;
5169
- errorStackPortableHash = eventToWrite.errorFingerprints.stackPortableHash;
5168
+ if (eventPayload.type === "StepStarted") {
5169
+ eventWorkerId = eventPayload.workerId;
5170
+ attemptNumber = eventPayload.attemptNumber;
5171
+ slotIndex = eventPayload.slotIndex ?? null;
5172
+ workerConcurrency = eventPayload.workerConcurrency ?? null;
5173
+ }
5174
+ if (eventPayload.type === "StepFailed") {
5175
+ errorNameHash = eventPayload.errorFingerprints.nameHash;
5176
+ errorMessageHash = eventPayload.errorFingerprints.messageHash;
5177
+ errorStackExactHash = eventPayload.errorFingerprints.stackExactHash;
5178
+ errorStackNormalizedHash = eventPayload.errorFingerprints.stackNormalizedHash;
5179
+ errorStackPortableHash = eventPayload.errorFingerprints.stackPortableHash;
5170
5180
  }
5171
5181
  const versionResult = await client.query(`SELECT version_id FROM ${this.schema}.workflow_events
5172
5182
  WHERE workflow_slug = $1 AND run_id = $2
@@ -5183,15 +5193,15 @@ class DatabaseClient {
5183
5193
  slot_index, worker_concurrency
5184
5194
  )
5185
5195
  VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20)`, [
5186
- eventToWrite.eventId,
5187
- eventToWrite.workflowSlug,
5188
- eventToWrite.runId,
5189
- eventToWrite.stepId,
5190
- eventToWrite.timestampUs,
5191
- eventToWrite.category,
5192
- eventToWrite.type,
5193
- JSON.stringify(stripEventIdFromJson(eventToWrite)),
5194
- workerId2,
5196
+ eventPayload.eventId,
5197
+ eventPayload.workflowSlug,
5198
+ eventPayload.runId,
5199
+ eventPayload.stepId,
5200
+ eventPayload.timestampUs,
5201
+ eventPayload.category,
5202
+ eventPayload.type,
5203
+ JSON.stringify(stripEventIdFromJson(eventPayload)),
5204
+ eventWorkerId,
5195
5205
  attemptNumber,
5196
5206
  null,
5197
5207
  null,
@@ -5205,7 +5215,7 @@ class DatabaseClient {
5205
5215
  workerConcurrency
5206
5216
  ]);
5207
5217
  await client.query("COMMIT");
5208
- return true;
5218
+ return resolvedAttemptNumber;
5209
5219
  } catch (error) {
5210
5220
  await client.query("ROLLBACK");
5211
5221
  throw error;
@@ -5259,21 +5269,25 @@ class DatabaseClient {
5259
5269
  const query = `
5260
5270
  WITH latest_step_events AS (
5261
5271
  SELECT DISTINCT ON (workflow_slug, run_id, step_id)
5262
- workflow_slug, run_id, step_id, type, timestamp_us, worker_id
5272
+ workflow_slug, run_id, step_id, type, timestamp_us, worker_id, attempt_number
5263
5273
  FROM ${this.schema}.step_events
5264
- WHERE type IN ('StepStarted', 'StepHeartbeat')
5274
+ WHERE type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
5265
5275
  ORDER BY workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC
5266
5276
  )
5267
- SELECT workflow_slug, run_id, step_id, worker_id
5277
+ SELECT workflow_slug, run_id, step_id, worker_id, attempt_number, timestamp_us
5268
5278
  FROM latest_step_events
5269
- WHERE timestamp_us < $1 AND worker_id IS NOT NULL
5279
+ WHERE type IN ('StepStarted', 'StepHeartbeat')
5280
+ AND timestamp_us < $1
5281
+ AND worker_id IS NOT NULL
5270
5282
  `;
5271
5283
  const result = await client.query(query, [currentTimeUs - staleThresholdUs]);
5272
5284
  return result.rows.map((row) => ({
5273
5285
  workflowSlug: row.workflow_slug,
5274
5286
  runId: row.run_id,
5275
5287
  stepId: row.step_id,
5276
- workerId: row.worker_id
5288
+ workerId: row.worker_id,
5289
+ attemptNumber: row.attempt_number ?? 1,
5290
+ lastHeartbeatUs: Number(row.timestamp_us)
5277
5291
  }));
5278
5292
  } finally {
5279
5293
  client.release();
@@ -5630,6 +5644,15 @@ class DatabaseClient {
5630
5644
  type
5631
5645
  FROM ${this.schema}.workflow_events
5632
5646
  WHERE ($1::text IS NULL OR workflow_slug = $1)
5647
+ AND type IN (
5648
+ 'RunSubmitted',
5649
+ 'WorkflowRetryStarted',
5650
+ 'WorkflowStarted',
5651
+ 'WorkflowResumed',
5652
+ 'WorkflowCompleted',
5653
+ 'WorkflowFailed',
5654
+ 'WorkflowCancelled'
5655
+ )
5633
5656
  ORDER BY workflow_slug, run_id, timestamp_us DESC, event_id DESC
5634
5657
  )
5635
5658
  SELECT
@@ -5767,90 +5790,33 @@ class DatabaseClient {
5767
5790
  client.release();
5768
5791
  }
5769
5792
  }
5770
- async getActiveWorkersAggregation(options) {
5793
+ async getActiveWorkersAggregation(_options) {
5771
5794
  const client = await this.pool.connect();
5772
5795
  try {
5773
- const nowUs = Date.now() * 1000;
5774
- const staleThresholdUs = options?.staleThresholdUs ?? 30 * 1000 * 1000;
5775
- const includeInactive = options?.includeInactive ?? false;
5776
- const staleTimestamp = nowUs - staleThresholdUs;
5777
- const workerStatsResult = await client.query(`
5778
- WITH worker_activity AS (
5779
- -- All worker activity from StepStarted and StepHeartbeat events
5780
- SELECT
5781
- worker_id,
5782
- MAX(timestamp_us) AS last_seen_us
5783
- FROM ${this.schema}.step_events
5784
- WHERE worker_id IS NOT NULL
5785
- AND type IN ('StepStarted', 'StepHeartbeat')
5786
- ${options?.timeRange ? `AND timestamp_us >= $1 AND timestamp_us <= $2` : ""}
5787
- GROUP BY worker_id
5788
- ),
5789
- -- Steps started by each worker
5790
- steps_started AS (
5791
- SELECT DISTINCT
5792
- se.worker_id,
5793
- se.workflow_slug,
5794
- se.run_id,
5795
- se.step_id,
5796
- se.attempt_number
5797
- FROM ${this.schema}.step_events se
5798
- WHERE se.type = 'StepStarted'
5799
- AND se.worker_id IS NOT NULL
5800
- ),
5801
- -- Steps completed
5802
- completed_steps AS (
5803
- SELECT
5804
- ss.worker_id,
5805
- COUNT(*) AS completed_count
5806
- FROM steps_started ss
5807
- INNER JOIN ${this.schema}.step_events se
5808
- ON se.workflow_slug = ss.workflow_slug
5809
- AND se.run_id = ss.run_id
5810
- AND se.step_id = ss.step_id
5811
- AND se.attempt_number = ss.attempt_number
5812
- AND se.type = 'StepCompleted'
5813
- GROUP BY ss.worker_id
5814
- ),
5815
- -- Steps failed
5816
- failed_steps AS (
5817
- SELECT
5818
- ss.worker_id,
5819
- COUNT(*) AS failed_count
5820
- FROM steps_started ss
5821
- INNER JOIN ${this.schema}.step_events se
5822
- ON se.workflow_slug = ss.workflow_slug
5823
- AND se.run_id = ss.run_id
5824
- AND se.step_id = ss.step_id
5825
- AND se.attempt_number = ss.attempt_number
5826
- AND se.type = 'StepFailed'
5827
- GROUP BY ss.worker_id
5828
- ),
5829
- -- Reclamation counts (times this worker's steps were reclaimed)
5830
- reclaimed_counts AS (
5831
- SELECT
5832
- (event_data->>'originalWorkerId') AS worker_id,
5833
- COUNT(*) AS reclaimed_count
5834
- FROM ${this.schema}.step_events
5835
- WHERE type = 'StepReclaimed'
5836
- GROUP BY (event_data->>'originalWorkerId')
5796
+ const runningRunsResult = await client.query(`
5797
+ WITH latest_workflow_events AS (
5798
+ SELECT DISTINCT ON (workflow_slug, run_id)
5799
+ workflow_slug,
5800
+ run_id,
5801
+ type
5802
+ FROM ${this.schema}.workflow_events
5803
+ ORDER BY workflow_slug, run_id, timestamp_us DESC, event_id DESC
5837
5804
  )
5838
- SELECT
5839
- wa.worker_id,
5840
- wa.last_seen_us,
5841
- COALESCE(cs.completed_count, 0) AS total_steps_processed,
5842
- COALESCE(fs.failed_count, 0) AS failed_steps,
5843
- COALESCE(rc.reclaimed_count, 0) AS reclaimed_from_count
5844
- FROM worker_activity wa
5845
- LEFT JOIN completed_steps cs ON wa.worker_id = cs.worker_id
5846
- LEFT JOIN failed_steps fs ON wa.worker_id = fs.worker_id
5847
- LEFT JOIN reclaimed_counts rc ON wa.worker_id = rc.worker_id
5848
- ${!includeInactive ? `WHERE wa.last_seen_us >= ${staleTimestamp}` : ""}
5849
- ORDER BY wa.last_seen_us DESC
5850
- `, options?.timeRange ? [options.timeRange.startUs, options.timeRange.endUs] : []);
5805
+ SELECT workflow_slug, run_id
5806
+ FROM latest_workflow_events
5807
+ WHERE type IN ('WorkflowStarted', 'WorkflowResumed')
5808
+ LIMIT 500
5809
+ `);
5810
+ if (runningRunsResult.rows.length === 0) {
5811
+ return {
5812
+ workers: [],
5813
+ totalActiveWorkers: 0,
5814
+ totalRunningSteps: 0
5815
+ };
5816
+ }
5817
+ const runIds = runningRunsResult.rows.map((r) => r.run_id);
5851
5818
  const runningStepsResult = await client.query(`
5852
5819
  WITH latest_step_events AS (
5853
- -- Get the latest event per step (excluding LogEntry)
5854
5820
  SELECT DISTINCT ON (workflow_slug, run_id, step_id)
5855
5821
  workflow_slug,
5856
5822
  run_id,
@@ -5861,78 +5827,24 @@ class DatabaseClient {
5861
5827
  slot_index,
5862
5828
  worker_concurrency
5863
5829
  FROM ${this.schema}.step_events
5864
- WHERE type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
5830
+ WHERE run_id = ANY($1)
5831
+ AND type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
5865
5832
  ORDER BY workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC
5866
- ),
5867
- -- Steps currently running (latest event is StepStarted or StepHeartbeat)
5868
- running_steps AS (
5869
- SELECT
5870
- workflow_slug,
5871
- run_id,
5872
- step_id,
5873
- worker_id,
5874
- slot_index,
5875
- worker_concurrency
5876
- FROM latest_step_events
5877
- WHERE type IN ('StepStarted', 'StepHeartbeat')
5878
- AND worker_id IS NOT NULL
5879
- ),
5880
- -- Get start time and last heartbeat for each running step
5881
- step_times AS (
5882
- SELECT
5883
- rs.workflow_slug,
5884
- rs.run_id,
5885
- rs.step_id,
5886
- rs.worker_id,
5887
- rs.slot_index,
5888
- rs.worker_concurrency,
5889
- MIN(se.timestamp_us) FILTER (WHERE se.type = 'StepStarted') AS started_at_us,
5890
- MAX(se.timestamp_us) FILTER (WHERE se.type IN ('StepStarted', 'StepHeartbeat')) AS last_heartbeat_us
5891
- FROM running_steps rs
5892
- INNER JOIN ${this.schema}.step_events se
5893
- ON se.workflow_slug = rs.workflow_slug
5894
- AND se.run_id = rs.run_id
5895
- AND se.step_id = rs.step_id
5896
- AND se.type IN ('StepStarted', 'StepHeartbeat')
5897
- GROUP BY rs.workflow_slug, rs.run_id, rs.step_id, rs.worker_id, rs.slot_index, rs.worker_concurrency
5898
5833
  )
5899
5834
  SELECT
5900
5835
  worker_id,
5901
5836
  workflow_slug,
5902
5837
  run_id,
5903
5838
  step_id,
5904
- started_at_us,
5905
- last_heartbeat_us,
5839
+ timestamp_us as last_heartbeat_us,
5906
5840
  slot_index,
5907
5841
  worker_concurrency
5908
- FROM step_times
5909
- ORDER BY worker_id, last_heartbeat_us DESC
5910
- `);
5911
- const workerConcurrencyResult = await client.query(`
5912
- SELECT DISTINCT ON (worker_id)
5913
- worker_id,
5914
- worker_concurrency
5915
- FROM ${this.schema}.step_events
5916
- WHERE worker_id IS NOT NULL
5917
- AND worker_concurrency IS NOT NULL
5918
- AND type IN ('StepStarted', 'StepHeartbeat')
5919
- ORDER BY worker_id, timestamp_us DESC
5920
- `);
5921
- const workerConcurrencyMap = new Map;
5922
- for (const row of workerConcurrencyResult.rows) {
5923
- workerConcurrencyMap.set(row.worker_id, row.worker_concurrency);
5924
- }
5842
+ FROM latest_step_events
5843
+ WHERE type IN ('StepStarted', 'StepHeartbeat')
5844
+ AND worker_id IS NOT NULL
5845
+ LIMIT 1000
5846
+ `, [runIds]);
5925
5847
  const workerMap = new Map;
5926
- for (const row of workerStatsResult.rows) {
5927
- workerMap.set(row.worker_id, {
5928
- workerId: row.worker_id,
5929
- lastSeenUs: parseInt(row.last_seen_us, 10),
5930
- totalStepsProcessed: parseInt(row.total_steps_processed, 10),
5931
- failedSteps: parseInt(row.failed_steps, 10),
5932
- reclaimedFromCount: parseInt(row.reclaimed_from_count, 10),
5933
- activeSteps: []
5934
- });
5935
- }
5936
5848
  for (const row of runningStepsResult.rows) {
5937
5849
  let worker = workerMap.get(row.worker_id);
5938
5850
  if (!worker) {
@@ -5942,10 +5854,15 @@ class DatabaseClient {
5942
5854
  totalStepsProcessed: 0,
5943
5855
  failedSteps: 0,
5944
5856
  reclaimedFromCount: 0,
5857
+ workerConcurrency: row.worker_concurrency ?? undefined,
5945
5858
  activeSteps: []
5946
5859
  };
5947
5860
  workerMap.set(row.worker_id, worker);
5948
5861
  }
5862
+ const lastHeartbeatUs = parseInt(row.last_heartbeat_us, 10);
5863
+ if (lastHeartbeatUs > worker.lastSeenUs) {
5864
+ worker.lastSeenUs = lastHeartbeatUs;
5865
+ }
5949
5866
  if (row.worker_concurrency != null && worker.workerConcurrency == null) {
5950
5867
  worker.workerConcurrency = row.worker_concurrency;
5951
5868
  }
@@ -5953,19 +5870,11 @@ class DatabaseClient {
5953
5870
  workflowSlug: row.workflow_slug,
5954
5871
  runId: row.run_id,
5955
5872
  stepId: row.step_id,
5956
- startedAtUs: parseInt(row.started_at_us, 10),
5957
- lastHeartbeatUs: parseInt(row.last_heartbeat_us, 10),
5873
+ startedAtUs: lastHeartbeatUs,
5874
+ lastHeartbeatUs,
5958
5875
  slotIndex: row.slot_index ?? undefined
5959
5876
  });
5960
5877
  }
5961
- for (const worker of workerMap.values()) {
5962
- if (worker.workerConcurrency == null) {
5963
- const historicalConcurrency = workerConcurrencyMap.get(worker.workerId);
5964
- if (historicalConcurrency != null) {
5965
- worker.workerConcurrency = historicalConcurrency;
5966
- }
5967
- }
5968
- }
5969
5878
  const workers = Array.from(workerMap.values()).sort((a, b) => {
5970
5879
  if (b.activeSteps.length !== a.activeSteps.length) {
5971
5880
  return b.activeSteps.length - a.activeSteps.length;
@@ -5973,7 +5882,7 @@ class DatabaseClient {
5973
5882
  return b.lastSeenUs - a.lastSeenUs;
5974
5883
  });
5975
5884
  const totalRunningSteps = workers.reduce((sum, w) => sum + w.activeSteps.length, 0);
5976
- const totalActiveWorkers = workers.filter((w) => w.activeSteps.length > 0).length;
5885
+ const totalActiveWorkers = workers.length;
5977
5886
  return {
5978
5887
  workers,
5979
5888
  totalActiveWorkers,
@@ -5983,6 +5892,112 @@ class DatabaseClient {
5983
5892
  client.release();
5984
5893
  }
5985
5894
  }
5895
+ async getWorkerById(workerId) {
5896
+ const client = await this.pool.connect();
5897
+ try {
5898
+ const result = await client.query(`
5899
+ WITH worker_step_events AS (
5900
+ -- Get all step events for this worker
5901
+ SELECT
5902
+ workflow_slug,
5903
+ run_id,
5904
+ step_id,
5905
+ type,
5906
+ timestamp_us,
5907
+ slot_index,
5908
+ worker_concurrency
5909
+ FROM ${this.schema}.step_events
5910
+ WHERE worker_id = $1
5911
+ AND type IN ('StepStarted', 'StepHeartbeat')
5912
+ ),
5913
+ latest_per_step AS (
5914
+ -- For each step this worker touched, get the latest event
5915
+ SELECT DISTINCT ON (workflow_slug, run_id, step_id)
5916
+ workflow_slug,
5917
+ run_id,
5918
+ step_id,
5919
+ timestamp_us,
5920
+ slot_index,
5921
+ worker_concurrency
5922
+ FROM worker_step_events
5923
+ ORDER BY workflow_slug, run_id, step_id, timestamp_us DESC
5924
+ ),
5925
+ -- Check if these steps are still running (no completion/failure after our heartbeat)
5926
+ still_running AS (
5927
+ SELECT
5928
+ lps.workflow_slug,
5929
+ lps.run_id,
5930
+ lps.step_id,
5931
+ lps.timestamp_us as last_heartbeat_us,
5932
+ lps.slot_index,
5933
+ lps.worker_concurrency
5934
+ FROM latest_per_step lps
5935
+ WHERE NOT EXISTS (
5936
+ SELECT 1 FROM ${this.schema}.step_events se
5937
+ WHERE se.workflow_slug = lps.workflow_slug
5938
+ AND se.run_id = lps.run_id
5939
+ AND se.step_id = lps.step_id
5940
+ AND se.timestamp_us > lps.timestamp_us
5941
+ AND se.type IN ('StepCompleted', 'StepFailed', 'StepSkipped', 'StepReclaimed')
5942
+ )
5943
+ )
5944
+ SELECT * FROM still_running
5945
+ ORDER BY last_heartbeat_us DESC
5946
+ LIMIT 100
5947
+ `, [workerId]);
5948
+ if (result.rows.length === 0) {
5949
+ const lastSeenResult = await client.query(`
5950
+ SELECT MAX(timestamp_us) as last_seen_us, MAX(worker_concurrency) as worker_concurrency
5951
+ FROM ${this.schema}.step_events
5952
+ WHERE worker_id = $1
5953
+ AND type IN ('StepStarted', 'StepHeartbeat')
5954
+ `, [workerId]);
5955
+ if (!lastSeenResult.rows[0]?.last_seen_us) {
5956
+ return null;
5957
+ }
5958
+ return {
5959
+ workerId,
5960
+ lastSeenUs: parseInt(lastSeenResult.rows[0].last_seen_us, 10),
5961
+ totalStepsProcessed: 0,
5962
+ failedSteps: 0,
5963
+ reclaimedFromCount: 0,
5964
+ workerConcurrency: lastSeenResult.rows[0].worker_concurrency ?? undefined,
5965
+ activeSteps: []
5966
+ };
5967
+ }
5968
+ let lastSeenUs = 0;
5969
+ let workerConcurrency;
5970
+ const activeSteps = [];
5971
+ for (const row of result.rows) {
5972
+ const heartbeatUs = parseInt(row.last_heartbeat_us, 10);
5973
+ if (heartbeatUs > lastSeenUs) {
5974
+ lastSeenUs = heartbeatUs;
5975
+ }
5976
+ if (row.worker_concurrency != null && workerConcurrency == null) {
5977
+ workerConcurrency = row.worker_concurrency;
5978
+ }
5979
+ activeSteps.push({
5980
+ workflowSlug: row.workflow_slug,
5981
+ runId: row.run_id,
5982
+ stepId: row.step_id,
5983
+ startedAtUs: heartbeatUs,
5984
+ lastHeartbeatUs: heartbeatUs,
5985
+ slotIndex: row.slot_index ?? undefined
5986
+ });
5987
+ }
5988
+ return {
5989
+ workerId,
5990
+ lastSeenUs,
5991
+ totalStepsProcessed: 0,
5992
+ failedSteps: 0,
5993
+ reclaimedFromCount: 0,
5994
+ workerConcurrency,
5995
+ activeSteps
5996
+ };
5997
+ } finally {
5998
+ client.release();
5999
+ }
6000
+ }
5986
6001
  }
5987
6002
  function createPool(connectionString) {
5988
6003
  return new Pool2({ connectionString });
@@ -6384,6 +6399,87 @@ async function migration011_addWorkerConcurrencyIndex(pool, schema) {
6384
6399
  client.release();
6385
6400
  }
6386
6401
  }
6402
+ async function migration012_addWorkerAnalyticsIndexes(pool, schema) {
6403
+ const client = await pool.connect();
6404
+ try {
6405
+ await client.query(`
6406
+ CREATE INDEX IF NOT EXISTS idx_step_events_worker_activity
6407
+ ON ${schema}.step_events (timestamp_us, type, worker_id)
6408
+ WHERE worker_id IS NOT NULL
6409
+ AND type IN ('StepStarted', 'StepHeartbeat')
6410
+ `);
6411
+ await client.query(`
6412
+ CREATE INDEX IF NOT EXISTS idx_step_events_recent_by_step
6413
+ ON ${schema}.step_events (timestamp_us DESC, workflow_slug, run_id, step_id, type, event_id DESC)
6414
+ WHERE type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
6415
+ `);
6416
+ console.log("[Migration 012] Worker analytics indexes added successfully");
6417
+ } catch (error) {
6418
+ console.error("[Migration 012] Error adding worker analytics indexes:", error);
6419
+ throw error;
6420
+ } finally {
6421
+ client.release();
6422
+ }
6423
+ }
6424
+ async function migration013_addWorkersObservabilityIndexes(pool, schema) {
6425
+ const client = await pool.connect();
6426
+ try {
6427
+ await client.query(`
6428
+ CREATE INDEX IF NOT EXISTS idx_workflow_events_run_status
6429
+ ON ${schema}.workflow_events (run_id, timestamp_us DESC, event_id DESC)
6430
+ INCLUDE (workflow_slug, type)
6431
+ `);
6432
+ await client.query(`
6433
+ CREATE INDEX IF NOT EXISTS idx_step_events_by_run
6434
+ ON ${schema}.step_events (run_id, workflow_slug, step_id, timestamp_us DESC, event_id DESC)
6435
+ INCLUDE (type, worker_id, slot_index, worker_concurrency)
6436
+ WHERE type NOT IN ('LogEntry', 'StepCheckpoint', 'StepCheckpointFailed')
6437
+ `);
6438
+ console.log("[Migration 013] Workers observability indexes added successfully");
6439
+ } catch (error) {
6440
+ console.error("[Migration 013] Error adding workers observability indexes:", error);
6441
+ throw error;
6442
+ } finally {
6443
+ client.release();
6444
+ }
6445
+ }
6446
+ async function migration014_addWorkerHotPathIndexes(pool, schema) {
6447
+ const client = await pool.connect();
6448
+ try {
6449
+ await client.query(`
6450
+ CREATE INDEX IF NOT EXISTS idx_step_events_latest_cover
6451
+ ON ${schema}.step_events (workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC)
6452
+ INCLUDE (type, available_at_us, worker_id, attempt_number, priority)
6453
+ `);
6454
+ await client.query(`
6455
+ CREATE INDEX IF NOT EXISTS idx_step_events_terminal_latest
6456
+ ON ${schema}.step_events (workflow_slug, run_id, step_id, timestamp_us DESC, event_id DESC)
6457
+ WHERE type IN ('StepCompleted', 'StepFailed', 'StepSkipped', 'StepReclaimed')
6458
+ `);
6459
+ await client.query(`
6460
+ CREATE INDEX IF NOT EXISTS idx_workflow_events_active_latest
6461
+ ON ${schema}.workflow_events (type, workflow_slug, run_id, timestamp_us DESC, event_id DESC)
6462
+ WHERE type IN ('RunSubmitted', 'WorkflowRetryStarted', 'WorkflowStarted', 'WorkflowResumed')
6463
+ `);
6464
+ await client.query(`
6465
+ CREATE INDEX IF NOT EXISTS idx_workflow_events_run_chrono
6466
+ ON ${schema}.workflow_events (workflow_slug, run_id, timestamp_us ASC, event_id ASC)
6467
+ `);
6468
+ await client.query(`
6469
+ CREATE INDEX IF NOT EXISTS idx_workflow_events_version_lookup
6470
+ ON ${schema}.workflow_events (workflow_slug, run_id, timestamp_us DESC, event_id DESC)
6471
+ INCLUDE (version_id)
6472
+ WHERE type IN ('WorkflowStarted', 'RunSubmitted')
6473
+ AND version_id IS NOT NULL
6474
+ `);
6475
+ console.log("[Migration 014] Worker hot-path indexes added successfully");
6476
+ } catch (error) {
6477
+ console.error("[Migration 014] Error adding worker hot-path indexes:", error);
6478
+ throw error;
6479
+ } finally {
6480
+ client.release();
6481
+ }
6482
+ }
6387
6483
  async function runMigrations(pool, schema = "cascadeflow") {
6388
6484
  console.log(`[Migrations] Starting database migrations in schema '${schema}'...`);
6389
6485
  try {
@@ -6399,6 +6495,9 @@ async function runMigrations(pool, schema = "cascadeflow") {
6399
6495
  await migration009_addStepPriority(pool, schema);
6400
6496
  await migration010_addSlotTracking(pool, schema);
6401
6497
  await migration011_addWorkerConcurrencyIndex(pool, schema);
6498
+ await migration012_addWorkerAnalyticsIndexes(pool, schema);
6499
+ await migration013_addWorkersObservabilityIndexes(pool, schema);
6500
+ await migration014_addWorkerHotPathIndexes(pool, schema);
6402
6501
  console.log("[Migrations] All migrations completed successfully");
6403
6502
  } catch (error) {
6404
6503
  console.error("[Migrations] Migration failed:", error);
@@ -7264,16 +7363,6 @@ class PostgresBackend extends Backend {
7264
7363
  return !!(latestEvent && (latestEvent.type === "StepScheduled" || latestEvent.type === "StepReclaimed" || latestEvent.type === "StepRetrying"));
7265
7364
  }
7266
7365
  async claimScheduledStep(workflowSlug, runId, stepId, workerId, metadata) {
7267
- const initialEvents = await this.loadEvents(workflowSlug, runId, { category: "step", stepId });
7268
- if (initialEvents.length === 0) {
7269
- return null;
7270
- }
7271
- const now = getMicrosecondTimestamp();
7272
- const initialState = projectStepState(initialEvents, workflowSlug);
7273
- if (initialState.status !== "scheduled" || initialState.availableAt === undefined || initialState.availableAt > now) {
7274
- return null;
7275
- }
7276
- const attemptNumber = initialState.attemptNumber;
7277
7366
  const timestamp = getMicrosecondTimestamp();
7278
7367
  const event = {
7279
7368
  category: "step",
@@ -7285,43 +7374,34 @@ class PostgresBackend extends Backend {
7285
7374
  stepId,
7286
7375
  workerId,
7287
7376
  dependencies: metadata.dependencies,
7288
- attemptNumber,
7377
+ attemptNumber: metadata.attemptNumber,
7289
7378
  slotIndex: metadata.slotIndex,
7290
7379
  workerConcurrency: metadata.workerConcurrency
7291
7380
  };
7292
- const claimed = await this.db.claimScheduledStep(workflowSlug, runId, stepId, workerId, event);
7293
- return claimed ? { attemptNumber } : null;
7381
+ const claimedAttemptNumber = await this.db.claimScheduledStep(workflowSlug, runId, stepId, event);
7382
+ return claimedAttemptNumber !== null ? { attemptNumber: claimedAttemptNumber } : null;
7294
7383
  }
7295
7384
  async reclaimStaleSteps(staleThreshold, reclaimedBy) {
7296
7385
  const reclaimed = [];
7297
7386
  const now = getMicrosecondTimestamp();
7298
7387
  const staleSteps = await this.db.findStaleSteps(staleThreshold);
7299
7388
  for (const step of staleSteps) {
7300
- const events = await this.loadEvents(step.workflowSlug, step.runId, { category: "step", stepId: step.stepId });
7301
- if (events.length === 0)
7302
- continue;
7303
- const state = projectStepState(events, step.workflowSlug);
7304
- if (state.status !== "running")
7305
- continue;
7306
- const lastHeartbeat = state.lastHeartbeat || state.startTime || 0;
7307
- const staleDuration = now - lastHeartbeat;
7308
- if (staleDuration > staleThreshold) {
7309
- await this.saveStepReclaimed(step.workflowSlug, step.runId, step.stepId, {
7310
- originalWorkerId: state.claimedBy || "unknown",
7311
- reclaimedBy,
7312
- lastHeartbeat,
7313
- staleThreshold,
7314
- staleDuration,
7315
- attemptNumber: state.attemptNumber
7316
- });
7317
- await this.saveStepScheduled(step.workflowSlug, step.runId, step.stepId, {
7318
- availableAt: now,
7319
- reason: "retry",
7320
- attemptNumber: state.attemptNumber + 1,
7321
- retryDelayMs: 0
7322
- });
7323
- reclaimed.push({ workflowSlug: step.workflowSlug, runId: step.runId, stepId: step.stepId });
7324
- }
7389
+ const staleDuration = now - step.lastHeartbeatUs;
7390
+ await this.saveStepReclaimed(step.workflowSlug, step.runId, step.stepId, {
7391
+ originalWorkerId: step.workerId,
7392
+ reclaimedBy,
7393
+ lastHeartbeat: step.lastHeartbeatUs,
7394
+ staleThreshold,
7395
+ staleDuration,
7396
+ attemptNumber: step.attemptNumber
7397
+ });
7398
+ await this.saveStepScheduled(step.workflowSlug, step.runId, step.stepId, {
7399
+ availableAt: now,
7400
+ reason: "retry",
7401
+ attemptNumber: step.attemptNumber + 1,
7402
+ retryDelayMs: 0
7403
+ });
7404
+ reclaimed.push({ workflowSlug: step.workflowSlug, runId: step.runId, stepId: step.stepId });
7325
7405
  }
7326
7406
  return reclaimed;
7327
7407
  }
@@ -7754,9 +7834,25 @@ class PostgresBackend extends Backend {
7754
7834
  staleThresholdUs
7755
7835
  };
7756
7836
  }
7837
+ async getWorkerById(workerId) {
7838
+ const result = await this.db.getWorkerById(workerId);
7839
+ if (!result) {
7840
+ return null;
7841
+ }
7842
+ return {
7843
+ workerId: result.workerId,
7844
+ lastSeenUs: result.lastSeenUs,
7845
+ currentlyRunningSteps: result.activeSteps.length,
7846
+ totalStepsProcessed: result.totalStepsProcessed,
7847
+ failedSteps: result.failedSteps,
7848
+ reclaimedFromCount: result.reclaimedFromCount,
7849
+ workerConcurrency: result.workerConcurrency,
7850
+ activeSteps: result.activeSteps
7851
+ };
7852
+ }
7757
7853
  }
7758
7854
  export {
7759
7855
  PostgresBackend
7760
7856
  };
7761
7857
 
7762
- //# debugId=9C62728A85A6463664756E2164756E21
7858
+ //# debugId=43E020BB2B7D86DF64756E2164756E21