bulltrackers-module 1.0.744 → 1.0.746

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,6 +43,35 @@ class StorageManager {
43
43
  }
44
44
  return this._firestore;
45
45
  }
46
+
47
+ /**
48
+ * Updates the heartbeat of a zombie to "hide" it from detection
49
+ * while the recovery task is being queued.
50
+ */
51
+ async claimZombie(checkpointId) {
52
+ if (!checkpointId) return;
53
+
54
+ // FIX: Access projectId and dataset from the config object
55
+ const { projectId, dataset } = this.config.bigquery; //
56
+
57
+ const query = `
58
+ UPDATE \`${projectId}.${dataset}.computation_checkpoints\`
59
+ SET last_updated = CURRENT_TIMESTAMP()
60
+ WHERE checkpoint_id = @checkpointId
61
+ `;
62
+
63
+ try {
64
+ await this.bigquery.query({
65
+ query,
66
+ params: { checkpointId }
67
+ });
68
+ } catch (e) {
69
+ // Ignore errors here, it's an optimization, not critical
70
+ console.warn(`[Storage] Failed to claim zombie ${checkpointId}: ${e.message}`);
71
+ }
72
+ }
73
+
74
+
46
75
 
47
76
  // =========================================================================
48
77
  // RESULT COMMITTING (Batch -> GCS Buffer)
@@ -318,15 +347,40 @@ class StorageManager {
318
347
  const table = 'computation_checkpoints';
319
348
  const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
320
349
  try {
350
+ // FIX: Use subquery with ROW_NUMBER to find the TRUE latest state per computation.
351
+ // We only count it as a zombie if the LATEST row is 'running'.
352
+ // This ignores 'running' rows that have a newer (or same-time) 'completed' sibling.
321
353
  const query = `
322
354
  SELECT computation_name, date, checkpoint_id, last_updated
323
- FROM ${fullTable}
324
- WHERE status = 'running' AND last_updated < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL @minutes MINUTE)
355
+ FROM (
356
+ SELECT
357
+ computation_name,
358
+ date,
359
+ checkpoint_id,
360
+ last_updated,
361
+ status,
362
+ ROW_NUMBER() OVER (
363
+ PARTITION BY computation_name, date
364
+ ORDER BY last_updated DESC,
365
+ CASE status
366
+ WHEN 'completed' THEN 1
367
+ WHEN 'failed' THEN 2
368
+ ELSE 3
369
+ END ASC
370
+ ) as rn
371
+ FROM ${fullTable}
372
+ )
373
+ WHERE rn = 1
374
+ AND status = 'running'
375
+ AND last_updated < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL @minutes MINUTE)
325
376
  LIMIT 50
326
377
  `;
327
378
  const [rows] = await this.bigquery.query({ query, params: { minutes: minutesThreshold }, location: this.config.bigquery.location });
328
379
  return rows.map(r => ({ name: r.computation_name, date: r.date.value || r.date, checkpointId: r.checkpoint_id }));
329
- } catch (e) { return []; }
380
+ } catch (e) {
381
+ console.error(`[Storage] findZombies failed: ${e.message}`);
382
+ return [];
383
+ }
330
384
  }
331
385
 
332
386
  async completeCheckpoint(dateStr, computationName, checkpointId) {
@@ -344,11 +398,21 @@ class StorageManager {
344
398
  const table = 'computation_checkpoints';
345
399
  const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
346
400
  try {
401
+ // FIX: Added Tie-Breaker logic to ORDER BY
402
+ // If timestamps are identical, 'completed' (1) comes before 'failed' (2) before 'running' (3).
403
+ // This ensures we never accidentally pick a "running" row when a "completed" one exists at the exact same ms.
347
404
  const query = `
348
405
  SELECT checkpoint_id, status, processed_count, last_entity_id, completed_batches, worker_instance_id, last_updated, attempts, code_hash, started_at
349
406
  FROM ${fullTable}
350
407
  WHERE date = @date AND computation_name = @computationName
351
- ORDER BY last_updated DESC LIMIT 1
408
+ ORDER BY
409
+ last_updated DESC,
410
+ CASE status
411
+ WHEN 'completed' THEN 1
412
+ WHEN 'failed' THEN 2
413
+ ELSE 3
414
+ END ASC
415
+ LIMIT 1
352
416
  `;
353
417
  const [rows] = await this.bigquery.query({ query, params: { date: dateStr, computationName }, location: this.config.bigquery.location });
354
418
  if (rows.length === 0) return null;
@@ -696,6 +760,7 @@ class StorageManager {
696
760
 
697
761
  this._log('ERROR', `${context}: ${details}`);
698
762
  }
763
+
699
764
  }
700
765
 
701
766
  module.exports = { StorageManager };
@@ -51,14 +51,21 @@ async function schedulerHandler(req, res) {
51
51
  const dueComputations = findDueComputations(now);
52
52
 
53
53
  // 2. ZOMBIE DETECTION
54
- // Find tasks marked 'running' that haven't heartbeated in X mins
55
54
  let zombies = [];
56
55
  try {
57
56
  zombies = await storageManager.findZombies(ZOMBIE_THRESHOLD_MINUTES);
58
- // New Code
57
+
59
58
  if (zombies.length > 0) {
60
59
  const zombieDetails = zombies.map(z => `${z.name} [${z.date}]`).join(', ');
61
60
  console.log(`[Scheduler] DETECTED ${zombies.length} ZOMBIES: ${zombieDetails}`);
61
+
62
+ // --- NEW FIX: CLAIM ZOMBIES ---
63
+ // "Touch" these rows in the DB so they don't look like zombies for another 15 mins.
64
+ // This prevents re-dispatching the same task 15 times if the queue is slow.
65
+ await Promise.all(zombies.map(z =>
66
+ storageManager.claimZombie(z.checkpointId)
67
+ ));
68
+ // ------------------------------
62
69
  }
63
70
  } catch (e) {
64
71
  console.error(`[Scheduler] Zombie check failed: ${e.message}`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bulltrackers-module",
3
- "version": "1.0.744",
3
+ "version": "1.0.746",
4
4
  "description": "Helper Functions for Bulltrackers.",
5
5
  "main": "index.js",
6
6
  "files": [