bulltrackers-module 1.0.745 → 1.0.746
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -347,15 +347,40 @@ class StorageManager {
|
|
|
347
347
|
const table = 'computation_checkpoints';
|
|
348
348
|
const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
|
|
349
349
|
try {
|
|
350
|
+
// FIX: Use subquery with ROW_NUMBER to find the TRUE latest state per computation.
|
|
351
|
+
// We only count it as a zombie if the LATEST row is 'running'.
|
|
352
|
+
// This ignores 'running' rows that have a newer row of any status, or a same-time 'completed' or 'failed' sibling.
|
|
350
353
|
const query = `
|
|
351
354
|
SELECT computation_name, date, checkpoint_id, last_updated
|
|
352
|
-
FROM
|
|
353
|
-
|
|
355
|
+
FROM (
|
|
356
|
+
SELECT
|
|
357
|
+
computation_name,
|
|
358
|
+
date,
|
|
359
|
+
checkpoint_id,
|
|
360
|
+
last_updated,
|
|
361
|
+
status,
|
|
362
|
+
ROW_NUMBER() OVER (
|
|
363
|
+
PARTITION BY computation_name, date
|
|
364
|
+
ORDER BY last_updated DESC,
|
|
365
|
+
CASE status
|
|
366
|
+
WHEN 'completed' THEN 1
|
|
367
|
+
WHEN 'failed' THEN 2
|
|
368
|
+
ELSE 3
|
|
369
|
+
END ASC
|
|
370
|
+
) as rn
|
|
371
|
+
FROM ${fullTable}
|
|
372
|
+
)
|
|
373
|
+
WHERE rn = 1
|
|
374
|
+
AND status = 'running'
|
|
375
|
+
AND last_updated < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL @minutes MINUTE)
|
|
354
376
|
LIMIT 50
|
|
355
377
|
`;
|
|
356
378
|
const [rows] = await this.bigquery.query({ query, params: { minutes: minutesThreshold }, location: this.config.bigquery.location });
|
|
357
379
|
return rows.map(r => ({ name: r.computation_name, date: r.date.value || r.date, checkpointId: r.checkpoint_id }));
|
|
358
|
-
} catch (e) {
|
|
380
|
+
} catch (e) {
|
|
381
|
+
console.error(`[Storage] findZombies failed: ${e.message}`);
|
|
382
|
+
return [];
|
|
383
|
+
}
|
|
359
384
|
}
|
|
360
385
|
|
|
361
386
|
async completeCheckpoint(dateStr, computationName, checkpointId) {
|
|
@@ -373,11 +398,21 @@ class StorageManager {
|
|
|
373
398
|
const table = 'computation_checkpoints';
|
|
374
399
|
const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
|
|
375
400
|
try {
|
|
401
|
+
// FIX: Added Tie-Breaker logic to ORDER BY
|
|
402
|
+
// If timestamps are identical, 'completed' (1) sorts before 'failed' (2), which sorts before every other status, including 'running' (3).
|
|
403
|
+
// This ensures we never accidentally pick a "running" row when a "completed" or "failed" one exists with an identical last_updated value.
|
|
376
404
|
const query = `
|
|
377
405
|
SELECT checkpoint_id, status, processed_count, last_entity_id, completed_batches, worker_instance_id, last_updated, attempts, code_hash, started_at
|
|
378
406
|
FROM ${fullTable}
|
|
379
407
|
WHERE date = @date AND computation_name = @computationName
|
|
380
|
-
ORDER BY
|
|
408
|
+
ORDER BY
|
|
409
|
+
last_updated DESC,
|
|
410
|
+
CASE status
|
|
411
|
+
WHEN 'completed' THEN 1
|
|
412
|
+
WHEN 'failed' THEN 2
|
|
413
|
+
ELSE 3
|
|
414
|
+
END ASC
|
|
415
|
+
LIMIT 1
|
|
381
416
|
`;
|
|
382
417
|
const [rows] = await this.bigquery.query({ query, params: { date: dateStr, computationName }, location: this.config.bigquery.location });
|
|
383
418
|
if (rows.length === 0) return null;
|