bulltrackers-module 1.0.365 → 1.0.367
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -35,34 +35,22 @@ async function filterActiveTasks(db, date, pass, tasks, logger, forceRun = false
|
|
|
35
35
|
const data = snap.data();
|
|
36
36
|
const isActive = ['PENDING', 'IN_PROGRESS'].includes(data.status);
|
|
37
37
|
|
|
38
|
-
// 1. ZOMBIE CHECK (Recover Stale Locks)
|
|
39
38
|
if (isActive) {
|
|
40
39
|
const lastActivityTime = data.telemetry?.lastHeartbeat
|
|
41
40
|
? new Date(data.telemetry.lastHeartbeat).getTime()
|
|
42
41
|
: (data.startedAt ? new Date(data.startedAt).getTime() : 0);
|
|
43
42
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
if (timeSinceActive > STALE_LOCK_THRESHOLD_MS) {
|
|
43
|
+
if ((Date.now() - lastActivityTime) > STALE_LOCK_THRESHOLD_MS) {
|
|
47
44
|
if (logger) logger.log('WARN', `[Dispatcher] 🧟 Breaking stale lock for ${taskName}.`);
|
|
48
45
|
return t;
|
|
49
46
|
}
|
|
50
47
|
return null;
|
|
51
48
|
}
|
|
52
|
-
|
|
53
|
-
//
|
|
54
|
-
if (data.status === 'COMPLETED') return null;
|
|
55
|
-
|
|
56
|
-
// 3. FAILED CHECK (Pass through to Route Splitter)
|
|
57
|
-
// We do NOT filter FAILED tasks here. We pass them to splitRoutes()
|
|
58
|
-
// which decides if they get promoted to High-Mem or dropped forever.
|
|
59
|
-
if (data.status === 'FAILED') {
|
|
60
|
-
return t;
|
|
61
|
-
}
|
|
49
|
+
// Note: We do NOT filter COMPLETED here anymore for Sweep.
|
|
50
|
+
// If the Orchestrator says it needs to run, we run it.
|
|
62
51
|
}
|
|
63
52
|
return t;
|
|
64
53
|
});
|
|
65
|
-
|
|
66
54
|
const results = await Promise.all(checkPromises);
|
|
67
55
|
return results.filter(t => t !== null);
|
|
68
56
|
}
|
|
@@ -276,7 +264,6 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
|
|
|
276
264
|
return { dispatched: 0 };
|
|
277
265
|
}
|
|
278
266
|
|
|
279
|
-
// [CRITICAL UPDATE] FILTER FOR SWEEP:
|
|
280
267
|
const validTasks = [];
|
|
281
268
|
for (const task of pending) {
|
|
282
269
|
const name = normalizeName(task.name);
|
|
@@ -286,28 +273,34 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
|
|
|
286
273
|
if (doc.exists) {
|
|
287
274
|
const data = doc.data();
|
|
288
275
|
|
|
289
|
-
|
|
276
|
+
// 1. ACTIVE CHECK: Don't double-dispatch if already running... UNLESS IT'S A ZOMBIE
|
|
290
277
|
if (['PENDING', 'IN_PROGRESS'].includes(data.status)) {
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
}
|
|
278
|
+
const lastActivity = data.telemetry?.lastHeartbeat
|
|
279
|
+
? new Date(data.telemetry.lastHeartbeat).getTime()
|
|
280
|
+
: (data.startedAt ? new Date(data.startedAt).getTime() : 0);
|
|
295
281
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
282
|
+
// If it's been silent for longer than STALE_LOCK_THRESHOLD_MS (~15 mins), it's a zombie: break the stale lock and re-run.
|
|
283
|
+
if ((Date.now() - lastActivity) > STALE_LOCK_THRESHOLD_MS) {
|
|
284
|
+
logger.log('WARN', `[Sweep] 🧟 Found ZOMBIE lock for ${name}. Breaking lock and re-running.`);
|
|
285
|
+
// Don't continue; let it fall through to dispatch
|
|
286
|
+
} else {
|
|
287
|
+
logger.log('INFO', `[Sweep] ⏳ Skipping ${name} - Valid IN_PROGRESS.`);
|
|
288
|
+
continue;
|
|
289
|
+
}
|
|
304
290
|
}
|
|
291
|
+
|
|
292
|
+
// 2. COMPLETION CHECK (GHOST STATE FIX)
|
|
293
|
+
// We REMOVED the check that skips if (status === 'COMPLETED' && hash === task.hash).
|
|
294
|
+
// If we are here, 'analyzeDateExecution' (The Brain) decided this task is NOT done
|
|
295
|
+
// (likely due to a missing or outdated entry in computation_status).
|
|
296
|
+
// Even if the Ledger (The Log) says it finished, the system state is inconsistent.
|
|
297
|
+
// We MUST re-run to repair the Status Index.
|
|
305
298
|
|
|
306
299
|
const stage = data.error?.stage;
|
|
307
300
|
|
|
308
301
|
// 3. DETERMINISTIC FAILURE CHECK
|
|
309
302
|
if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
|
|
310
|
-
//
|
|
303
|
+
// If hash matches, it's the exact same code that failed before. Don't retry in loop.
|
|
311
304
|
if (data.hash === task.hash) {
|
|
312
305
|
logger.log('WARN', `[Sweep] 🛑 Skipping deterministic failure for ${name} (${stage}).`);
|
|
313
306
|
continue;
|
|
@@ -315,10 +308,13 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
|
|
|
315
308
|
logger.log('INFO', `[Sweep] 🔄 Code Updated for ${name}. Retrying sweep despite previous ${stage}.`);
|
|
316
309
|
}
|
|
317
310
|
|
|
318
|
-
// 4. DEAD END CHECK
|
|
311
|
+
// 4. DEAD END CHECK (High Mem)
|
|
319
312
|
if (data.resourceTier === 'high-mem' && data.status === 'FAILED') {
|
|
320
|
-
|
|
321
|
-
|
|
313
|
+
// If code hasn't changed, don't hammer it.
|
|
314
|
+
if (data.hash === task.hash) {
|
|
315
|
+
logger.log('WARN', `[Sweep] 🛑 Skipping ${name} - Already failed on High-Mem.`);
|
|
316
|
+
continue;
|
|
317
|
+
}
|
|
322
318
|
}
|
|
323
319
|
}
|
|
324
320
|
validTasks.push(task);
|
|
@@ -345,11 +341,7 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
|
|
|
345
341
|
dispatchId: currentDispatchId,
|
|
346
342
|
triggerReason: 'SWEEP_RECOVERY',
|
|
347
343
|
resources: 'high-mem', // FORCE
|
|
348
|
-
traceContext: {
|
|
349
|
-
traceId: traceId,
|
|
350
|
-
spanId: spanId,
|
|
351
|
-
sampled: true
|
|
352
|
-
}
|
|
344
|
+
traceContext: { traceId, spanId, sampled: true }
|
|
353
345
|
};
|
|
354
346
|
});
|
|
355
347
|
|