bulltrackers-module 1.0.283 → 1.0.284
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* FILENAME: computation-system/helpers/computation_dispatcher.js
|
|
3
|
-
* PURPOSE: "Smart Dispatcher" - Analyzes state and
|
|
4
|
-
* UPDATED:
|
|
3
|
+
* PURPOSE: "Smart Dispatcher" - Analyzes state and dispatches tasks.
|
|
4
|
+
* UPDATED: Removed "Zombie" logic. Now forcefully dispatches any task
|
|
5
|
+
* that is not explicitly COMPLETED, ensuring reliability for one-shot execution.
|
|
5
6
|
*/
|
|
6
7
|
|
|
7
8
|
const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
|
|
@@ -116,7 +117,6 @@ async function dispatchComputationPass(config, dependencies, computationManifest
|
|
|
116
117
|
hash: item.hash || item.newHash,
|
|
117
118
|
previousCategory: item.previousCategory || null,
|
|
118
119
|
triggerReason: item.reason || "Unknown",
|
|
119
|
-
// [NEW] Pass Content-Based hashes provided by analyzeDateExecution
|
|
120
120
|
dependencyResultHashes: item.dependencyResultHashes || {},
|
|
121
121
|
timestamp: Date.now()
|
|
122
122
|
});
|
|
@@ -142,21 +142,25 @@ async function dispatchComputationPass(config, dependencies, computationManifest
|
|
|
142
142
|
await db.runTransaction(async (t) => {
|
|
143
143
|
const doc = await t.get(ledgerRef);
|
|
144
144
|
|
|
145
|
-
// [
|
|
145
|
+
// [UPDATED] Robust One-Shot Dispatch Logic
|
|
146
|
+
// We REMOVED the "Zombie Timeout" check.
|
|
147
|
+
// If the Dispatcher is running, we assume the user intends to ensure these tasks are dispatched.
|
|
148
|
+
|
|
146
149
|
if (doc.exists) {
|
|
147
150
|
const data = doc.data();
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
// Fallback: If no lease exists, assume 1 hour timeout for legacy zombie detection
|
|
153
|
-
const isLegacyZombie = !data.leaseExpiresAt && data.createdAt && (now - data.createdAt.toMillis() > 3600000);
|
|
154
|
-
|
|
155
|
-
if (isPending && !isLeaseExpired && !isLegacyZombie) {
|
|
156
|
-
return false; // Valid active pending task, do not double dispatch
|
|
151
|
+
|
|
152
|
+
// 1. If it's already COMPLETED, do not re-run (Strict idempotency).
|
|
153
|
+
if (data.status === 'COMPLETED') {
|
|
154
|
+
return false;
|
|
157
155
|
}
|
|
156
|
+
|
|
157
|
+
// 2. Any other status (PENDING, IN_PROGRESS, FAILED, ...):
|
|
158
|
+
// Since the Dispatcher runs ONCE per day, seeing PENDING here means
|
|
159
|
+
// the *previous* run failed to complete, or the worker died.
|
|
160
|
+
// We overwrite it to force a restart.
|
|
158
161
|
}
|
|
159
162
|
|
|
163
|
+
// Create/Overwrite entry with PENDING to start the cycle
|
|
160
164
|
t.set(ledgerRef, {
|
|
161
165
|
status: 'PENDING',
|
|
162
166
|
dispatchId: task.dispatchId,
|
|
@@ -165,8 +169,9 @@ async function dispatchComputationPass(config, dependencies, computationManifest
|
|
|
165
169
|
createdAt: new Date(),
|
|
166
170
|
dispatcherHash: currentManifestHash,
|
|
167
171
|
triggerReason: task.triggerReason,
|
|
168
|
-
retries: 0
|
|
172
|
+
retries: 0 // Reset retries for the new attempt
|
|
169
173
|
}, { merge: true });
|
|
174
|
+
|
|
170
175
|
return true;
|
|
171
176
|
});
|
|
172
177
|
|
|
@@ -191,7 +196,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
|
|
|
191
196
|
|
|
192
197
|
return { dispatched: finalDispatched.length };
|
|
193
198
|
} else {
|
|
194
|
-
logger.log('INFO', `[Dispatcher] All tasks were already
|
|
199
|
+
logger.log('INFO', `[Dispatcher] All tasks were already COMPLETED (Double Dispatch avoided).`);
|
|
195
200
|
return { dispatched: 0 };
|
|
196
201
|
}
|
|
197
202
|
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* FILENAME: computation-system/helpers/computation_worker.js
|
|
3
3
|
* PURPOSE: Consumes computation tasks from Pub/Sub.
|
|
4
|
-
* UPDATED:
|
|
4
|
+
* UPDATED: Fixed "Silent Failure" bug where tasks got stuck in PENDING.
|
|
5
|
+
* Increased MAX_RETRIES and ensured Ledger is updated on poison messages.
|
|
5
6
|
*/
|
|
6
7
|
|
|
7
8
|
const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
|
|
@@ -13,7 +14,10 @@ let calculationPackage;
|
|
|
13
14
|
try { calculationPackage = require('aiden-shared-calculations-unified');
|
|
14
15
|
} catch (e) {console.error("FATAL: Could not load 'aiden-shared-calculations-unified'."); throw e; }
|
|
15
16
|
const calculations = calculationPackage.calculations;
|
|
16
|
-
|
|
17
|
+
|
|
18
|
+
// [FIX] Increased from 0 to 3.
|
|
19
|
+
// 0 caused "retryCount >= MAX_RETRIES" to trigger immediately on the first run.
|
|
20
|
+
const MAX_RETRIES = 3;
|
|
17
21
|
|
|
18
22
|
async function handleComputationTask(message, config, dependencies) {
|
|
19
23
|
const systemLogger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
|
|
@@ -35,24 +39,49 @@ async function handleComputationTask(message, config, dependencies) {
|
|
|
35
39
|
|
|
36
40
|
if (!date || !pass || !computation) { logger.log('ERROR', `[Worker] Invalid payload.`, data); return; }
|
|
37
41
|
|
|
38
|
-
|
|
42
|
+
// [FIX] Default retryCount to 1: deliveryAttempt is only populated when the subscription has a dead-letter policy, so a missing or 0 value is treated as the first attempt.
|
|
43
|
+
const retryCount = message.deliveryAttempt || 1;
|
|
44
|
+
|
|
45
|
+
// [FIX] Changed condition to '>' so attempts 1, 2, and 3 are allowed to run.
|
|
46
|
+
if (retryCount > MAX_RETRIES) {
|
|
47
|
+
logger.log('ERROR', `[Worker] ☠️ Task POISONED. Moved to DLQ: ${computation}`);
|
|
48
|
+
try {
|
|
49
|
+
await db.collection('computation_dead_letter_queue').add({
|
|
50
|
+
originalData: data,
|
|
51
|
+
dispatchId: dispatchId,
|
|
52
|
+
error: { message: 'Max Retries Exceeded', stack: 'PubSub delivery limit reached' },
|
|
53
|
+
finalAttemptAt: new Date(),
|
|
54
|
+
failureReason: 'MAX_RETRIES_EXCEEDED'
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
// [FIX] CRITICAL: Update Ledger to FAILED.
|
|
58
|
+
// Previously, this returned without updating, leaving the Ledger stuck in 'PENDING'.
|
|
59
|
+
// Now we explicitly mark it FAILED so the pipeline knows it's dead.
|
|
60
|
+
await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
|
|
61
|
+
status: 'FAILED',
|
|
62
|
+
error: 'Max Retries Exceeded (Poison Message)',
|
|
63
|
+
failedAt: new Date()
|
|
64
|
+
}, { merge: true });
|
|
65
|
+
|
|
66
|
+
return;
|
|
67
|
+
} catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
logger.log('INFO', `[Worker] 📥 Received Task: ${computation} (${date}) [Attempt ${retryCount}/${MAX_RETRIES}]`, {
|
|
39
71
|
dispatchId: dispatchId || 'legacy',
|
|
40
72
|
reason: triggerReason
|
|
41
73
|
});
|
|
42
74
|
|
|
43
|
-
//
|
|
44
|
-
// Mark task as IN_PROGRESS and set a lease timeout (e.g., 20 minutes) to prevent Zombies
|
|
75
|
+
// Mark task as IN_PROGRESS (Visual only, dispatcher does not use this for logic anymore)
|
|
45
76
|
try {
|
|
46
|
-
const leaseTimeMs = (config.workerLeaseMinutes || 20) * 60 * 1000;
|
|
47
77
|
await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
|
|
48
78
|
status: 'IN_PROGRESS',
|
|
49
79
|
workerId: process.env.K_REVISION || 'unknown',
|
|
50
80
|
startedAt: new Date(),
|
|
51
|
-
leaseExpiresAt: Date.now() + leaseTimeMs,
|
|
52
81
|
dispatchId: dispatchId
|
|
53
82
|
}, { merge: true });
|
|
54
83
|
} catch (leaseErr) {
|
|
55
|
-
logger.log('WARN', `[Worker] Failed to
|
|
84
|
+
logger.log('WARN', `[Worker] Failed to update status to IN_PROGRESS for ${computation}. Continuing...`, leaseErr);
|
|
56
85
|
}
|
|
57
86
|
|
|
58
87
|
let computationManifest;
|
|
@@ -73,7 +102,7 @@ async function handleComputationTask(message, config, dependencies) {
|
|
|
73
102
|
runDependencies,
|
|
74
103
|
computationManifest,
|
|
75
104
|
previousCategory,
|
|
76
|
-
dependencyResultHashes
|
|
105
|
+
dependencyResultHashes
|
|
77
106
|
);
|
|
78
107
|
const duration = Date.now() - startTime;
|
|
79
108
|
|
|
@@ -121,28 +150,30 @@ async function handleComputationTask(message, config, dependencies) {
|
|
|
121
150
|
finalAttemptAt: new Date(),
|
|
122
151
|
failureReason: 'PERMANENT_DETERMINISTIC_ERROR'
|
|
123
152
|
});
|
|
153
|
+
|
|
154
|
+
// [FIX] Update Ledger to FAILED immediately for deterministic errors
|
|
155
|
+
await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
|
|
156
|
+
status: 'FAILED',
|
|
157
|
+
error: err.message || 'Permanent Deterministic Error',
|
|
158
|
+
failedAt: new Date()
|
|
159
|
+
}, { merge: true });
|
|
160
|
+
|
|
124
161
|
await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'PERMANENT_FAIL' }, { durationMs: 0 }, triggerReason);
|
|
125
162
|
return;
|
|
126
163
|
} catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
|
|
127
164
|
}
|
|
128
165
|
|
|
129
|
-
|
|
166
|
+
// Standard Retryable Error (Crash)
|
|
130
167
|
if (retryCount >= MAX_RETRIES) {
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
originalData: data,
|
|
135
|
-
dispatchId: dispatchId,
|
|
136
|
-
error: { message: err.message, stack: err.stack },
|
|
137
|
-
finalAttemptAt: new Date(),
|
|
138
|
-
failureReason: 'MAX_RETRIES_EXCEEDED'
|
|
139
|
-
});
|
|
140
|
-
return;
|
|
141
|
-
} catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
|
|
168
|
+
// We throw here, PubSub will retry, and the "Poison Logic" at the top
|
|
169
|
+
// will catch it on the NEXT attempt (deliveryAttempt > MAX_RETRIES) to finalize the failure.
|
|
170
|
+
throw err;
|
|
142
171
|
}
|
|
143
172
|
|
|
144
173
|
logger.log('ERROR', `[Worker] ❌ Crash: ${computation}: ${err.message}`);
|
|
174
|
+
|
|
145
175
|
await recordRunAttempt(db, { date, computation, pass }, 'CRASH', { message: err.message, stack: err.stack, stage: 'SYSTEM_CRASH' }, { durationMs: 0 }, triggerReason);
|
|
176
|
+
// Throwing triggers Pub/Sub retry
|
|
146
177
|
throw err;
|
|
147
178
|
}
|
|
148
179
|
}
|