bulltrackers-module 1.0.337 → 1.0.339

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,8 @@
1
1
  /**
2
2
  * FILENAME: computation-system/helpers/computation_dispatcher.js
3
3
  * PURPOSE: Sequential Cursor-Based Dispatcher.
4
- * BEHAVIOR: Dispatch -> Wait ETA -> Next Date.
5
- * UPDATED: Added "Sweep" Protocol for OOM recovery & High-Mem Verification.
6
- * UPDATED: Added checks to permanently skip Deterministic Failures (Quality Breakers).
4
+ * UPDATED: Enforces Strict One-Shot Policy (Standard -> HighMem -> Dead Letter).
5
+ * UPDATED: Prevents infinite loops by permanently ignoring deterministic failures.
7
6
  */
8
7
 
9
8
  const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -23,7 +22,7 @@ const STALE_LOCK_THRESHOLD_MS = 1000 * 60 * 15;
23
22
  // =============================================================================
24
23
  async function filterActiveTasks(db, date, pass, tasks, logger, forceRun = false) {
25
24
  if (!tasks || tasks.length === 0) return [];
26
- if (forceRun) return tasks; // Bypass check for Sweep Mode (Handled separately in Sweep logic)
25
+ if (forceRun) return tasks;
27
26
 
28
27
  const checkPromises = tasks.map(async (t) => {
29
28
  const taskName = normalizeName(t.name);
@@ -49,21 +48,14 @@ async function filterActiveTasks(db, date, pass, tasks, logger, forceRun = false
49
48
  return null;
50
49
  }
51
50
 
52
- // 2. GHOST CHECK (Debounce immediate re-runs)
53
- const isJustFinished = data.status === 'COMPLETED' &&
54
- data.completedAt &&
55
- (Date.now() - new Date(data.completedAt).getTime() < 60 * 1000);
56
-
57
- if (isJustFinished) return null;
51
+ // 2. COMPLETED CHECK (Ignore)
52
+ if (data.status === 'COMPLETED') return null;
58
53
 
59
- // 3. DETERMINISTIC FAILURE CHECK (Break Infinite Loops)
60
- // If the task failed due to Logic/Quality issues, never retry it automatically.
54
+ // 3. FAILED CHECK (Pass through to Route Splitter)
55
+ // We do NOT filter FAILED tasks here. We pass them to splitRoutes()
56
+ // which decides if they get promoted to High-Mem or dropped forever.
61
57
  if (data.status === 'FAILED') {
62
- const stage = data.error?.stage;
63
- if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
64
- if (logger) logger.log('WARN', `[Dispatcher] 🛑 Skipping deterministic failure for ${taskName} (${stage}).`);
65
- return null;
66
- }
58
+ return t;
67
59
  }
68
60
  }
69
61
  return t;
@@ -184,15 +176,12 @@ async function handlePassVerification(config, dependencies, computationManifest,
184
176
 
185
177
  const missingTasks = [];
186
178
 
187
- // Optimize: Batch fetch statuses if possible, but for now loop is safer for memory
188
- // In production, we might want p-limit here.
189
179
  for (const date of sessionDates) {
190
180
  const [dailyStatus, availability] = await Promise.all([
191
181
  fetchComputationStatus(date, config, dependencies),
192
182
  checkRootDataAvailability(date, config, dependencies, DEFINITIVE_EARLIEST_DATES)
193
183
  ]);
194
184
 
195
- // Need previous status for historical calcs
196
185
  let prevDailyStatus = null;
197
186
  if (calcsInPass.some(c => c.isHistorical)) {
198
187
  const prevD = new Date(date + 'T00:00:00Z');
@@ -202,12 +191,9 @@ async function handlePassVerification(config, dependencies, computationManifest,
202
191
 
203
192
  const report = analyzeDateExecution(date, calcsInPass, availability ? availability.status : {}, dailyStatus, manifestMap, prevDailyStatus);
204
193
 
205
- // We only care about Runnable (New) or ReRuns (Changed/Failed)
206
- // We ignore Blocked (impossible to run) and Impossible (permanent fail)
207
194
  const pending = [...report.runnable, ...report.reRuns];
208
195
 
209
196
  if (pending.length > 0) {
210
- // Calculate ETA
211
197
  const totalWeight = pending.reduce((sum, t) => sum + (weightMap.get(normalizeName(t.name)) || 1.0), 0);
212
198
  const eta = Math.max(30, Math.ceil(totalWeight * BASE_SECONDS_PER_WEIGHT_UNIT));
213
199
 
@@ -244,7 +230,6 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
244
230
  checkRootDataAvailability(date, config, dependencies, DEFINITIVE_EARLIEST_DATES)
245
231
  ]);
246
232
 
247
- // Previous Status Fetch (simplified for brevity, assume historical dependency check works or fails safe)
248
233
  let prevDailyStatus = null;
249
234
  if (calcsInPass.some(c => c.isHistorical)) {
250
235
  const prevD = new Date(date + 'T00:00:00Z');
@@ -260,8 +245,8 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
260
245
  return { dispatched: 0 };
261
246
  }
262
247
 
263
- // [FIX] Filter out deterministic failures from Sweep to prevent loops
264
- // Sweep is for OOM recovery. Quality failures will fail on High-Mem too.
248
+ // [CRITICAL] FILTER FOR SWEEP:
249
+ // Only dispatch if it hasn't failed High-Mem or Quality checks.
265
250
  const validTasks = [];
266
251
  for (const task of pending) {
267
252
  const name = normalizeName(task.name);
@@ -270,21 +255,28 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
270
255
  if (doc.exists) {
271
256
  const data = doc.data();
272
257
  const stage = data.error?.stage;
273
- if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
258
+
259
+ // A. QUALITY CHECK: If it failed logic, DO NOT RETRY.
260
+ if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
274
261
  logger.log('WARN', `[Sweep] 🛑 Skipping deterministic failure for ${name} (${stage}).`);
275
262
  continue;
276
263
  }
264
+
265
+ // B. DEAD END CHECK: If it failed High-Mem already, DO NOT RETRY.
266
+ if (data.resourceTier === 'high-mem' && data.status === 'FAILED') {
267
+ logger.log('WARN', `[Sweep] 🛑 Skipping ${name} - Already failed on High-Mem.`);
268
+ continue;
269
+ }
277
270
  }
278
271
  validTasks.push(task);
279
272
  }
280
273
 
281
274
  if (validTasks.length === 0) {
282
- logger.log('INFO', `[Sweep] ${date} only has deterministic failures. No dispatch.`);
275
+ logger.log('INFO', `[Sweep] ${date} has no retryable tasks. Ignoring.`);
283
276
  return { dispatched: 0 };
284
277
  }
285
278
 
286
- // 2. FORCE High Mem & Skip Zombie Check
287
- // We use validTasks now
279
+ // 2. FORCE High Mem for remaining valid tasks
288
280
  const currentDispatchId = crypto.randomUUID();
289
281
 
290
282
  const tasksPayload = validTasks.map(t => ({
@@ -367,8 +359,9 @@ async function handleStandardDispatch(config, dependencies, computationManifest,
367
359
  }
368
360
 
369
361
  if (selectedTasks.length > 0) {
370
- const reroutes = await getHighMemReroutes(db, selectedDate, passToRun, selectedTasks);
371
- if (reroutes.length > 0) selectedTasks = reroutes;
362
+ // Split Logic: Moves OOMs to High-Mem, drops dead letters
363
+ const { standard, highMem } = await splitRoutes(db, selectedDate, passToRun, selectedTasks, logger);
364
+ selectedTasks = [...standard, ...highMem];
372
365
  }
373
366
  }
374
367
  }
@@ -428,21 +421,58 @@ async function handleStandardDispatch(config, dependencies, computationManifest,
428
421
  };
429
422
  }
430
423
 
// =============================================================================
// HELPER: Route Splitting (One-Shot Enforcement)
// =============================================================================
/**
 * Splits the selected tasks into a standard-tier list and a high-mem list
 * based on each task's ledger document, dropping tasks that must not be retried.
 *
 * Routing rules (evaluated per task, in order):
 *   - No ledger document            -> standard (first run).
 *   - FAILED with a deterministic   -> dropped (dead letter); a bigger machine
 *     error stage                      will not change the outcome.
 *   - FAILED with resourceTier      -> dropped (dead letter); the single
 *     'high-mem'                       high-mem attempt has been used.
 *   - Any other FAILED              -> high-mem, tagged with a retry reason.
 *   - Any other status              -> standard.
 *
 * @param {object} db       Firestore-like handle exposing `doc(path).get()`.
 * @param {string} date     Date segment of the ledger path.
 * @param {string} pass     Pass segment of the ledger path.
 * @param {Array<object>} tasks  Tasks to route; each must carry a `name`.
 * @param {object} [logger] Optional logger exposing `log(level, message)`.
 * @returns {Promise<{standard: Array<object>, highMem: Array<object>}>}
 *          Tasks to dispatch per tier, in input order. Dropped tasks appear in neither list.
 */
async function splitRoutes(db, date, pass, tasks, logger) {
  // Same stage list the sweep filter in this file uses. SHARDING_LIMIT_EXCEEDED
  // was missing here, so sharding failures were promoted to high-mem instead
  // of being dropped.
  const DETERMINISTIC_STAGES = ['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'];

  const standard = [];
  const highMem = [];

  for (const task of tasks) {
    const name = normalizeName(task.name);
    const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
    const doc = await db.doc(ledgerPath).get();

    if (!doc.exists) {
      // New task -> Standard
      standard.push(task);
      continue;
    }

    const data = doc.data();

    if (data.status !== 'FAILED') {
      // Not FAILED (e.g. a zombie that filterActiveTasks let through): retry on Standard.
      // NOTE(review): a 'CRASH' status also lands here; the removed getHighMemReroutes
      // treated CRASH like FAILED for OOM rerouting. Confirm this is intended.
      standard.push(task);
      continue;
    }

    const stage = data.error?.stage;

    // 1. QUALITY / LOGIC FAIL: Dead Letter (Drop it)
    if (DETERMINISTIC_STAGES.includes(stage)) {
      logger?.log('WARN', `[Dispatcher] 🛑 Dropping ${name} - Deterministic Failure (${stage}).`);
      continue;
    }

    // 2. PREVIOUSLY HIGH MEM FAIL: Dead Letter (Drop it)
    if (data.resourceTier === 'high-mem') {
      logger?.log('WARN', `[Dispatcher] 🛑 Dropping ${name} - Failed on High-Mem already.`);
      continue;
    }

    // 3. STANDARD FAIL (Crash/OOM): Promote to High Mem (Retry)
    // A standard-tier failure gets ONE shot on high-mem. Unknown errors are
    // promoted too, to cover OOMs that surfaced as generic crashes.
    highMem.push({
      ...task,
      resources: 'high-mem',
      reason: `Retry: ${data.error?.message || 'Standard Failure'}`
    });
  }

  return { standard, highMem };
}
447
477
 
448
478
  module.exports = { dispatchComputationPass };
@@ -1,7 +1,10 @@
1
1
  /**
2
2
  * FILENAME: computation-system/helpers/computation_worker.js
3
- * UPDATED: Fixed Firestore 'undefined' field error for dispatchId.
4
- * UPDATED: Writes structured Error objects (with stage) to Ledger to prevent retry loops.
3
+ * UPDATED: Implemented Strict Idempotency Gate (The "One-Shot" Policy).
4
+ * BEHAVIOR:
5
+ * 1. Checks Ledger via Transaction before execution.
6
+ * 2. If Status is COMPLETED or FAILED, immediately ACKs (returns) to stop Pub/Sub loops.
7
+ * 3. Preserves Error 'stage' to ensure logic errors are not retried.
5
8
  */
6
9
 
7
10
  const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
@@ -28,6 +31,62 @@ function startMemoryHeartbeat(db, ledgerPath, intervalMs = 2000) {
28
31
  return { timer, getPeak: () => peakRss };
29
32
  }
30
33
 
/**
 * STRICT IDEMPOTENCY GATE
 * Inside a single transaction, reads the ledger document and either refuses
 * the run or claims a lease by merging an IN_PROGRESS record into it.
 *
 * @param {object} db          Firestore-like handle exposing `doc()` and `runTransaction()`.
 * @param {string} ledgerPath  Path of the task's ledger document.
 * @param {string} [dispatchId] Dispatch identifier; stored as 'unknown' when falsy.
 * @param {string} workerId    Identifier recorded on the lease.
 * @returns {Promise<{shouldRun: boolean, leaseData?: object, reason?: string}>}
 *          `leaseData` is present when `shouldRun` is true, `reason` when it is false.
 *          Never rejects: a failed transaction is reported as `shouldRun: false`.
 */
async function checkIdempotencyAndClaimLease(db, ledgerPath, dispatchId, workerId) {
  const ledgerRef = db.doc(ledgerPath);

  // Decide, from the current ledger record, whether the run must be refused.
  // Returns a refusal result, or null when the lease may be claimed.
  const refusalFor = (record) => {
    switch (record.status) {
      // Finalized tasks must never run again (stops Pub/Sub redelivery loops).
      case 'COMPLETED':
      case 'FAILED':
      case 'CRASH':
        return { shouldRun: false, reason: `Task already in terminal state: ${record.status}` };

      // Same dispatchId: a redelivery while the task is still running.
      // Different dispatchId: another worker holds it; stale leases are not stolen here.
      case 'IN_PROGRESS':
        return record.dispatchId === dispatchId
          ? { shouldRun: false, reason: 'Duplicate delivery: Task already IN_PROGRESS with same ID.' }
          : { shouldRun: false, reason: 'Collision: Task currently IN_PROGRESS by another worker.' };

      default:
        return null;
    }
  };

  try {
    return await db.runTransaction(async (txn) => {
      const snapshot = await txn.get(ledgerRef);

      if (snapshot.exists) {
        const refusal = refusalFor(snapshot.data());
        if (refusal !== null) return refusal;
      }

      // Document is absent or in a non-blocking state: claim the lease.
      const leaseData = {
        status: 'IN_PROGRESS',
        workerId: workerId,
        dispatchId: dispatchId || 'unknown',
        startedAt: new Date()
      };

      txn.set(ledgerRef, leaseData, { merge: true });
      return { shouldRun: true, leaseData };
    });
  } catch (txError) {
    console.error(`[Idempotency] Transaction failed: ${txError.message}`);
    // A failed transaction (e.g. contention) is treated as "do not run".
    return { shouldRun: false, reason: `Transaction Error: ${txError.message}` };
  }
}
89
+
31
90
  async function handleComputationTask(message, config, dependencies) {
32
91
  const logger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
33
92
  const runDeps = { ...dependencies, logger };
@@ -45,17 +104,19 @@ async function handleComputationTask(message, config, dependencies) {
45
104
  const resourceTier = resources || 'standard';
46
105
  const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${computation}`;
47
106
 
48
- logger.log('INFO', `[Worker] 📥 Task: ${computation} (${date}) [Tier: ${resourceTier}]`);
107
+ const workerId = process.env.K_REVISION || os.hostname();
49
108
 
50
- // [FIX] Build document object and only add dispatchId if it is defined
51
- const leaseData = {
52
- status: 'IN_PROGRESS',
53
- workerId: process.env.K_REVISION || os.hostname(),
54
- startedAt: new Date()
55
- };
56
- if (dispatchId) leaseData.dispatchId = dispatchId;
109
+ // --- STEP 1: IDEMPOTENCY CHECK ---
110
+ const gate = await checkIdempotencyAndClaimLease(db, ledgerPath, dispatchId, workerId);
111
+
112
+ if (!gate.shouldRun) {
113
+ // [CRITICAL] We return successfully (ACK) to remove the message from Pub/Sub.
114
+ // We do NOT throw an error, because that would cause a retry.
115
+ logger.log('WARN', `[Worker] 🛑 Idempotency Gate: Skipping ${computation}. Reason: ${gate.reason}`);
116
+ return;
117
+ }
57
118
 
58
- await db.doc(ledgerPath).set(leaseData, { merge: true });
119
+ logger.log('INFO', `[Worker] 📥 Task: ${computation} (${date}) [Tier: ${resourceTier}] [ID: ${dispatchId}]`);
59
120
 
60
121
  const heartbeat = startMemoryHeartbeat(db, ledgerPath);
61
122
 
@@ -72,7 +133,13 @@ async function handleComputationTask(message, config, dependencies) {
72
133
  const failureReport = result?.updates?.failureReport || [];
73
134
  const successUpdates = result?.updates?.successUpdates || {};
74
135
 
75
- if (failureReport.length > 0) throw new Error(failureReport[0].error.message);
136
+ // [CRITICAL] Propagate Error Stage from inner logic
137
+ if (failureReport.length > 0) {
138
+ const reportedError = failureReport[0].error;
139
+ const errorObj = new Error(reportedError.message);
140
+ errorObj.stage = reportedError.stage;
141
+ throw errorObj;
142
+ }
76
143
 
77
144
  const calcUpdate = successUpdates[normalizeName(computation)] || {};
78
145
  const metrics = {
@@ -90,16 +157,19 @@ async function handleComputationTask(message, config, dependencies) {
90
157
 
91
158
  } catch (err) {
92
159
  clearInterval(heartbeat.timer);
160
+
93
161
  const isDeterministic = ['SHARDING_LIMIT_EXCEEDED', 'QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE'].includes(err.stage);
94
-
162
+
163
+ // --- STEP 2: ERROR HANDLING ---
164
+ // If Logic Error OR Max Retries reached, mark FAILED and ACK.
95
165
  if (isDeterministic || (message.deliveryAttempt || 1) >= MAX_RETRIES) {
96
- // [FIX] Write structured error payload so Dispatcher can see the 'stage'
97
- // This prevents the Dispatcher from retrying Quality Broken tasks.
166
+
98
167
  const errorPayload = {
99
168
  message: err.message,
100
169
  stage: err.stage || 'FATAL'
101
170
  };
102
171
 
172
+ // This write ensures the Idempotency Gate blocks future retries
103
173
  await db.doc(ledgerPath).set({
104
174
  status: 'FAILED',
105
175
  error: errorPayload,
@@ -107,8 +177,10 @@ async function handleComputationTask(message, config, dependencies) {
107
177
  }, { merge: true });
108
178
 
109
179
  await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'FATAL' }, { peakMemoryMB: heartbeat.getPeak() }, triggerReason, resourceTier);
110
- return;
180
+ return; // ACK
111
181
  }
182
+
183
+ // Only throw (NACK) for transient system errors (Network, etc)
112
184
  throw err;
113
185
  }
114
186
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bulltrackers-module",
3
- "version": "1.0.337",
3
+ "version": "1.0.339",
4
4
  "description": "Helper Functions for Bulltrackers.",
5
5
  "main": "index.js",
6
6
  "files": [