npm - bulltrackers-module - Versions diffs - 1.0.337 → 1.0.339 - Mend

bulltrackers-module 1.0.337 → 1.0.339

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/functions/computation-system/helpers/computation_dispatcher.js CHANGED Viewed

@@ -1,9 +1,8 @@
 /**
  * FILENAME: computation-system/helpers/computation_dispatcher.js
  * PURPOSE: Sequential Cursor-Based Dispatcher.
- * BEHAVIOR: Dispatch -> Wait ETA -> Next Date.
- * UPDATED: Added "Sweep" Protocol for OOM recovery & High-Mem Verification.
- * UPDATED: Added checks to permanently skip Deterministic Failures (Quality Breakers).
+ * UPDATED: Enforces Strict One-Shot Policy (Standard -> HighMem -> Dead Letter).
+ * UPDATED: Prevents infinite loops by permanently ignoring deterministic failures.
  */
 const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -23,7 +22,7 @@ const STALE_LOCK_THRESHOLD_MS = 1000 * 60 * 15;
 // =============================================================================
 async function filterActiveTasks(db, date, pass, tasks, logger, forceRun = false) {
     if (!tasks || tasks.length === 0) return [];
-    if (forceRun) return tasks; // Bypass check for Sweep Mode (Handled separately in Sweep logic)
+    if (forceRun) return tasks;
     const checkPromises = tasks.map(async (t) => {
         const taskName = normalizeName(t.name);
@@ -49,21 +48,14 @@ async function filterActiveTasks(db, date, pass, tasks, logger, forceRun = false
                 return null;
             }
-            // 2. GHOST CHECK (Debounce immediate re-runs)
-            const isJustFinished = data.status === 'COMPLETED' &&
-                                   data.completedAt &&
-                                   (Date.now() - new Date(data.completedAt).getTime() < 60 * 1000);
-            if (isJustFinished) return null;
+            // 2. COMPLETED CHECK (Ignore)
+            if (data.status === 'COMPLETED') return null;
-            // 3. DETERMINISTIC FAILURE CHECK (Break Infinite Loops)
-            // If the task failed due to Logic/Quality issues, never retry it automatically.
+            // 3. FAILED CHECK (Pass through to Route Splitter)
+            // We do NOT filter FAILED tasks here. We pass them to splitRoutes()
+            // which decides if they get promoted to High-Mem or dropped forever.
             if (data.status === 'FAILED') {
-                 const stage = data.error?.stage;
-                 if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
-                     if (logger) logger.log('WARN', `[Dispatcher] 🛑 Skipping deterministic failure for ${taskName} (${stage}).`);
-                     return null;
-                 }
+                 return t;
             }
         }
         return t;
@@ -184,15 +176,12 @@ async function handlePassVerification(config, dependencies, computationManifest,
     const missingTasks = [];
-    // Optimize: Batch fetch statuses if possible, but for now loop is safer for memory
-    // In production, we might want p-limit here.
     for (const date of sessionDates) {
         const [dailyStatus, availability] = await Promise.all([
             fetchComputationStatus(date, config, dependencies),
             checkRootDataAvailability(date, config, dependencies, DEFINITIVE_EARLIEST_DATES)
         ]);
-        // Need previous status for historical calcs
         let prevDailyStatus = null;
         if (calcsInPass.some(c => c.isHistorical)) {
              const prevD = new Date(date + 'T00:00:00Z');
@@ -202,12 +191,9 @@ async function handlePassVerification(config, dependencies, computationManifest,
         const report = analyzeDateExecution(date, calcsInPass, availability ? availability.status : {}, dailyStatus, manifestMap, prevDailyStatus);
-        // We only care about Runnable (New) or ReRuns (Changed/Failed)
-        // We ignore Blocked (impossible to run) and Impossible (permanent fail)
         const pending = [...report.runnable, ...report.reRuns];
         if (pending.length > 0) {
-            // Calculate ETA
             const totalWeight = pending.reduce((sum, t) => sum + (weightMap.get(normalizeName(t.name)) || 1.0), 0);
             const eta = Math.max(30, Math.ceil(totalWeight * BASE_SECONDS_PER_WEIGHT_UNIT));
@@ -244,7 +230,6 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
         checkRootDataAvailability(date, config, dependencies, DEFINITIVE_EARLIEST_DATES)
     ]);
-    // Previous Status Fetch (simplified for brevity, assume historical dependency check works or fails safe)
     let prevDailyStatus = null;
     if (calcsInPass.some(c => c.isHistorical)) {
          const prevD = new Date(date + 'T00:00:00Z');
@@ -260,8 +245,8 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
         return { dispatched: 0 };
     }
-    // [FIX] Filter out deterministic failures from Sweep to prevent loops
-    // Sweep is for OOM recovery. Quality failures will fail on High-Mem too.
+    // [CRITICAL] FILTER FOR SWEEP:
+    // Only dispatch if it hasn't failed High-Mem or Quality checks.
     const validTasks = [];
     for (const task of pending) {
         const name = normalizeName(task.name);
@@ -270,21 +255,28 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
         if (doc.exists) {
             const data = doc.data();
             const stage = data.error?.stage;
-             if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
+            // A. QUALITY CHECK: If it failed logic, DO NOT RETRY.
+            if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
                  logger.log('WARN', `[Sweep] 🛑 Skipping deterministic failure for ${name} (${stage}).`);
                  continue;
              }
+             // B. DEAD END CHECK: If it failed High-Mem already, DO NOT RETRY.
+             if (data.resourceTier === 'high-mem' && data.status === 'FAILED') {
+                 logger.log('WARN', `[Sweep] 🛑 Skipping ${name} - Already failed on High-Mem.`);
+                 continue;
+             }
         }
         validTasks.push(task);
     }
     if (validTasks.length === 0) {
-        logger.log('INFO', `[Sweep] ${date} only has deterministic failures. No dispatch.`);
+        logger.log('INFO', `[Sweep] ${date} has no retryable tasks. Ignoring.`);
         return { dispatched: 0 };
     }
-    // 2. FORCE High Mem & Skip Zombie Check
-    // We use validTasks now
+    // 2. FORCE High Mem for remaining valid tasks
     const currentDispatchId = crypto.randomUUID();
     const tasksPayload = validTasks.map(t => ({
@@ -367,8 +359,9 @@ async function handleStandardDispatch(config, dependencies, computationManifest,
             }
             if (selectedTasks.length > 0) {
-                const reroutes = await getHighMemReroutes(db, selectedDate, passToRun, selectedTasks);
-                if (reroutes.length > 0) selectedTasks = reroutes;
+                // Split Logic: Moves OOMs to High-Mem, drops dead letters
+                const { standard, highMem } = await splitRoutes(db, selectedDate, passToRun, selectedTasks, logger);
+                selectedTasks = [...standard, ...highMem];
             }
         }
     }
@@ -428,21 +421,58 @@ async function handleStandardDispatch(config, dependencies, computationManifest,
     };
 }
-async function getHighMemReroutes(db, date, pass, tasks) {
-    const reroutes = [];
+// =============================================================================
+// HELPER: Route Splitting (One-Shot Enforcement) - reads each task's ledger doc and returns { standard, highMem }; dead-lettered tasks appear in neither list.
+// =============================================================================
+async function splitRoutes(db, date, pass, tasks, logger) {
+    const standard = [];
+    const highMem = [];
     for (const task of tasks) {
         const name = normalizeName(task.name);
         const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
         const doc = await db.doc(ledgerPath).get();
-        if (doc.exists) {
-            const data = doc.data();
-            const isOOM = (data.status === 'FAILED' || data.status === 'CRASH') &&
-                          (data.resourceTier !== 'high-mem') &&
-                          ((data.peakMemoryMB > OOM_THRESHOLD_MB) || (data.error && /memory/i.test(data.error.message)));
-            if (isOOM) reroutes.push({ ...task, resources: 'high-mem' });
+        if (!doc.exists) {
+            // No ledger document for this task yet -> Standard
+            standard.push(task);
+            continue;
+        }
+        const data = doc.data();
+        // Only status === 'FAILED' is considered for escalation. NOTE(review): 'CRASH' was escalated by the removed getHighMemReroutes but now falls into the else branch below (Standard) - confirm this is intended.
+        if (data.status === 'FAILED') {
+            const stage = data.error?.stage;
+            // 1. QUALITY / LOGIC FAIL: Dead Letter (Drop it). NOTE(review): 'SHARDING_LIMIT_EXCEEDED' is in the deterministic lists used by the sweep and worker code in this diff but not here, so such a task is promoted to high-mem below - confirm.
+            if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE'].includes(stage)) {
+                logger.log('WARN', `[Dispatcher] 🛑 Dropping ${name} - Deterministic Failure (${stage}).`);
+                continue;
+            }
+            // 2. PREVIOUSLY HIGH MEM FAIL: Dead Letter (Drop it)
+            if (data.resourceTier === 'high-mem') {
+                logger.log('WARN', `[Dispatcher] 🛑 Dropping ${name} - Failed on High-Mem already.`);
+                continue;
+            }
+            // 3. STANDARD FAIL (Crash/OOM): Promote to High Mem (Retry)
+            // Every remaining FAILED task is promoted; the peakMemoryMB / "memory" message test of the removed code is no longer applied.
+            // The promoted copy carries resources: 'high-mem' plus a reason string built from the recorded error message (or 'Standard Failure' when absent).
+            highMem.push({
+                ...task,
+                resources: 'high-mem',
+                reason: `Retry: ${data.error?.message || 'Standard Failure'}`
+            });
+        } else {
+            // Any status other than FAILED (e.g. a zombie that filterActiveTasks passed through, but also CRASH or anything else) is retried on Standard.
+            standard.push(task);
         }
     }
-    return reroutes;
+    return { standard, highMem };
 }
 module.exports = { dispatchComputationPass };

package/functions/computation-system/helpers/computation_worker.js CHANGED Viewed

@@ -1,7 +1,10 @@
 /**
  * FILENAME: computation-system/helpers/computation_worker.js
- * UPDATED: Fixed Firestore 'undefined' field error for dispatchId.
- * UPDATED: Writes structured Error objects (with stage) to Ledger to prevent retry loops.
+ * UPDATED: Implemented Strict Idempotency Gate (The "One-Shot" Policy).
+ * BEHAVIOR:
+ * 1. Checks Ledger via Transaction before execution.
+ * 2. If Status is COMPLETED or FAILED, immediately ACKs (returns) to stop Pub/Sub loops.
+ * 3. Preserves Error 'stage' to ensure logic errors are not retried.
  */
 const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
@@ -28,6 +31,62 @@ function startMemoryHeartbeat(db, ledgerPath, intervalMs = 2000) {
     return { timer, getPeak: () => peakRss };
 }
+/**
+ * STRICT IDEMPOTENCY GATE
+ * Reads the ledger document inside a transaction and either rejects the task
+ * or claims it by writing an IN_PROGRESS lease in that same transaction.
+ *
+ * Decision order when the document exists:
+ *   1. status is COMPLETED, FAILED or CRASH -> reject (terminal).
+ *   2. status is IN_PROGRESS with the same dispatchId -> reject (duplicate).
+ *   3. status is IN_PROGRESS with any other dispatchId -> reject (collision).
+ * In every other case (no document, or any other status) the lease is written
+ * with { merge: true } and the task is allowed to run.
+ *
+ * This function does not throw: any error raised by the transaction is logged
+ * with console.error and reported as a rejection.
+ *
+ * @param {object} db - presumably a Firestore instance; only doc() and runTransaction() are used here.
+ * @param {string} ledgerPath - document path of the task's ledger entry.
+ * @param {string} [dispatchId] - id of the dispatch; stored as 'unknown' when falsy.
+ * @param {string} workerId - identifier written into the lease.
+ * @returns {Promise<object>} either { shouldRun: true, leaseData } when the lease
+ *   was claimed, or { shouldRun: false, reason } when the task must be skipped.
+ *
+ * NOTE(review): FAILED is terminal here, while the dispatcher code in this diff
+ * re-dispatches FAILED tasks on high-mem. Unless something resets the ledger
+ * status before delivery, that retry would be rejected by check 1 - confirm.
+ * NOTE(review): if the caller rethrows (NACKs) without updating the ledger, the
+ * status presumably stays IN_PROGRESS with the same dispatchId, so the
+ * redelivery would be rejected by check 2 and never re-run - confirm.
+ * NOTE(review): when dispatchId is falsy the lease stores 'unknown', so check 2
+ * cannot match on redelivery; check 3 still rejects, only the reason differs.
+ */
+async function checkIdempotencyAndClaimLease(db, ledgerPath, dispatchId, workerId) {
+    const docRef = db.doc(ledgerPath);
+    try {
+        return await db.runTransaction(async (t) => {
+            const doc = await t.get(docRef);
+            if (doc.exists) {
+                const data = doc.data();
+                // 1. TERMINAL STATE CHECK
+                // If the task is already finalized, we MUST NOT run it again.
+                // Intended to stop the loop when Pub/Sub redelivers a message for an already-FAILED task.
+                if (['COMPLETED', 'FAILED', 'CRASH'].includes(data.status)) {
+                    return { shouldRun: false, reason: `Task already in terminal state: ${data.status}` };
+                }
+                // 2. DUPLICATE DELIVERY CHECK
+                // If it's IN_PROGRESS with the SAME dispatchId, we are likely seeing a Pub/Sub redelivery
+                // while the code is actually running. Ignore it.
+                if (data.status === 'IN_PROGRESS' && data.dispatchId === dispatchId) {
+                    return { shouldRun: false, reason: 'Duplicate delivery: Task already IN_PROGRESS with same ID.' };
+                }
+                // 3. COLLISION CHECK (zombie handling is left to the Dispatcher)
+                // No staleness test is performed here: startedAt is never compared against a threshold,
+                // so every other IN_PROGRESS entry blocks this delivery regardless of its age.
+                if (data.status === 'IN_PROGRESS') {
+                    return { shouldRun: false, reason: 'Collision: Task currently IN_PROGRESS by another worker.' };
+                }
+            }
+            // 4. CLAIM LEASE
+            // Reached when the document does not exist or its status is none of the values handled above (e.g. PENDING, or no status at all).
+            const lease = {
+                status: 'IN_PROGRESS',
+                workerId: workerId,
+                dispatchId: dispatchId || 'unknown',
+                startedAt: new Date()
+            };
+            t.set(docRef, lease, { merge: true });
+            return { shouldRun: true, leaseData: lease };
+        });
+    } catch (e) {
+        console.error(`[Idempotency] Transaction failed: ${e.message}`);
+        // Any error from the transaction (contention or otherwise) is treated as "do not run"; NOTE(review): the caller appears to ACK on shouldRun === false, which would drop the message - confirm.
+        return { shouldRun: false, reason: `Transaction Error: ${e.message}` };
+    }
+}
 async function handleComputationTask(message, config, dependencies) {
     const logger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
     const runDeps = { ...dependencies, logger };
@@ -45,17 +104,19 @@ async function handleComputationTask(message, config, dependencies) {
     const resourceTier = resources || 'standard';
     const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${computation}`;
-    logger.log('INFO', `[Worker] 📥 Task: ${computation} (${date}) [Tier: ${resourceTier}]`);
+    const workerId = process.env.K_REVISION || os.hostname();
-    // [FIX] Build document object and only add dispatchId if it is defined
-    const leaseData = {
-        status: 'IN_PROGRESS',
-        workerId: process.env.K_REVISION || os.hostname(),
-        startedAt: new Date()
-    };
-    if (dispatchId) leaseData.dispatchId = dispatchId;
+    // --- STEP 1: IDEMPOTENCY CHECK ---
+    const gate = await checkIdempotencyAndClaimLease(db, ledgerPath, dispatchId, workerId);
+    if (!gate.shouldRun) {
+        // [CRITICAL] We return successfully (ACK) to remove the message from Pub/Sub.
+        // We do NOT throw an error, because that would cause a retry.
+        logger.log('WARN', `[Worker] 🛑 Idempotency Gate: Skipping ${computation}. Reason: ${gate.reason}`);
+        return;
+    }
-    await db.doc(ledgerPath).set(leaseData, { merge: true });
+    logger.log('INFO', `[Worker] 📥 Task: ${computation} (${date}) [Tier: ${resourceTier}] [ID: ${dispatchId}]`);
     const heartbeat = startMemoryHeartbeat(db, ledgerPath);
@@ -72,7 +133,13 @@ async function handleComputationTask(message, config, dependencies) {
         const failureReport = result?.updates?.failureReport || [];
         const successUpdates = result?.updates?.successUpdates || {};
-        if (failureReport.length > 0) throw new Error(failureReport[0].error.message);
+        // [CRITICAL] Propagate Error Stage from inner logic
+        if (failureReport.length > 0) {
+            const reportedError = failureReport[0].error;
+            const errorObj = new Error(reportedError.message);
+            errorObj.stage = reportedError.stage;
+            throw errorObj;
+        }
         const calcUpdate = successUpdates[normalizeName(computation)] || {};
         const metrics = {
@@ -90,16 +157,19 @@ async function handleComputationTask(message, config, dependencies) {
     } catch (err) {
         clearInterval(heartbeat.timer);
         const isDeterministic = ['SHARDING_LIMIT_EXCEEDED', 'QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE'].includes(err.stage);
+        // --- STEP 2: ERROR HANDLING ---
+        // If Logic Error OR Max Retries reached, mark FAILED and ACK.
         if (isDeterministic || (message.deliveryAttempt || 1) >= MAX_RETRIES) {
-            // [FIX] Write structured error payload so Dispatcher can see the 'stage'
-            // This prevents the Dispatcher from retrying Quality Broken tasks.
             const errorPayload = {
                 message: err.message,
                 stage: err.stage || 'FATAL'
             };
+            // This write ensures the Idempotency Gate blocks future retries
             await db.doc(ledgerPath).set({
                 status: 'FAILED',
                 error: errorPayload,
@@ -107,8 +177,10 @@ async function handleComputationTask(message, config, dependencies) {
             }, { merge: true });
             await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'FATAL' }, { peakMemoryMB: heartbeat.getPeak() }, triggerReason, resourceTier);
-            return;
+            return; // ACK
         }
+        // Only throw (NACK) for transient system errors (Network, etc)
         throw err;
     }
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bulltrackers-module",
-  "version": "1.0.337",
+  "version": "1.0.339",
   "description": "Helper Functions for Bulltrackers.",
   "main": "index.js",
   "files": [