bulltrackers-module 1.0.338 → 1.0.339
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/computation-system/helpers/computation_dispatcher.js
+++ b/computation-system/helpers/computation_dispatcher.js
@@ -1,9 +1,8 @@
 /**
  * FILENAME: computation-system/helpers/computation_dispatcher.js
  * PURPOSE: Sequential Cursor-Based Dispatcher.
- *
- * UPDATED:
- * UPDATED: Added Safety Checks to permanently skip Deterministic Failures.
+ * UPDATED: Enforces Strict One-Shot Policy (Standard -> HighMem -> Dead Letter).
+ * UPDATED: Prevents infinite loops by permanently ignoring deterministic failures.
  */
 
 const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
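Note: the "one-shot" policy named in the new header gives each task at most two executions: one on the standard tier, and one on high-mem if the first attempt failed for a non-deterministic reason. The sketch below is illustrative only and is not code from the package; the field names (status, resourceTier, error.stage) are the ledger fields used later in this diff, while the values are made up.

// Illustrative ledger snapshots for one task under the one-shot policy (hypothetical values).
const attempt1 = { status: 'FAILED', resourceTier: 'standard', error: { stage: 'FATAL' } }; // crash/OOM -> promoted once to high-mem
const attempt2 = { status: 'FAILED', resourceTier: 'high-mem', error: { stage: 'FATAL' } }; // still failing -> dead letter, never dispatched again
// Deterministic stages (QUALITY_CIRCUIT_BREAKER, SEMANTIC_GATE) skip the high-mem retry
// and are dead-lettered immediately.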
@@ -23,7 +22,7 @@ const STALE_LOCK_THRESHOLD_MS = 1000 * 60 * 15;
 // =============================================================================
 async function filterActiveTasks(db, date, pass, tasks, logger, forceRun = false) {
   if (!tasks || tasks.length === 0) return [];
-  if (forceRun) return tasks;
+  if (forceRun) return tasks;
 
   const checkPromises = tasks.map(async (t) => {
     const taskName = normalizeName(t.name);
@@ -49,21 +48,14 @@ async function filterActiveTasks(db, date, pass, tasks, logger, forceRun = false
         return null;
       }
 
-      // 2.
-
-        data.completedAt &&
-        (Date.now() - new Date(data.completedAt).getTime() < 60 * 1000);
-
-      if (isJustFinished) return null;
+      // 2. COMPLETED CHECK (Ignore)
+      if (data.status === 'COMPLETED') return null;
 
-      // 3.
-      //
+      // 3. FAILED CHECK (Pass through to Route Splitter)
+      // We do NOT filter FAILED tasks here. We pass them to splitRoutes()
+      // which decides if they get promoted to High-Mem or dropped forever.
       if (data.status === 'FAILED') {
-
-        if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
-          if (logger) logger.log('WARN', `[Dispatcher] 🛑 Skipping deterministic failure for ${taskName} (${stage}).`);
-          return null;
-        }
+        return t;
       }
     }
     return t;
@@ -202,7 +194,6 @@ async function handlePassVerification(config, dependencies, computationManifest,
   const pending = [...report.runnable, ...report.reRuns];
 
   if (pending.length > 0) {
-    // Calculate ETA
     const totalWeight = pending.reduce((sum, t) => sum + (weightMap.get(normalizeName(t.name)) || 1.0), 0);
     const eta = Math.max(30, Math.ceil(totalWeight * BASE_SECONDS_PER_WEIGHT_UNIT));
 
@@ -254,8 +245,8 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
     return { dispatched: 0 };
   }
 
-  // [
-  //
+  // [CRITICAL] FILTER FOR SWEEP:
+  // Only dispatch if it hasn't failed High-Mem or Quality checks.
   const validTasks = [];
   for (const task of pending) {
     const name = normalizeName(task.name);
@@ -264,20 +255,28 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
     if (doc.exists) {
       const data = doc.data();
       const stage = data.error?.stage;
-
+
+      // A. QUALITY CHECK: If it failed logic, DO NOT RETRY.
+      if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
         logger.log('WARN', `[Sweep] 🛑 Skipping deterministic failure for ${name} (${stage}).`);
         continue;
       }
+
+      // B. DEAD END CHECK: If it failed High-Mem already, DO NOT RETRY.
+      if (data.resourceTier === 'high-mem' && data.status === 'FAILED') {
+        logger.log('WARN', `[Sweep] 🛑 Skipping ${name} - Already failed on High-Mem.`);
+        continue;
+      }
     }
     validTasks.push(task);
   }
 
   if (validTasks.length === 0) {
-    logger.log('INFO', `[Sweep] ${date}
+    logger.log('INFO', `[Sweep] ${date} has no retryable tasks. Ignoring.`);
     return { dispatched: 0 };
   }
 
-  // 2. FORCE High Mem
+  // 2. FORCE High Mem for remaining valid tasks
   const currentDispatchId = crypto.randomUUID();
 
   const tasksPayload = validTasks.map(t => ({
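Note: the sweep filter above only reads three fields from each ledger document at computation_audit_ledger/<date>/passes/<pass>/tasks/<name>. A minimal illustrative shape (the field names are the ones read in the hunk above; the values are invented):

// Hypothetical ledger entry, shown only to illustrate the fields the sweep inspects.
const exampleLedgerDoc = {
  status: 'FAILED',
  resourceTier: 'high-mem',                                                     // checked by guard B (dead end)
  error: { stage: 'QUALITY_CIRCUIT_BREAKER', message: 'Quality gate tripped' }  // stage checked by guard A (deterministic)
};
// With this document the task is skipped by guard A; even with a non-deterministic stage,
// guard B would still skip it because it has already failed on the high-mem tier.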
@@ -360,8 +359,9 @@ async function handleStandardDispatch(config, dependencies, computationManifest,
     }
 
     if (selectedTasks.length > 0) {
-
-
+      // Split Logic: Moves OOMs to High-Mem, drops dead letters
+      const { standard, highMem } = await splitRoutes(db, selectedDate, passToRun, selectedTasks, logger);
+      selectedTasks = [...standard, ...highMem];
     }
   }
 }
@@ -421,21 +421,58 @@ async function handleStandardDispatch(config, dependencies, computationManifest,
   };
 }
 
-
-
+// =============================================================================
+// HELPER: Route Splitting (One-Shot Enforcement)
+// =============================================================================
+async function splitRoutes(db, date, pass, tasks, logger) {
+  const standard = [];
+  const highMem = [];
+
   for (const task of tasks) {
     const name = normalizeName(task.name);
     const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
     const doc = await db.doc(ledgerPath).get();
-
-
-
-
-
-
+
+    if (!doc.exists) {
+      // New task -> Standard
+      standard.push(task);
+      continue;
+    }
+
+    const data = doc.data();
+
+    // If it FAILED, we check if we can escalate it.
+    if (data.status === 'FAILED') {
+      const stage = data.error?.stage;
+
+      // 1. QUALITY / LOGIC FAIL: Dead Letter (Drop it)
+      if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE'].includes(stage)) {
+        logger.log('WARN', `[Dispatcher] 🛑 Dropping ${name} - Deterministic Failure (${stage}).`);
+        continue;
+      }
+
+      // 2. PREVIOUSLY HIGH MEM FAIL: Dead Letter (Drop it)
+      if (data.resourceTier === 'high-mem') {
+        logger.log('WARN', `[Dispatcher] 🛑 Dropping ${name} - Failed on High-Mem already.`);
+        continue;
+      }
+
+      // 3. STANDARD FAIL (Crash/OOM): Promote to High Mem (Retry)
+      // If it failed standard, we give it ONE shot on high-mem.
+      // Note: Even if it was an "Unknown" error, we promote to High-Mem to cover OOMs that looked like crashes.
+      highMem.push({
+        ...task,
+        resources: 'high-mem',
+        reason: `Retry: ${data.error?.message || 'Standard Failure'}`
+      });
+
+    } else {
+      // If status is not FAILED (e.g. was Zombie and filterActiveTasks passed it), retry Standard.
+      standard.push(task);
     }
   }
-
+
+  return { standard, highMem };
 }
 
 module.exports = { dispatchComputationPass };
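Note: as a usage sketch of the new helper (illustrative only: db and logger stand for the Firestore client and structured logger used elsewhere in this file, and the date, pass number and task names are invented), this mirrors how handleStandardDispatch consumes splitRoutes() in the earlier hunk:

// Illustrative only - not part of the package.
async function exampleRouting(db, logger) {
  const tasks = [
    { name: 'task_a' },  // no ledger doc yet                                  -> standard
    { name: 'task_b' },  // FAILED on the standard tier (crash/OOM)            -> highMem, its single retry
    { name: 'task_c' }   // FAILED on high-mem, or with a deterministic stage  -> dropped (dead letter)
  ];
  const { standard, highMem } = await splitRoutes(db, '2024-01-01', 1, tasks, logger);
  // Dispatch 'standard' to the normal queue and 'highMem' to the high-memory queue;
  // anything routed to neither list is never dispatched again.
  return [...standard, ...highMem];
}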
--- a/computation-system/helpers/computation_worker.js
+++ b/computation-system/helpers/computation_worker.js
@@ -1,7 +1,10 @@
 /**
  * FILENAME: computation-system/helpers/computation_worker.js
- * UPDATED:
- *
+ * UPDATED: Implemented Strict Idempotency Gate (The "One-Shot" Policy).
+ * BEHAVIOR:
+ * 1. Checks Ledger via Transaction before execution.
+ * 2. If Status is COMPLETED or FAILED, immediately ACKs (returns) to stop Pub/Sub loops.
+ * 3. Preserves Error 'stage' to ensure logic errors are not retried.
  */
 
 const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
@@ -28,6 +31,62 @@ function startMemoryHeartbeat(db, ledgerPath, intervalMs = 2000) {
   return { timer, getPeak: () => peakRss };
 }
 
+/**
+ * STRICT IDEMPOTENCY GATE
+ * Uses a transaction to verify this task hasn't already been processed.
+ * Returns { shouldRun: boolean, leaseData: object }
+ */
+async function checkIdempotencyAndClaimLease(db, ledgerPath, dispatchId, workerId) {
+  const docRef = db.doc(ledgerPath);
+
+  try {
+    return await db.runTransaction(async (t) => {
+      const doc = await t.get(docRef);
+
+      if (doc.exists) {
+        const data = doc.data();
+
+        // 1. TERMINAL STATE CHECK
+        // If the task is already finalized, we MUST NOT run it again.
+        // This stops the infinite loop if Pub/Sub redelivers a FAILED task.
+        if (['COMPLETED', 'FAILED', 'CRASH'].includes(data.status)) {
+          return { shouldRun: false, reason: `Task already in terminal state: ${data.status}` };
+        }
+
+        // 2. DUPLICATE DELIVERY CHECK
+        // If it's IN_PROGRESS with the SAME dispatchId, we are likely seeing a Pub/Sub redelivery
+        // while the code is actually running. Ignore it.
+        if (data.status === 'IN_PROGRESS' && data.dispatchId === dispatchId) {
+          return { shouldRun: false, reason: 'Duplicate delivery: Task already IN_PROGRESS with same ID.' };
+        }
+
+        // 3. ZOMBIE CHECK (Optional safety, usually handled by Dispatcher)
+        // If it's IN_PROGRESS but clearly stale (older than 15 mins), we might steal it,
+        // but generally we let the Dispatcher handle zombies. For now, we block collision.
+        if (data.status === 'IN_PROGRESS') {
+          return { shouldRun: false, reason: 'Collision: Task currently IN_PROGRESS by another worker.' };
+        }
+      }
+
+      // 4. CLAIM LEASE
+      // If we get here, the task is either new (doesn't exist) or PENDING.
+      const lease = {
+        status: 'IN_PROGRESS',
+        workerId: workerId,
+        dispatchId: dispatchId || 'unknown',
+        startedAt: new Date()
+      };
+
+      t.set(docRef, lease, { merge: true });
+      return { shouldRun: true, leaseData: lease };
+    });
+  } catch (e) {
+    console.error(`[Idempotency] Transaction failed: ${e.message}`);
+    // If transaction fails (contention), assume we shouldn't run
+    return { shouldRun: false, reason: `Transaction Error: ${e.message}` };
+  }
+}
+
 async function handleComputationTask(message, config, dependencies) {
   const logger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
   const runDeps = { ...dependencies, logger };
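Note: two behavioural points about the gate above. With the Firestore server SDKs, runTransaction retries the callback on contention before ultimately rejecting; the catch block maps that rejection to shouldRun: false, so a contended delivery is ACKed rather than redelivered. The checks also run in a fixed order, summarized by this illustrative helper (not code from the package; data is the ledger document, or undefined when no document exists):

// Condensed restatement of the gate's decision order, for reference only.
function expectedGateDecision(data, incomingDispatchId) {
  if (!data) return 'RUN';                                                                    // no ledger doc -> claim lease
  if (['COMPLETED', 'FAILED', 'CRASH'].includes(data.status)) return 'SKIP';                  // terminal state
  if (data.status === 'IN_PROGRESS' && data.dispatchId === incomingDispatchId) return 'SKIP'; // duplicate delivery
  if (data.status === 'IN_PROGRESS') return 'SKIP';                                           // collision with another worker
  return 'RUN';                                                                               // e.g. PENDING -> claim lease
}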
@@ -45,17 +104,19 @@ async function handleComputationTask(message, config, dependencies) {
   const resourceTier = resources || 'standard';
   const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${computation}`;
 
-
+  const workerId = process.env.K_REVISION || os.hostname();
 
-  //
-  const
-
-
-
-
-
+  // --- STEP 1: IDEMPOTENCY CHECK ---
+  const gate = await checkIdempotencyAndClaimLease(db, ledgerPath, dispatchId, workerId);
+
+  if (!gate.shouldRun) {
+    // [CRITICAL] We return successfully (ACK) to remove the message from Pub/Sub.
+    // We do NOT throw an error, because that would cause a retry.
+    logger.log('WARN', `[Worker] 🛑 Idempotency Gate: Skipping ${computation}. Reason: ${gate.reason}`);
+    return;
+  }
 
-
+  logger.log('INFO', `[Worker] 📥 Task: ${computation} (${date}) [Tier: ${resourceTier}] [ID: ${dispatchId}]`);
 
   const heartbeat = startMemoryHeartbeat(db, ledgerPath);
 
@@ -72,14 +133,11 @@ async function handleComputationTask(message, config, dependencies) {
     const failureReport = result?.updates?.failureReport || [];
     const successUpdates = result?.updates?.successUpdates || {};
 
-    // [CRITICAL
-    // Previously, 'throw new Error(msg)' stripped the 'stage' property, causing the
-    // catch block to treat Deterministic errors (Quality/Logic) as System errors (Transient),
-    // triggering infinite Pub/Sub retries.
+    // [CRITICAL] Propagate Error Stage from inner logic
     if (failureReport.length > 0) {
       const reportedError = failureReport[0].error;
       const errorObj = new Error(reportedError.message);
-      errorObj.stage = reportedError.stage;
+      errorObj.stage = reportedError.stage;
       throw errorObj;
     }
 
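Note: the comment block removed above carried the rationale, which is still worth keeping in mind: rethrowing with a bare new Error(msg) would drop the custom stage property, so a deterministic failure would be classified as transient in the catch block and retried forever. A small illustration (values are hypothetical):

// Illustrative only: why the stage must be copied onto the rethrown error.
const reportedError = { message: 'Quality gate tripped', stage: 'QUALITY_CIRCUIT_BREAKER' };

const lossy = new Error(reportedError.message);   // 'stage' is lost -> looks transient -> Pub/Sub retry loop
const faithful = new Error(reportedError.message);
faithful.stage = reportedError.stage;             // 'stage' preserved -> marked FAILED once and ACKed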
@@ -99,18 +157,19 @@ async function handleComputationTask(message, config, dependencies) {
 
   } catch (err) {
     clearInterval(heartbeat.timer);
+
     const isDeterministic = ['SHARDING_LIMIT_EXCEEDED', 'QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE'].includes(err.stage);
-
-    //
-    //
+
+    // --- STEP 2: ERROR HANDLING ---
+    // If Logic Error OR Max Retries reached, mark FAILED and ACK.
     if (isDeterministic || (message.deliveryAttempt || 1) >= MAX_RETRIES) {
 
-      // Write structured error to Ledger so Dispatcher can see the 'stage' later
       const errorPayload = {
         message: err.message,
         stage: err.stage || 'FATAL'
       };
 
+      // This write ensures the Idempotency Gate blocks future retries
       await db.doc(ledgerPath).set({
         status: 'FAILED',
         error: errorPayload,
@@ -118,10 +177,10 @@ async function handleComputationTask(message, config, dependencies) {
       }, { merge: true });
 
       await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'FATAL' }, { peakMemoryMB: heartbeat.getPeak() }, triggerReason, resourceTier);
-      return;
+      return; // ACK
     }
 
-    //
+    // Only throw (NACK) for transient system errors (Network, etc)
    throw err;
   }
 }
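Note: taken together, the catch block implements a single ACK/NACK rule: write FAILED and ACK when the error is deterministic or the delivery budget is spent, otherwise rethrow so Pub/Sub redelivers. A condensed sketch of that rule (not code from the package; MAX_RETRIES and the stage list are the constants used above, and deliveryAttempt is the counter Pub/Sub attaches to each delivery):

// Illustrative summary of the worker's retry decision.
function shouldAckAsFailed(err, message, maxRetries) {
  const deterministic = ['SHARDING_LIMIT_EXCEEDED', 'QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE'].includes(err.stage);
  const exhausted = (message.deliveryAttempt || 1) >= maxRetries;
  return deterministic || exhausted;  // true  -> write FAILED to the ledger, then return (ACK)
}                                     // false -> throw err so the message is redelivered (NACK)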