npm - bulltrackers-module - Versions diffs - 1.0.274 → 1.0.276 - Mend

bulltrackers-module 1.0.274 → 1.0.276

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/functions/computation-system/WorkflowOrchestrator.js CHANGED

@@ -1,6 +1,6 @@
 /**
  * @fileoverview Main Orchestrator. Coordinates the topological execution.
- * UPDATED: Removed 'Permanently Impossible' optimization to ensure full visibility/recovery.
+ * UPDATED: Includes Content-Based Dependency Short-Circuiting.
  * UPDATED: Includes 'Audit Upgrade' check.
  * UPDATED: Detailed Dependency Reporting for Impossible Chains.
  */
@@ -18,21 +18,41 @@ function groupByPass(manifest) {  return manifest.reduce((acc, calc) => { (acc[c
 /**
  * Analyzes whether calculations should run, be skipped, or are blocked.
+ * [NEW] Implements ResultHash short-circuit logic.
  */
 function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus = null) {
     const report           = { runnable: [], blocked: [], impossible: [], failedDependency: [], reRuns: [], skipped: [] };
     const simulationStatus = { ...dailyStatus };
     const isTargetToday    = (dateStr === new Date().toISOString().slice(0, 10));
-    const isDepSatisfied = (depName, currentStatusMap, manifestMap) => {
+    // Helper: Validates if a dependency is satisfied, either by Code Match OR Content Match
+    const isDepSatisfied = (depName, currentStatusMap, manifestMap, dependentStoredStatus) => {
         const norm        = normalizeName(depName);
-        const stored      = currentStatusMap[norm];
+        const storedDep   = currentStatusMap[norm];
         const depManifest = manifestMap.get(norm);
-        if (!stored)                           return false;
-        if (typeof stored.hash === 'string' && stored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) return false;
-        if (!depManifest)                      return false;
-        if (stored.hash !== depManifest.hash)  return false;
-        return true;
+        // 1. Basic Existence Checks
+        if (!storedDep) return false;
+        if (typeof storedDep.hash === 'string' && storedDep.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) return false;
+        if (!depManifest) return false;
+        // 2. Code Hash Check (The Standard Check)
+        if (storedDep.hash === depManifest.hash) return true;
+        // 3. [NEW] Content-Based Short-Circuit Check
+        // If Code Hash mismatch, check if the *Result Hash* is identical to what we used last time.
+        // dependentStoredStatus = The status of the calculation (B) that depends on this (A).
+        // dependentStoredStatus.dependencyResultHashes[depName] = The ResultHash of A when B last ran.
+        // storedDep.resultHash = The current ResultHash of A.
+        if (dependentStoredStatus &&
+            dependentStoredStatus.dependencyResultHashes &&
+            dependentStoredStatus.dependencyResultHashes[depName] &&
+            storedDep.resultHash &&
+            storedDep.resultHash === dependentStoredStatus.dependencyResultHashes[depName]) {
+            return true; // Short-circuit: The output didn't change, so we are safe.
+        }
+        return false;
     };
     for (const calc of calcsInPass) {
@@ -42,6 +62,17 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
         const storedCategory = stored ? stored.category : null;
         const currentHash    = calc.hash;
+        // Collect current result hashes of dependencies for the next run
+        const currentDependencyResultHashes = {};
+        if (calc.dependencies) {
+            calc.dependencies.forEach(d => {
+                const normD = normalizeName(d);
+                if (simulationStatus[normD] && simulationStatus[normD].resultHash) {
+                    currentDependencyResultHashes[d] = simulationStatus[normD].resultHash;
+                }
+            });
+        }
         const markImpossible = (reason, type = 'GENERIC') => {
             report.impossible.push({ name: cName, reason });
             const statusHash = `${STATUS_IMPOSSIBLE_PREFIX}:${type}`;
@@ -49,25 +80,31 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
         };
         const markRunnable = (isReRun = false, reRunDetails = null) => {
-            if (isReRun) report.reRuns.push(reRunDetails);
-            else report.runnable.push({ name: cName, ...reRunDetails });
+            const payload = {
+                name: cName,
+                ...reRunDetails,
+                dependencyResultHashes: currentDependencyResultHashes // Pass forward
+            };
+            if (isReRun) report.reRuns.push(payload);
+            else report.runnable.push(payload);
             // Simulate success so dependents can pass their check
-            simulationStatus[cName] = { hash: currentHash, category: calc.category, composition: calc.composition };
+            simulationStatus[cName] = {
+                hash: currentHash,
+                resultHash: 'SIMULATED',
+                category: calc.category,
+                composition: calc.composition
+            };
         };
         let migrationOldCategory = null;
         if (storedCategory && storedCategory !== calc.category) { migrationOldCategory = storedCategory; }
-        // [REMOVED] The "Permanently Impossible" optimization block was here.
-        // Removal ensures we re-check Root Data every time, allowing for visibility and recovery.
-        // 1. Check Root Data (The Primary Gate)
+        // 1. Check Root Data
         const rootCheck = checkRootDependencies(calc, rootDataStatus);
         if (!rootCheck.canRun) {
             const missingStr = rootCheck.missing.join(', ');
             if (!isTargetToday) {
-                // If previously impossible, this confirms it. If previously run, this is a regression.
                 markImpossible(`Missing Root Data: ${missingStr} (Historical)`, 'NO_DATA');
             } else {
                 report.blocked.push({ name: cName, reason: `Missing Root Data: ${missingStr} (Waiting)` });
@@ -85,15 +122,15 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
                 const depStored = simulationStatus[normDep];
                 if (depStored && typeof depStored.hash === 'string' && depStored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) {
                     dependencyIsImpossible = true;
-                    impossibleDepCause = dep; // Capture the culprit
+                    impossibleDepCause = dep;
                     break;
                 }
-                if (!isDepSatisfied(dep, simulationStatus, manifestMap)) { missingDeps.push(dep); }
+                // Pass 'stored' (this calc's status) to check short-circuiting
+                if (!isDepSatisfied(dep, simulationStatus, manifestMap, stored)) { missingDeps.push(dep); }
             }
         }
         if (dependencyIsImpossible) {
-            // [UPDATED] Include the name of the failing dependency in the reason string
             markImpossible(`Dependency is Impossible (${impossibleDepCause})`, 'UPSTREAM');
             continue;
         }
@@ -117,7 +154,12 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
             markRunnable(false, { reason: "New Calculation" });
         }
         else if (storedHash !== currentHash) {
-            // Smart Audit Logic
+            // [NEW] Check if Dependencies caused this, and if their content is actually same
+            // Note: If we are here, it means code changed.
+            // Short-circuiting logic was handled in 'isDepSatisfied' for upstream checks.
+            // But if *my* code changed, I must re-run unless I implement output-caching which is dangerous.
+            // So we strictly re-run if code changes.
             let changeReason = "Hash Mismatch (Unknown)";
             const oldComp = stored.composition;
             const newComp = calc.composition;
@@ -134,6 +176,11 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
                     changeReason = `Layer Update: [${changedLayers.join(', ')}]`;
                 }
                 else if (JSON.stringify(oldComp.deps) !== JSON.stringify(newComp.deps)) {
+                    // Dependency Hash Mismatch.
+                    // This is where we COULD have short-circuited if we weren't enforcing code-hash strictness here.
+                    // But typically if code hash mismatches, we re-run.
+                    // The "Short-Circuit" benefit is mainly that *dependents* of this calculation
+                    // won't need to re-run if *this* calculation produces the same output.
                     const changedDeps = [];
                     for(const dKey in newComp.deps) {
                         if (newComp.deps[dKey] !== oldComp.deps[dKey]) changedDeps.push(dKey);
@@ -158,7 +205,6 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
         else if (migrationOldCategory) {
             markRunnable(true, { name: cName, reason: 'Category Migration', previousCategory: migrationOldCategory, newCategory: calc.category });
         }
-        // Audit Upgrade Check
         else if (!stored.composition) {
             markRunnable(true, {
                 name: cName,
@@ -177,8 +223,9 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
 /**
  * DIRECT EXECUTION PIPELINE (For Workers)
+ * [UPDATED] Accepts dependencyResultHashes
  */
-async function executeDispatchTask(dateStr, pass, targetComputation, config, dependencies, computationManifest, previousCategory = null) {
+async function executeDispatchTask(dateStr, pass, targetComputation, config, dependencies, computationManifest, previousCategory = null, dependencyResultHashes = {}) {
     const { logger } = dependencies;
     const pid = generateProcessId(PROCESS_TYPES.EXECUTOR, targetComputation, dateStr);
@@ -187,6 +234,9 @@ async function executeDispatchTask(dateStr, pass, targetComputation, config, dep
     if (!calcManifest) { throw new Error(`Calculation '${targetComputation}' not found in manifest.`); }
+    // [NEW] Attach the dependency result hashes to the manifest so ResultCommitter can save them
+    calcManifest.dependencyResultHashes = dependencyResultHashes;
     if (previousCategory) {
         calcManifest.previousCategory = previousCategory;
         logger.log('INFO', `[Executor] Migration detected for ${calcManifest.name}. Old data will be cleaned from: ${previousCategory}`);

package/functions/computation-system/helpers/computation_dispatcher.js CHANGED

@@ -1,7 +1,7 @@
 /**
  * FILENAME: computation-system/helpers/computation_dispatcher.js
  * PURPOSE: "Smart Dispatcher" - Analyzes state and only dispatches valid, runnable tasks.
- * UPDATED: Fixed "undefined" reason crash for failed dependencies.
+ * UPDATED: Implements Zombie Task Recovery & Dependency Result Hash Passing.
  */
 const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -11,13 +11,13 @@ const { fetchComputationStatus, updateComputationStatus } = require('../persiste
 const { checkRootDataAvailability }             = require('../data/AvailabilityChecker');
 const { generateCodeHash }                      = require('../topology/HashManager');
 const pLimit                                    = require('p-limit');
+const crypto                                    = require('crypto');
 const TOPIC_NAME = 'computation-tasks';
 const STATUS_IMPOSSIBLE = 'IMPOSSIBLE';
 /**
  * Dispatches computation tasks for a specific pass.
- * Performs full pre-flight checks (Root Data, Dependencies, History) before emitting.
  */
 async function dispatchComputationPass(config, dependencies, computationManifest) {
     const { logger, db } = dependencies;
@@ -26,43 +26,36 @@ async function dispatchComputationPass(config, dependencies, computationManifest
     if (!passToRun) { return logger.log('ERROR', '[Dispatcher] No pass defined (COMPUTATION_PASS_TO_RUN). Aborting.'); }
-    // 1. Get Calculations for this Pass
+    const currentManifestHash = generateCodeHash(
+        computationManifest.map(c => c.hash).sort().join('|')
+    );
     const passes          = groupByPass(computationManifest);
     const calcsInThisPass = passes[passToRun] || [];
     if (!calcsInThisPass.length) { return logger.log('WARN', `[Dispatcher] No calcs for Pass ${passToRun}. Exiting.`); }
-    // --- [NEW] OPTIMIZATION 1: PREEMPTIVE HASH CHECK ---
-    const currentManifestHash = generateCodeHash(
-        computationManifest.map(c => c.hash).sort().join('|')
-    );
-    // ---------------------------------------------------
     const calcNames = calcsInThisPass.map(c => c.name);
     logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun}`);
     logger.log('INFO', `[Dispatcher] Target Calculations: [${calcNames.join(', ')}]`);
-    // 2. Determine Date Range
     const passEarliestDate = Object.values(DEFINITIVE_EARLIEST_DATES).reduce((a, b) => a < b ? a : b);
     const endDateUTC       = new Date(Date.UTC(new Date().getUTCFullYear(), new Date().getUTCMonth(), new Date().getUTCDate() - 1));
     const allExpectedDates = getExpectedDateStrings(passEarliestDate, endDateUTC);
     const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
     const tasksToDispatch = [];
-    const limit = pLimit(20); // Process 20 days in parallel
+    const limit = pLimit(20);
     logger.log('INFO', `[Dispatcher] Analyzing ${allExpectedDates.length} dates for viability...`);
-    // 3. Analyze Each Date (Concurrent)
     const analysisPromises = allExpectedDates.map(dateStr => limit(async () => {
         try {
-            // [NEW] OPTIMIZATION 3: PARALLEL STATUS FETCH
             const fetchPromises = [
-                fetchComputationStatus(dateStr, config, dependencies), // A. Current Status
-                checkRootDataAvailability(dateStr, config, dependencies, DEFINITIVE_EARLIEST_DATES) // C. Root Data
+                fetchComputationStatus(dateStr, config, dependencies),
+                checkRootDataAvailability(dateStr, config, dependencies, DEFINITIVE_EARLIEST_DATES)
             ];
-            // B. Fetch Status (Yesterday) - Only if historical continuity is needed
             let prevDateStr = null;
             if (calcsInThisPass.some(c => c.isHistorical)) {
                 const prevDate = new Date(dateStr + 'T00:00:00Z');
@@ -83,25 +76,20 @@ async function dispatchComputationPass(config, dependencies, computationManifest
                 hasPortfolio: false, hasHistory: false, hasSocial: false, hasInsights: false, hasPrices: false
             };
-            // D. Run Core Analysis Logic
             const report = analyzeDateExecution(dateStr, calcsInThisPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus);
-            // E. Handle Non-Runnable States (Write directly to DB, don't dispatch)
             const statusUpdates = {};
-            // Mark Impossible (Permanent Failure)
             report.impossible.forEach(item => {
                 if (dailyStatus[item.name]?.hash !== STATUS_IMPOSSIBLE) {
                     statusUpdates[item.name] = { hash: STATUS_IMPOSSIBLE, category: 'unknown', reason: item.reason };
                 }
             });
-            // Mark Blocked (Explicit Block)
             report.blocked.forEach(item => {
                  statusUpdates[item.name] = { hash: false, category: 'unknown', reason: item.reason };
             });
-            // [FIX] Mark Failed Dependencies (Implicit Block) - Safely generate reason string
             report.failedDependency.forEach(item => {
                  const missingStr = item.missing ? item.missing.join(', ') : 'unknown';
                  statusUpdates[item.name] = {
@@ -115,17 +103,21 @@ async function dispatchComputationPass(config, dependencies, computationManifest
                 await updateComputationStatus(dateStr, statusUpdates, config, dependencies);
             }
-            // F. Queue Runnables
             const validToRun = [...report.runnable, ...report.reRuns];
             validToRun.forEach(item => {
+                const uniqueDispatchId = crypto.randomUUID();
                 tasksToDispatch.push({
                     action: 'RUN_COMPUTATION_DATE',
+                    dispatchId: uniqueDispatchId,
                     date: dateStr,
                     pass: passToRun,
                     computation: normalizeName(item.name),
                     hash: item.hash || item.newHash,
                     previousCategory: item.previousCategory || null,
-                    triggerReason: item.reason || "Unknown",
+                    triggerReason: item.reason || "Unknown",
+                    // [NEW] Pass Content-Based hashes provided by analyzeDateExecution
+                    dependencyResultHashes: item.dependencyResultHashes || {},
                     timestamp: Date.now()
                 });
             });
@@ -137,13 +129,11 @@ async function dispatchComputationPass(config, dependencies, computationManifest
     await Promise.all(analysisPromises);
-    // 4. Dispatch Valid Tasks with Atomic Ledger Check
     if (tasksToDispatch.length > 0) {
         logger.log('INFO', `[Dispatcher] 📝 Creating Audit Ledger entries (Transactional) for ${tasksToDispatch.length} tasks...`);
-        // --- [NEW] OPTIMIZATION 2: ATOMIC TRANSACTION FOR LEDGER ---
         const finalDispatched = [];
-        const txnLimit = pLimit(20); // Limit concurrent transactions
+        const txnLimit = pLimit(20);
         const txnPromises = tasksToDispatch.map(task => txnLimit(async () => {
             const ledgerRef = db.collection(`computation_audit_ledger/${task.date}/passes/${task.pass}/tasks`).doc(task.computation);
@@ -151,23 +141,35 @@ async function dispatchComputationPass(config, dependencies, computationManifest
             try {
                 await db.runTransaction(async (t) => {
                     const doc = await t.get(ledgerRef);
-                    if (doc.exists && doc.data().status === 'PENDING') {
-                        // Task is already pending from another dispatcher, Skip.
-                        return false;
+                    // [NEW] Zombie Task Recovery Check
+                    if (doc.exists) {
+                        const data = doc.data();
+                        const now = Date.now();
+                        const isPending = data.status === 'PENDING';
+                        // A task is a zombie if it is PENDING and the lease has expired (or lease is missing but it's been > 1h)
+                        const isLeaseExpired = data.leaseExpiresAt && data.leaseExpiresAt < now;
+                        // Fallback: If no lease exists, assume 1 hour timeout for legacy zombie detection
+                        const isLegacyZombie = !data.leaseExpiresAt && data.createdAt && (now - data.createdAt.toMillis() > 3600000);
+                        if (isPending && !isLeaseExpired && !isLegacyZombie) {
+                            return false; // Valid active pending task, do not double dispatch
+                        }
                     }
                     t.set(ledgerRef, {
                         status: 'PENDING',
+                        dispatchId: task.dispatchId,
                         computation: task.computation,
                         expectedHash: task.hash || 'unknown',
                         createdAt: new Date(),
-                        dispatcherHash: currentManifestHash, // Tracking source
-                        triggerReason: task.triggerReason, // Track trigger in ledger too
+                        dispatcherHash: currentManifestHash,
+                        triggerReason: task.triggerReason,
                         retries: 0
                     }, { merge: true });
                     return true;
                 });
-                // Only dispatch if we successfully reserved the PENDING state
                 finalDispatched.push(task);
             } catch (txnErr) {
@@ -176,7 +178,6 @@ async function dispatchComputationPass(config, dependencies, computationManifest
         }));
         await Promise.all(txnPromises);
-        // ---------------------------------------------------
         if (finalDispatched.length > 0) {
             logger.log('INFO', `[Dispatcher] ✅ Publishing ${finalDispatched.length} unique tasks to Pub/Sub...`);

package/functions/computation-system/helpers/computation_worker.js CHANGED

@@ -1,8 +1,7 @@
 /**
  * FILENAME: computation-system/helpers/computation_worker.js
- * PURPOSE: Consumes computation tasks from Pub/Sub and executes them.
- * UPDATED: Added Deterministic Error Short-Circuit to prevent infinite retry storms on data limits.
- * UPDATED: Integrated Run Ledger for per-run/per-date success/failure tracking.
+ * PURPOSE: Consumes computation tasks from Pub/Sub.
+ * UPDATED: Implements Lease Claiming and passes Dependency Hashes.
  */
 const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
@@ -14,18 +13,14 @@ let calculationPackage;
 try { calculationPackage = require('aiden-shared-calculations-unified');
 } catch (e) {console.error("FATAL: Could not load 'aiden-shared-calculations-unified'."); throw e; }
 const calculations = calculationPackage.calculations;
-const MAX_RETRIES = 3;
+const MAX_RETRIES = 0;
-/**
- * Handles a single Pub/Sub message.
- */
 async function handleComputationTask(message, config, dependencies) {
     const systemLogger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
     const runDependencies = { ...dependencies, logger: systemLogger };
     const { logger, db }  = runDependencies;
     let data;
-    // ----------------------------------- Parse message -----------------------------------
     try {
         if (message.data && message.data.message && message.data.message.data) { data = JSON.parse(Buffer.from(message.data.message.data, 'base64').toString());
         } else if (message.data && typeof message.data === 'string')           { data = JSON.parse(Buffer.from(message.data, 'base64').toString());
@@ -33,24 +28,42 @@ async function handleComputationTask(message, config, dependencies) {
         } else { data = message; }
     } catch (parseError) { logger.log('ERROR', `[Worker] Failed to parse Pub/Sub payload.`, { error: parseError.message }); return; }
-    // ----------------------------------- Validate & Execute -----------------------------------
     if (!data || data.action !== 'RUN_COMPUTATION_DATE') { return; }
-    // Extract Trigger Reason
-    const { date, pass, computation, previousCategory, triggerReason } = data;
+    // Extract fields including new dependencyResultHashes
+    const { date, pass, computation, previousCategory, triggerReason, dispatchId, dependencyResultHashes } = data;
-    if (!date || !pass || !computation) { logger.log('ERROR', `[Worker] Invalid payload: Missing date, pass, or computation.`, data); return; }
+    if (!date || !pass || !computation) { logger.log('ERROR', `[Worker] Invalid payload.`, data); return; }
+    logger.log('INFO', `[Worker] 📥 Received Task: ${computation} (${date})`, {
+        dispatchId: dispatchId || 'legacy',
+        reason: triggerReason
+    });
+    // [NEW] LEASE CLAIMING
+    // Mark task as IN_PROGRESS and set a lease timeout (e.g., 20 minutes) to prevent Zombies
+    try {
+        const leaseTimeMs = (config.workerLeaseMinutes || 20) * 60 * 1000;
+        await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
+            status: 'IN_PROGRESS',
+            workerId: process.env.K_REVISION || 'unknown',
+            startedAt: new Date(),
+            leaseExpiresAt: Date.now() + leaseTimeMs,
+            dispatchId: dispatchId
+        }, { merge: true });
+    } catch (leaseErr) {
+        logger.log('WARN', `[Worker] Failed to claim lease for ${computation}. Continuing anyway...`, leaseErr);
+    }
     let computationManifest;
     try { computationManifest = getManifest(config.activeProductLines || [], calculations, runDependencies);
-    } catch (manifestError) { logger.log('FATAL', `[Worker] Failed to load Manifest: ${manifestError.message}`);
-        // FIX: Passing { durationMs: 0 } instead of {} to satisfy type requirements
+    } catch (manifestError) {
+        logger.log('FATAL', `[Worker] Failed to load Manifest: ${manifestError.message}`);
         await recordRunAttempt(db, { date, computation, pass }, 'CRASH', { message: manifestError.message, stage: 'MANIFEST_LOAD' }, { durationMs: 0 }, triggerReason);
         return;
     }
     try {
-        logger.log('INFO', `[Worker] 📥 Received: ${computation} for ${date} [Reason: ${triggerReason || 'Unknown'}]`);
         const startTime = Date.now();
         const result    = await executeDispatchTask(
             date,
@@ -59,7 +72,8 @@ async function handleComputationTask(message, config, dependencies) {
             config,
             runDependencies,
             computationManifest,
-            previousCategory
+            previousCategory,
+            dependencyResultHashes // [NEW] Pass hashes to executor
         );
         const duration = Date.now() - startTime;
@@ -69,7 +83,7 @@ async function handleComputationTask(message, config, dependencies) {
         if (failureReport.length > 0) {
             const failReason = failureReport[0];
             logger.log('ERROR', `[Worker] ❌ Failed logic/storage for ${computation}`, failReason.error);
-            const metrics      = failReason.metrics || {};
+            const metrics = failReason.metrics || {};
             metrics.durationMs = duration;
             await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', failReason.error, metrics, triggerReason);
             throw new Error(failReason.error.message || 'Computation Logic Failed');
@@ -78,9 +92,14 @@ async function handleComputationTask(message, config, dependencies) {
             const successData  = successUpdates[computation];
             const metrics      = successData.metrics || {};
             metrics.durationMs = duration;
+            logger.log('INFO', `[Worker] ✅ Stored: ${computation}. ID: ${dispatchId}`);
-            logger.log('INFO', `[Worker] ✅ Stored: ${computation}. Processed: ${metrics.execution?.processedUsers || metrics.execution?.processedItems || '?'} items.`);
+            // Mark Ledger as COMPLETED
+            await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).update({
+                status: 'COMPLETED',
+                completedAt: new Date()
+            }).catch(() => {});
             await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, metrics, triggerReason);
         }
         else {
@@ -88,41 +107,32 @@ async function handleComputationTask(message, config, dependencies) {
             await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', { message: 'Empty Result' }, { durationMs: duration }, triggerReason);
         }
     } catch (err) {
-        // ----------------------------------- ERROR HANDLING & RETRY LOGIC -----------------------------------
-        // 1. DETERMINISTIC ERROR CHECK (Short-Circuit)
-        // If the error is permanent (like "Too Big" or "Validation Failed"), DO NOT RETRY.
-        // This stops the "Retry Storm" where we pay for 3-4 retries of a task that will never succeed.
         const isDeterministicError = err.stage === 'SHARDING_LIMIT_EXCEEDED' ||
                                      err.stage === 'QUALITY_CIRCUIT_BREAKER' ||
                                      (err.message && (err.message.includes('INVALID_ARGUMENT') || err.message.includes('Transaction too big')));
         if (isDeterministicError) {
-             logger.log('ERROR', `[Worker] 🛑 Permanent Failure (Data/Limit Issue). Sending to DLQ immediately: ${computation} ${date}`);
+             logger.log('ERROR', `[Worker] 🛑 Permanent Failure (Limit Issue). Sending to DLQ immediately: ${dispatchId}`);
              try {
                  await db.collection('computation_dead_letter_queue').add({
                      originalData: data,
+                     dispatchId: dispatchId,
                      error: { message: err.message, stack: err.stack, stage: err.stage || 'UNKNOWN' },
                      finalAttemptAt: new Date(),
                      failureReason: 'PERMANENT_DETERMINISTIC_ERROR'
                  });
-                 // CRITICAL: We record the failure but return successfully to Pub/Sub to ACK the message and stop retries.
-                 // This ensures the task is marked as Failed in run history, but does NOT block the queue.
                  await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'PERMANENT_FAIL' }, { durationMs: 0 }, triggerReason);
                  return;
-             } catch (dlqErr) {
-                 logger.log('FATAL', `[Worker] Failed to write to DLQ for deterministic error`, dlqErr);
-             }
+             } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
         }
-        // 2. STANDARD RETRY LOGIC (Timeout / Crash)
         const retryCount = message.deliveryAttempt || 0;
         if (retryCount >= MAX_RETRIES) {
-             logger.log('ERROR', `[Worker] ☠️ Task POISONED. Moved to DLQ: ${computation} ${date} (Attempt ${retryCount})`);
+             logger.log('ERROR', `[Worker] ☠️ Task POISONED. Moved to DLQ: ${computation}`);
              try {
                  await db.collection('computation_dead_letter_queue').add({
                      originalData: data,
+                     dispatchId: dispatchId,
                      error: { message: err.message, stack: err.stack },
                      finalAttemptAt: new Date(),
                      failureReason: 'MAX_RETRIES_EXCEEDED'
@@ -131,8 +141,7 @@ async function handleComputationTask(message, config, dependencies) {
              } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
         }
-        // If it's not deterministic and not max retries, we throw to let Pub/Sub retry it.
-        logger.log('ERROR', `[Worker] ❌ Crash: ${computation} for ${date}: ${err.message}`);
+        logger.log('ERROR', `[Worker] ❌ Crash: ${computation}: ${err.message}`);
         await recordRunAttempt(db, { date, computation, pass }, 'CRASH', {  message: err.message, stack: err.stack, stage: 'SYSTEM_CRASH' }, { durationMs: 0 }, triggerReason);
         throw err;
     }

package/functions/computation-system/persistence/ResultCommitter.js CHANGED

@@ -1,10 +1,8 @@
 /**
  * @fileoverview Handles saving computation results with observability and Smart Cleanup.
- * UPDATED: Added "Strategy 4" (50 keys) to handle 'too many index entries' errors.
- * UPDATED: Supports Incremental (Flush) Commits to prevent OOM.
- * FIX: Throws proper Error objects.
+ * UPDATED: Implements Content-Based Hashing (ResultHash) for dependency short-circuiting.
  */
-const { commitBatchInChunks }              = require('./FirestoreUtils');
+const { commitBatchInChunks, generateDataHash } = require('../utils/utils'); // [UPDATED] Import generateDataHash
 const { updateComputationStatus }          = require('./StatusRepository');
 const { batchStoreSchemas }                = require('../utils/schema_capture');
 const { generateProcessId, PROCESS_TYPES } = require('../logger/logger');
@@ -14,13 +12,10 @@ const pLimit                               = require('p-limit');
 const NON_RETRYABLE_ERRORS = [
     'PERMISSION_DENIED',     'DATA_LOSS',             'FAILED_PRECONDITION'
-    // removed INVALID_ARGUMENT from here as it covers 'too many index entries' which IS retryable via sharding
 ];
 /**
  * Commits results to Firestore.
- * @param {Object} options.flushMode - 'STANDARD', 'INTERMEDIATE', 'FINAL'
- * @param {Object} options.shardIndexes - Map of { calcName: currentShardIndex }
  */
 async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false, options = {}) {
     const successUpdates = {};
@@ -52,7 +47,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
             const result = await calc.getResult();
             const overrides   = validationOverrides[calc.manifest.name] || {};
-            // Only validate if we have data or if it's the final flush
+            // Validation
             if (result && Object.keys(result).length > 0) {
                 const healthCheck = HeuristicValidator.analyze(calc.manifest.name, result, overrides);
                 if (!healthCheck.valid) {
@@ -66,16 +61,20 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
             const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0);
-            // If empty and standard mode, record 0-byte success.
-            // If empty and INTERMEDIATE flush, just skip this calc for this flush.
+            // [NEW] Calculate Result Hash (Content-Based)
+            const resultHash = isEmpty ? 'empty' : generateDataHash(result);
+            // Handle Empty Results
             if (isEmpty) {
                 if (flushMode === 'INTERMEDIATE') {
-                    nextShardIndexes[name] = currentShardIndex; // No change
+                    nextShardIndexes[name] = currentShardIndex;
                     continue;
                 }
                 if (calc.manifest.hash) {
                     successUpdates[name] = {
-                        hash:        calc.manifest.hash,
+                        hash:        calc.manifest.hash,
+                        resultHash:  resultHash, // [NEW] Store result hash
+                        dependencyResultHashes: calc.manifest.dependencyResultHashes || {}, // [NEW] Capture dep context
                         category:    calc.manifest.category,
                         composition: calc.manifest.composition,
                         metrics: runMetrics
@@ -90,7 +89,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
             const isMultiDate = resultKeys.length > 0 && resultKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));
             if (isMultiDate) {
-                // Multi-Date Fan-Out Logic (Not optimized for incremental yet, falls back to standard per-date write)
                 const datePromises = resultKeys.map((historicalDate) => fanOutLimit(async () => {
                     const dailyData = result[historicalDate];
                     if (!dailyData || Object.keys(dailyData).length === 0) return;
@@ -102,7 +100,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
                         .collection(config.computationsSubcollection)
                         .doc(name);
-                    // For historical Fan-Out, we assume standard flush mode (not incremental) for now
                     await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD');
                 }));
                 await Promise.all(datePromises);
@@ -110,6 +107,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
                 if (calc.manifest.hash) {
                     successUpdates[name] = {
                         hash:        calc.manifest.hash,
+                        resultHash:  resultHash, // [NEW]
+                        dependencyResultHashes: calc.manifest.dependencyResultHashes || {}, // [NEW]
                         category:    calc.manifest.category,
                         composition: calc.manifest.composition,
                         metrics: runMetrics
@@ -117,7 +116,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
                 }
             } else {
-                // --- STANDARD / INCREMENTAL MODE ---
                 const mainDocRef = db.collection(config.resultsCollection)
                     .doc(dStr)
                     .collection(config.resultsSubcollection)
@@ -131,12 +129,13 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
                 runMetrics.storage.isSharded  = writeStats.isSharded;
                 runMetrics.storage.shardCount = writeStats.shardCount;
-                // Track next index for subsequent flushes
                 nextShardIndexes[name] = writeStats.nextShardIndex;
                 if (calc.manifest.hash) {
                     successUpdates[name] = {
                         hash:        calc.manifest.hash,
+                        resultHash:  resultHash, // [NEW]
+                        dependencyResultHashes: calc.manifest.dependencyResultHashes || {}, // [NEW]
                         category:    calc.manifest.category,
                         composition: calc.manifest.composition,
                         metrics: runMetrics
@@ -171,10 +170,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 }
 async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD') {
-    // Strategy 1: Standard (900KB, no key limit)
-    // Strategy 2: Aggressive Bytes (450KB, 10k keys)
-    // Strategy 3: Very Aggressive (200KB, 2k keys)
-    // Strategy 4: [NEW] Index Explosion Protection (100KB, 50 keys) - Handles "too many index entries"
     const strategies = [
         { bytes: 900 * 1024, keys: null },
         { bytes: 450 * 1024, keys: 10000 },
@@ -190,14 +185,10 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
         const constraints = strategies[attempt];
         try {
             const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys, startShardIndex, flushMode);
+            const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined);
-            // Analyze the update batch
-            const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined); // Pointer is on the main doc
-            // Calculate stats
             finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);
-            // Logic to determine next shard index
             let maxIndex = startShardIndex;
             updates.forEach(u => {
                 const segs = u.ref.path.split('/');
@@ -224,8 +215,6 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
             lastError = commitErr;
             const msg = commitErr.message || '';
             const code = commitErr.code || '';
-            // Check for explicit "too many index entries" or transaction size issues
             const isIndexError = msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT');
             const isSizeError  = msg.includes('Transaction too big') || msg.includes('payload is too large');
@@ -233,14 +222,11 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
                 logger.log('ERROR', `[SelfHealing] ${name} FATAL error: ${msg}.`);
                 throw commitErr;
             }
             if (isIndexError || isSizeError) {
                 logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}/${strategies.length}. Strategy: ${JSON.stringify(constraints)}. Error: ${msg}. Retrying with stricter limits...`);
                 continue;
-            }
-            else {
+            } else {
                 logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} unknown error. Retrying...`, { error: msg });
-                // We typically retry same strategy for unknown transient errors, but here we iterate to be safe.
                 continue;
             }
         }
@@ -261,32 +247,28 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
     let currentChunk = {}; let currentChunkSize = 0; let currentKeyCount = 0;
     let shardIndex = startShardIndex;
-    // Small Data Optimization (Only valid if we are not in an incremental flow or if it's the first standard run)
     if (!maxKeys && (totalSize + docPathSize) < CHUNK_LIMIT && flushMode === 'STANDARD' && startShardIndex === 0) {
         const data = { ...result, _completed: true, _sharded: false, _lastUpdated: new Date().toISOString() };
         return [{ ref: docRef, data, options: { merge: true } }];
     }
-    // Sharding Logic
     for (const [key, value] of Object.entries(result)) {
         if (key.startsWith('_')) continue;
         const keySize = Buffer.byteLength(key, 'utf8') + 1; const valueSize = calculateFirestoreBytes(value); const itemSize = keySize + valueSize;
         const byteLimitReached = (currentChunkSize + itemSize > CHUNK_LIMIT); const keyLimitReached = (maxKeys && currentKeyCount + 1 >= maxKeys);
         if (byteLimitReached || keyLimitReached) {
-            writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); // Overwrite shard doc
+            writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
             shardIndex++; currentChunk = {}; currentChunkSize = 0; currentKeyCount = 0;
         }
         currentChunk[key] = value; currentChunkSize += itemSize; currentKeyCount++;
     }
-    // Push remaining chunk
     if (Object.keys(currentChunk).length > 0) {
         writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
-        shardIndex++; // Increment so count is correct (0-based index means count is index+1)
+        shardIndex++;
     }
-    // Pointer Logic
     if (flushMode !== 'INTERMEDIATE') {
         const pointerData = {
             _completed: true,
@@ -294,13 +276,12 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
             _shardCount: shardIndex,
             _lastUpdated: new Date().toISOString()
         };
-        writes.push({ ref: docRef, data: pointerData, options: { merge: true } }); // Merge pointer
+        writes.push({ ref: docRef, data: pointerData, options: { merge: true } });
     }
     return writes;
 }
-// ... (Rest of file: deleteOldCalculationData, calculateFirestoreBytes remains unchanged) ...
 async function deleteOldCalculationData(dateStr, oldCategory, calcName, config, deps) {
     const { db, logger, calculationUtils } = deps;
     const { withRetry } = calculationUtils || { withRetry: (fn) => fn() };

package/functions/computation-system/tools/BuildReporter.js CHANGED Viewed

@@ -3,6 +3,7 @@
  * Generates a "Pre-Flight" report of what the computation system WILL do.
  * REFACTORED: Strict 5-category reporting with date-based exclusion logic.
  * UPDATED: Added transactional locking to prevent duplicate reports on concurrent cold starts.
+ * UPDATED: Adds 'pass' number to detail records for better waterfall visibility.
  */
 const { analyzeDateExecution }                                             = require('../WorkflowOrchestrator');
@@ -164,12 +165,18 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
             dateSummary.meta.totalExpected = expectedCount;
             // Helper to push only if date is valid for this specific calc
+            // [UPDATED] Adds 'pass' number to the record
             const pushIfValid = (targetArray, item, extraReason = null) => {
                 const calcManifest = manifestMap.get(item.name);
                 if (calcManifest && isDateBeforeAvailability(dateStr, calcManifest)) {
                     return; // EXCLUDED: Date is before data exists
                 }
-                targetArray.push({ name: item.name, reason: item.reason || extraReason });
+                targetArray.push({
+                    name:   item.name,
+                    reason: item.reason || extraReason,
+                    pass:   calcManifest ? calcManifest.pass : '?'
+                });
             };
             // 1. RUN (New)

package/functions/computation-system/utils/utils.js CHANGED Viewed

@@ -28,6 +28,34 @@ function generateCodeHash(codeString) {
     return crypto.createHash('sha256').update(clean).digest('hex');
 }
+/**
+ * [NEW] Generates a stable SHA-256 hash of a data object.
+ * Keys are sorted to ensure determinism.
+ *
+ * The input is first turned into a canonical string: primitives and null go
+ * through JSON.stringify, arrays keep their element order, and plain objects
+ * are emitted with their Object.keys() sorted by the default sort order, so
+ * two objects that differ only in key insertion order hash identically.
+ *
+ * Relies on `crypto` being in scope in this module (its import is not visible
+ * in this hunk).
+ *
+ * NOTE(review): only keys returned by Object.keys() are visited and toJSON is
+ * never consulted, so an object with no own enumerable keys (a Date, for
+ * example) serializes as '{}'. Confirm results never contain such values.
+ * NOTE(review): every failure maps to the same 'hash_error' sentinel; callers
+ * that compare hashes for equality should confirm they do not treat two
+ * failed hashes as matching content.
+ *
+ * @param {*} data - Value to hash; nested arrays and objects are walked recursively.
+ * @returns {string} Hex-encoded SHA-256 digest of the canonical string, the
+ *   literal 'undefined' when `data` is undefined, or 'hash_error' when
+ *   serialization or hashing throws.
+ */
+function generateDataHash(data) {
+    if (data === undefined) return 'undefined';
+    // Recursive stable stringify: primitives via JSON.stringify, arrays in order, object keys sorted
+    const stableStringify = (obj) => {
+        if (typeof obj !== 'object' || obj === null) {
+            return JSON.stringify(obj);
+        }
+        if (Array.isArray(obj)) {
+            return '[' + obj.map(stableStringify).join(',') + ']';
+        }
+        return '{' + Object.keys(obj).sort().map(k =>
+            JSON.stringify(k) + ':' + stableStringify(obj[k])
+        ).join(',') + '}';
+    };
+    try {
+        const str = stableStringify(data);
+        return crypto.createHash('sha256').update(str).digest('hex');
+    } catch (e) {
+        return 'hash_error';
+    }
+}
 /**
  * Executes a function with exponential backoff retry logic.
  * @param {Function} fn - Async function to execute
@@ -49,7 +77,7 @@ async function withRetry(fn, operationName, maxRetries = 3) {
     }
 }
-/** * Stage 2: Commit a batch of writes in chunks
+/** Stage 2: Commit a batch of writes in chunks
  * FIXED: Now respects write.options (e.g. { merge: false }) to allow overwrites/deletes.
  */
 async function commitBatchInChunks(config, deps, writes, operationName) {
@@ -220,6 +248,7 @@ module.exports = {
     getExpectedDateStrings,
     getEarliestDataDates,
     generateCodeHash,
+    generateDataHash, // Exported
     withRetry,
     DEFINITIVE_EARLIEST_DATES
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bulltrackers-module",
-  "version": "1.0.274",
+  "version": "1.0.276",
   "description": "Helper Functions for Bulltrackers.",
   "main": "index.js",
   "files": [