npm - bulltrackers-module - Versions diffs - 1.0.322 → 1.0.324 - Mend

bulltrackers-module 1.0.322 → 1.0.324

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (3)

package/functions/computation-system/WorkflowOrchestrator.js CHANGED Viewed

@@ -121,7 +121,8 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
         }
         if (missingDeps.length > 0) {
-            const isImpossible = missingDeps.some(d => simulationStatus[normalizeName(d)]?.hash?.startsWith(STATUS_IMPOSSIBLE_PREFIX));
+            // Cast to string to solve some weird null bug
+            const isImpossible = missingDeps.some(d => String(simulationStatus[normalizeName(d)]?.hash).startsWith(STATUS_IMPOSSIBLE_PREFIX));
             if (isImpossible) {
                 report.impossible.push({ name: cName, reason: 'Upstream Impossible' });
                 simulationStatus[cName] = { hash: `${STATUS_IMPOSSIBLE_PREFIX}:UPSTREAM` };

package/functions/computation-system/helpers/computation_dispatcher.js CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * FILENAME: computation-system/helpers/computation_dispatcher.js
- * PURPOSE: Sequential Cursor-Based Dispatcher with Hyper-Verbose Telemetry.
- * UPDATED: Sweep mode now auto-upgrades missed tasks to high-mem due to potential silent OOMs.
+ * PURPOSE: Sequential Cursor-Based Dispatcher with Ledger Awareness, SimHash Stability, and Session Caching.
+ * UPDATED: Fixed Ledger Blindness, Cursor Shifting, and Live Analysis Disconnect.
  */
 const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -9,138 +9,269 @@ const { groupByPass, analyzeDateExecution } = require('../WorkflowOrchestrator.j
 const { PubSubUtils } = require('../../core/utils/pubsub_utils');
 const { fetchComputationStatus } = require('../persistence/StatusRepository');
 const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
-const crypto = require('crypto');
+const crypto = require('crypto');
-const OOM_THRESHOLD_MB = 1500;
+const OOM_THRESHOLD_MB = 1500;
 const BASE_SECONDS_PER_WEIGHT_UNIT = 3;
+const SESSION_CACHE_DURATION_MS = 1000 * 60 * 30; // 30 Minutes
-async function getHighMemReroutes(db, date, pass, tasks) {
-    const reroutes = [];
-    for (const task of tasks) {
-        const name       = normalizeName(task.name);
-        const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
-        const doc        = await db.doc(ledgerPath).get();
+// =============================================================================
+// HELPER: Ledger Awareness (Prevents Race Conditions)
+// =============================================================================
+async function filterActiveTasks(db, date, pass, tasks) {
+    if (!tasks || tasks.length === 0) return [];
+    const checkPromises = tasks.map(async (t) => {
+        const taskName = normalizeName(t.name);
+        const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${taskName}`;
+        const snap = await db.doc(ledgerPath).get();
-        if (doc.exists) {
-            const data = doc.data();
-            const isOOM = (data.status === 'FAILED' || data.status === 'CRASH') &&
-                          (data.resourceTier !== 'high-mem') &&
-                          ((data.peakMemoryMB > OOM_THRESHOLD_MB) || (data.error && /memory/i.test(data.error.message)));
+        if (snap.exists) {
+            const data = snap.data();
+            // Check PENDING, IN_PROGRESS, or "Ghost" (Completed < 1 min ago)
+            const isActive = ['PENDING', 'IN_PROGRESS'].includes(data.status);
+            const isJustFinished = data.status === 'COMPLETED' &&
+                                   data.completedAt &&
+                                   (Date.now() - new Date(data.completedAt).getTime() < 60 * 1000);
-            if (isOOM) {
-                reroutes.push({ ...task, resources: 'high-mem' });
+            if (isActive || isJustFinished) return null;
+        }
+        return t;
+    });
+    const results = await Promise.all(checkPromises);
+    return results.filter(t => t !== null);
+}
+// =============================================================================
+// HELPER: SimHash Stability (Solves Live Analysis Disconnect)
+// =============================================================================
+async function attemptSimHashResolution(dependencies, date, tasks, dailyStatus, manifestMap) {
+    const { db, logger } = dependencies;
+    const resolvedTasks = [];
+    const remainingTasks = [];
+    // Cache for SimHashes to avoid redundant DB lookups in loop
+    const simHashCache = new Map();
+    for (const task of tasks) {
+        // Only apply to Re-Runs (Hash Mismatches), not fresh runs (Missing Data)
+        const currentStatus = dailyStatus ? dailyStatus[task.name] : null;
+        const manifestItem = manifestMap.get(normalizeName(task.name));
+        if (currentStatus && currentStatus.simHash && manifestItem) {
+            // 1. Get the SimHash for the NEW code (from Registry)
+            let newSimHash = simHashCache.get(manifestItem.hash);
+            if (!newSimHash) {
+                const simDoc = await db.collection('system_simhash_registry').doc(manifestItem.hash).get();
+                if (simDoc.exists) {
+                    newSimHash = simDoc.data().simHash;
+                    simHashCache.set(manifestItem.hash, newSimHash);
+                }
+            }
+            // 2. Compare
+            if (newSimHash && newSimHash === currentStatus.simHash) {
+                resolvedTasks.push({
+                    name: task.name,
+                    hash: manifestItem.hash,
+                    simHash: newSimHash,
+                    prevStatus: currentStatus // Pass previous status to preserve other fields
+                });
+                continue;
             }
         }
+        remainingTasks.push(task);
     }
-    return reroutes;
+    // 3. Apply Updates for Stable Tasks
+    if (resolvedTasks.length > 0) {
+        const updatePayload = {};
+        resolvedTasks.forEach(t => {
+            // [FIXED] Construct full nested object to avoid dot-notation issues with .set()
+            // We merge existing data (like resultHash) so we don't lose the valid calculation output
+            updatePayload[t.name] = {
+                ...(t.prevStatus || {}), // Keep existing resultHash, output, etc.
+                hash: t.hash,            // Update to new code hash
+                simHash: t.simHash,      // Confirmed stable simHash
+                reason: 'SimHash Stable (Auto-Resolved)',
+                lastUpdated: new Date().toISOString()
+            };
+        });
+        // Use set with merge: true. Now that keys are "clean" (no dots),
+        // objects will merge correctly into the document structure.
+        await db.collection('computation_status').doc(date).set(updatePayload, { merge: true });
+        logger.log('INFO', `[SimHash] ⏩ Fast-forwarded ${resolvedTasks.length} tasks for ${date} (Logic Unchanged).`);
+    }
+    return remainingTasks;
+}
+// =============================================================================
+// HELPER: Stable Session Management (Solves Cursor Shifting)
+// =============================================================================
+async function getStableDateSession(config, dependencies, passToRun, dateLimitStr, forceRebuild) {
+    const { db, logger } = dependencies;
+    const sessionId = `pass_${passToRun}_${dateLimitStr.replace(/-/g, '')}`;
+    const sessionRef = db.collection('dispatcher_sessions').doc(sessionId);
+    // 1. Try to Load Session
+    if (!forceRebuild) {
+        const sessionSnap = await sessionRef.get();
+        if (sessionSnap.exists) {
+            const data = sessionSnap.data();
+            const age = Date.now() - new Date(data.createdAt).getTime();
+            if (age < SESSION_CACHE_DURATION_MS) {
+                logger.log('INFO', `[Session] 📂 Loaded stable session for Pass ${passToRun} (${data.dates.length} dates).`);
+                return data.dates;
+            }
+        }
+    }
+    // 2. Rebuild Session (Expensive Scan)
+    logger.log('INFO', `[Session] 🔄 Rebuilding dispatch session for Pass ${passToRun}...`);
+    const earliestDates = await getEarliestDataDates(config, dependencies);
+    const allDates = getExpectedDateStrings(earliestDates.absoluteEarliest, new Date(dateLimitStr + 'T00:00:00Z'));
+    // We only want dates that *might* be dirty.
+    // Optimization: We add ALL dates to the list. The dispatcher checks them individually.
+    // Why? Because if we pre-filter here, we repeat the work of the dispatcher.
+    // Better: Store the plain list of dates sorted descending (newest first usually better for backfills, ascending for standard).
+    // Let's stick to Ascending (oldest first) as standard.
+    await sessionRef.set({
+        dates: allDates,
+        createdAt: new Date().toISOString(),
+        configHash: dateLimitStr // Simple versioning
+    });
+    return allDates;
 }
+// =============================================================================
+// MAIN DISPATCHER
+// =============================================================================
 async function dispatchComputationPass(config, dependencies, computationManifest, reqBody = {}) {
     const { logger, db } = dependencies;
     const pubsubUtils    = new PubSubUtils(dependencies);
     const passToRun     = String(reqBody.pass || config.COMPUTATION_PASS_TO_RUN || "1");
     const targetCursorN = parseInt(reqBody.cursorIndex || 1);
-    const dateLimitStr  = reqBody.date || config.date || "2025-01-01";
+    const dateLimitStr  = reqBody.date || config.date || "2025-01-01";
+    const forceRebuild  = reqBody.forceRebuild === true;
     const manifestMap     = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
     const passes          = groupByPass(computationManifest);
     const calcsInThisPass = passes[passToRun] || [];
     const manifestWeightMap = new Map(computationManifest.map(c => [normalizeName(c.name), c.weight || 1.0]));
     if (!calcsInThisPass.length) {
-        logger.log('WARN', `[Dispatcher] 🛑 No calculations found for Pass ${passToRun}.`);
         return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
     }
-    const earliestDates = await getEarliestDataDates(config, dependencies);
-    const allDates = getExpectedDateStrings(earliestDates.absoluteEarliest, new Date(dateLimitStr + 'T00:00:00Z'));
+    // 1. Get Stable Date List (Solves Shifting Cursor)
+    const sessionDates = await getStableDateSession(config, dependencies, passToRun, dateLimitStr, forceRebuild);
-    if (allDates.length === 0) {
-        logger.log('ERROR', `[Dispatcher] ❌ Date range is empty.`);
+    if (!sessionDates || sessionDates.length === 0) {
         return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
     }
-    // 1. Identify all "Dirty" dates (dates that actually have work to do)
-    const dirtyDates = [];
-    for (const d of allDates) {
-        const dailyStatus  = await fetchComputationStatus(d, config, dependencies);
-        const availability = await checkRootDataAvailability(d, config, dependencies, DEFINITIVE_EARLIEST_DATES);
-        if (!availability || !availability.status.hasPrices) continue;
+    // 2. Select Date based on Cursor
+    let selectedDate = null;
+    let selectedTasks = [];
+    let isReroute = false;
+    let isSweep = false;
-        const report = analyzeDateExecution(d, calcsInThisPass, availability.status, dailyStatus, manifestMap, null);
-        const tasks  = [...report.runnable, ...report.reRuns];
-        if (tasks.length > 0) {
-            dirtyDates.push({ date: d, tasks });
-        }
+    // Check bounds
+    if (targetCursorN <= sessionDates.length) {
+        // Normal Operation
+        selectedDate = sessionDates[targetCursorN - 1];
+    } else {
+        // End of list
+        return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
     }
-    let selectedDate  = null;
-    let selectedTasks = [];
-    let isReroute     = false;
-    let isSweep       = false;
-    // Logic for Reroutes (Known OOM handling)
-    if (targetCursorN > 1 && (targetCursorN - 2) < dirtyDates.length) {
-        const prevEntry = dirtyDates[targetCursorN - 2];
-        const reroutes  = await getHighMemReroutes(db, prevEntry.date, passToRun, prevEntry.tasks);
-        if (reroutes.length > 0) {
-            selectedDate  = prevEntry.date;
-            selectedTasks = reroutes;
-            isReroute     = true;
+    // 3. Analyze SPECIFIC Date (Live Analysis)
+    // We only fetch status for the ONE date we are looking at + context
+    if (selectedDate) {
+        // A. Fetch Context
+        const needsHistory = calcsInThisPass.some(c => c.isHistorical);
+        const earliestDates = await getEarliestDataDates(config, dependencies);
+        let prevDailyStatusPromise = Promise.resolve(null);
+        if (needsHistory) {
+            const prevD = new Date(selectedDate + 'T00:00:00Z');
+            prevD.setUTCDate(prevD.getUTCDate() - 1);
+            if (prevD >= earliestDates.absoluteEarliest) {
+                prevDailyStatusPromise = fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
+            }
         }
-    }
-    // Logic for standard cursor progression or Sweep (Recovery) mode
-    if (!selectedDate) {
-        if (targetCursorN <= dirtyDates.length) {
-            const entry   = dirtyDates[targetCursorN - 1];
-            selectedDate  = entry.date;
-            selectedTasks = entry.tasks;
-        } else if (dirtyDates.length > 0) {
-            // RECOVERY/SWEEP MODE:
-            // The cursor has passed the number of dirty dates, but tasks still remain.
-            // These tasks are automatically routed to 'high-mem' with recovery reasoning.
-            isSweep       = true;
-            selectedDate  = dirtyDates[0].date;
-            selectedTasks = dirtyDates[0].tasks.map(t => ({
-                ...t,
-                resources: 'high-mem',
-                reason: `${t.reason || 'Missed Computation'} [RECOVERY_AUTO_UPGRADE: Potential Silent OOM]`
-            }));
+        const [dailyStatus, prevDailyStatus, availability] = await Promise.all([
+            fetchComputationStatus(selectedDate, config, dependencies),
+            prevDailyStatusPromise,
+            checkRootDataAvailability(selectedDate, config, dependencies, DEFINITIVE_EARLIEST_DATES)
+        ]);
+        if (availability && availability.status.hasPrices) {
+            const report = analyzeDateExecution(selectedDate, calcsInThisPass, availability.status, dailyStatus, manifestMap, prevDailyStatus);
+            let rawTasks = [...report.runnable, ...report.reRuns];
+            // B. Apply SimHash Resolution (Problem #1)
+            if (rawTasks.length > 0) {
+                rawTasks = await attemptSimHashResolution(dependencies, selectedDate, rawTasks, dailyStatus, manifestMap);
+            }
+            // C. Apply Ledger Filter (Problem #2)
+            if (rawTasks.length > 0) {
+                selectedTasks = await filterActiveTasks(db, selectedDate, passToRun, rawTasks);
+            }
+            // D. Check for High-Mem Reroutes (OOM handling)
+            if (selectedTasks.length > 0) {
+                const reroutes = await getHighMemReroutes(db, selectedDate, passToRun, selectedTasks);
+                if (reroutes.length > 0) {
+                    selectedTasks = reroutes;
+                    isReroute = true;
+                }
+            }
+        } else {
+            logger.log('WARN', `[Dispatcher] Date ${selectedDate} skipped (Data Unavailable).`);
         }
     }
-    if (!selectedDate) {
-        logger.log('INFO', `[Dispatcher] 🏁 Pass ${passToRun} is fully satiated. No work remaining.`);
-        return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0, etaSeconds: 0 };
+    // 4. Dispatch Logic
+    if (selectedTasks.length === 0) {
+        // Nothing to do for this date.
+        // CRITICAL: We return dispatched: 0, but n_cursor_ignored: FALSE.
+        // This tells workflow to increment cursor and check the next date in the Stable Session.
+        return {
+            status: 'CONTINUE_PASS',
+            dateProcessed: selectedDate,
+            dispatched: 0,
+            n_cursor_ignored: false, // Proceed to next date
+            etaSeconds: 0,
+            remainingDates: sessionDates.length - targetCursorN
+        };
     }
-    const totalweight = selectedTasks.reduce((sum, t) => {
-        const weight = manifestWeightMap.get(normalizeName(t.name)) || 1.0;
-        return sum + weight;
-    }, 0);
-    // 2. Prepare Payload and Telemetry
+    // 5. Publish Tasks
+    const totalweight = selectedTasks.reduce((sum, t) => sum + (manifestWeightMap.get(normalizeName(t.name)) || 1.0), 0);
     const currentDispatchId = crypto.randomUUID();
     const etaSeconds = Math.max(20, Math.ceil(totalweight * BASE_SECONDS_PER_WEIGHT_UNIT));
-    const remainingDatesCount = Math.max(0, dirtyDates.length - targetCursorN);
-    // [UPDATED] Capture both name and reason for transparency
     const taskDetails = selectedTasks.map(t => `${t.name} (${t.reason})`);
-    logger.log('INFO', `[Dispatcher] ✅ Dispatching ${selectedTasks.length} tasks for ${selectedDate}. ETA: ${etaSeconds}s. [Mode: ${isSweep ? 'RECOVERY' : 'NORMAL'}]`, {
-        date:                 selectedDate,
-        pass:                 passToRun,
-        dispatchedCount:      selectedTasks.length,
-        remainingCursorDates: remainingDatesCount,
-        totalweight:          totalweight,
-        etaSeconds:           etaSeconds,
-        dispatchId:           currentDispatchId,
-        tasks:                taskDetails // [UPDATED] Now logs "calc-name (Reason)"
+    logger.log('INFO', `[Dispatcher] ✅ Dispatching ${selectedTasks.length} tasks for ${selectedDate}.`, {
+        date: selectedDate,
+        pass: passToRun,
+        dispatchedCount: selectedTasks.length,
+        cursor: targetCursorN,
+        etaSeconds: etaSeconds,
+        dispatchId: currentDispatchId,
+        tasks: taskDetails
     });
     const mapToTaskPayload = (t) => ({
@@ -150,7 +281,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
         date: selectedDate,
         pass: passToRun,
         dispatchId: currentDispatchId,
-        triggerReason: t.reason, // Already passed to worker
+        triggerReason: t.reason,
         resources: t.resources || 'standard'
     });
@@ -161,27 +292,49 @@ async function dispatchComputationPass(config, dependencies, computationManifest
     if (standardTasks.length > 0) {
         pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
             topicName: config.computationTopicStandard || 'computation-tasks',
-            tasks    : standardTasks,
-            taskType : `pass-${passToRun}-std`
+            tasks: standardTasks,
+            taskType: `pass-${passToRun}-std`
         }));
     }
     if (highMemTasks.length > 0) {
         pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
             topicName: config.computationTopicHighMem || 'computation-tasks-highmem',
-            tasks    : highMemTasks,
-            taskType : `pass-${passToRun}-high`
+            tasks: highMemTasks,
+            taskType: `pass-${passToRun}-high`
         }));
     }
     await Promise.all(pubPromises);
+    // CRITICAL: We dispatched work. We want to check THIS date again next time
+    // to ensure tasks completed. So we IGNORE cursor increment.
     return {
-        status          : isSweep ? 'RECOVERY' : 'CONTINUE_PASS',
-        dateProcessed   : selectedDate,
-        dispatched      : selectedTasks.length,
-        n_cursor_ignored: isReroute,
-        etaSeconds      : etaSeconds,
-        remainingDates  : remainingDatesCount
+        status: 'CONTINUE_PASS',
+        dateProcessed: selectedDate,
+        dispatched: selectedTasks.length,
+        n_cursor_ignored: true, // Hold cursor until this date is clean
+        etaSeconds: etaSeconds,
+        remainingDates: sessionDates.length - targetCursorN
     };
 }
+async function getHighMemReroutes(db, date, pass, tasks) {
+    const reroutes = [];
+    for (const task of tasks) {
+        const name = normalizeName(task.name);
+        const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
+        const doc = await db.doc(ledgerPath).get();
+        if (doc.exists) {
+            const data = doc.data();
+            const isOOM = (data.status === 'FAILED' || data.status === 'CRASH') &&
+                          (data.resourceTier !== 'high-mem') &&
+                          ((data.peakMemoryMB > OOM_THRESHOLD_MB) || (data.error && /memory/i.test(data.error.message)));
+            if (isOOM) {
+                reroutes.push({ ...task, resources: 'high-mem' });
+            }
+        }
+    }
+    return reroutes;
+}
 module.exports = { dispatchComputationPass };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bulltrackers-module",
-  "version": "1.0.322",
+  "version": "1.0.324",
   "description": "Helper Functions for Bulltrackers.",
   "main": "index.js",
   "files": [