npm - bulltrackers-module - Versions diffs - 1.0.306 → 1.0.308 - Mend

bulltrackers-module 1.0.306 → 1.0.308

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/functions/computation-system/WorkflowOrchestrator.js +100 -212
package/functions/computation-system/helpers/computation_worker.js +56 -267
package/functions/computation-system/utils/utils.js +54 -171
package/package.json +1 -1
package/functions/computation-system/features.md +0 -395
package/functions/computation-system/paper.md +0 -93

package/functions/computation-system/WorkflowOrchestrator.js CHANGED Viewed

@@ -1,277 +1,165 @@
 /**
- * @fileoverview Main Orchestrator. Coordinates the topological execution.
- * UPDATED: Includes Content-Based Dependency Short-Circuiting.
- * UPDATED: Includes 'Audit Upgrade' check.
- * UPDATED: Detailed Dependency Reporting for Impossible Chains.
+ * FILENAME: computation-system/WorkflowOrchestrator.js
+ * UPDATED: Added missing groupByPass export.
+ * Includes Content-Based Short-Circuiting for both Upstream and Historical dependencies.
  */
 const { normalizeName, DEFINITIVE_EARLIEST_DATES }         = require('./utils/utils');
 const { checkRootDataAvailability, checkRootDependencies } = require('./data/AvailabilityChecker');
 const { fetchExistingResults }                             = require('./data/DependencyFetcher');
 const { fetchComputationStatus, updateComputationStatus }  = require('./persistence/StatusRepository');
 const { StandardExecutor }                                 = require('./executors/StandardExecutor');
 const { MetaExecutor }                                     = require('./executors/MetaExecutor');
-const { generateProcessId, PROCESS_TYPES }                 = require('./logger/logger');
 const STATUS_IMPOSSIBLE_PREFIX = 'IMPOSSIBLE';
-function groupByPass(manifest) {  return manifest.reduce((acc, calc) => { (acc[calc.pass] = acc[calc.pass] || []).push(calc);  return acc;  }, {}); }
 /**
- * Analyzes whether calculations should run, be skipped, or are blocked.
- * [NEW] Implements ResultHash short-circuit logic.
+ * [FIX] Groups manifest entries by their pass number.
+ * Required by the Dispatcher to identify current work-sets.
  */
-function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus = null) {
-    const report           = { runnable: [], blocked: [], impossible: [], failedDependency: [], reRuns: [], skipped: [] };
-    const simulationStatus = { ...dailyStatus };
-    const isTargetToday    = (dateStr === new Date().toISOString().slice(0, 10));
-    // Helper: Validates if a dependency is satisfied, either by Code Match OR Content Match
-    const isDepSatisfied = (depName, currentStatusMap, manifestMap, dependentStoredStatus) => {
-        const norm        = normalizeName(depName);
-        const storedDep   = currentStatusMap[norm];
-        const depManifest = manifestMap.get(norm);
-        // 1. Basic Existence Checks
-        if (!storedDep) return false;
-        if (typeof storedDep.hash === 'string' && storedDep.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) return false;
-        if (!depManifest) return false;
+function groupByPass(manifest) {
+    const passes = {};
+    manifest.forEach(calc => {
+        if (!passes[calc.pass]) passes[calc.pass] = [];
+        passes[calc.pass].push(calc);
+    });
+    return passes;
+}
-        // 2. Code Hash Check (The Standard Check)
-        if (storedDep.hash === depManifest.hash) return true;
+/**
+ * [NEW] Core Short-Circuit Logic.
+ * Checks if a dependency (either a different node or "yesterday's self") is satisfied.
+ */
+function isDependencyReady(depName, isHistoricalSelf, currentStatusMap, prevStatusMap, manifestMap, storedStatus) {
+    const norm = normalizeName(depName);
+    const targetStatus = isHistoricalSelf ? (prevStatusMap ? prevStatusMap[norm] : null) : currentStatusMap[norm];
+    const depManifest = manifestMap.get(norm);
+    if (!targetStatus) return { ready: false, reason: 'Missing' };
+    if (String(targetStatus.hash).startsWith(STATUS_IMPOSSIBLE_PREFIX)) return { ready: false, reason: 'Impossible Upstream' };
+    // 1. Code Hash Match (Strict)
+    if (targetStatus.hash === depManifest.hash) return { ready: true };
+    // 2. Content Hash Match (Short-Circuit)
+    // If our code didn't change, check if the output of the dependency is what we expect.
+    const lastSeenResultHash = storedStatus?.dependencyResultHashes?.[depName];
+    if (lastSeenResultHash && targetStatus.resultHash === lastSeenResultHash) {
+        return { ready: true, shortCircuited: true };
+    }
-        // 3. [NEW] Content-Based Short-Circuit Check
-        // If Code Hash mismatch, check if the *Result Hash* is identical to what we used last time.
-        // dependentStoredStatus = The status of the calculation (B) that depends on this (A).
-        // dependentStoredStatus.dependencyResultHashes[depName] = The ResultHash of A when B last ran.
-        // storedDep.resultHash = The current ResultHash of A.
-        if (dependentStoredStatus &&
-            dependentStoredStatus.dependencyResultHashes &&
-            dependentStoredStatus.dependencyResultHashes[depName] &&
-            storedDep.resultHash &&
-            storedDep.resultHash === dependentStoredStatus.dependencyResultHashes[depName]) {
-            return true; // Short-circuit: The output didn't change, so we are safe.
-        }
+    return { ready: false, reason: 'Hash Mismatch' };
+}
-        return false;
-    };
+function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus = null) {
+    const report = { runnable: [], blocked: [], impossible: [], failedDependency: [], reRuns: [], skipped: [] };
+    const simulationStatus = { ...dailyStatus };
     for (const calc of calcsInPass) {
-        const cName          = normalizeName(calc.name);
-        const stored         = simulationStatus[cName];
-        const storedHash     = stored ? stored.hash : null;
-        const storedCategory = stored ? stored.category : null;
-        const currentHash    = calc.hash;
-        // Collect current result hashes of dependencies for the next run
-        const currentDependencyResultHashes = {};
-        if (calc.dependencies) {
-            calc.dependencies.forEach(d => {
-                const normD = normalizeName(d);
-                if (simulationStatus[normD] && simulationStatus[normD].resultHash) {
-                    currentDependencyResultHashes[d] = simulationStatus[normD].resultHash;
-                }
-            });
-        }
-        const markImpossible = (reason, type = 'GENERIC') => {
-            report.impossible.push({ name: cName, reason });
-            const statusHash = `${STATUS_IMPOSSIBLE_PREFIX}:${type}`;
-            simulationStatus[cName] = { hash: statusHash, category: calc.category };
-        };
+        const cName = normalizeName(calc.name);
+        const stored = simulationStatus[cName];
+        const currentHash = calc.hash;
-        const markRunnable = (isReRun = false, reRunDetails = null) => {
-            const payload = {
-                name: cName,
-                ...reRunDetails,
-                dependencyResultHashes: currentDependencyResultHashes // Pass forward
-            };
-            if (isReRun) report.reRuns.push(payload);
-            else report.runnable.push(payload);
-            // Simulate success so dependents can pass their check
-            simulationStatus[cName] = {
-                hash: currentHash,
-                resultHash: 'SIMULATED',
-                category: calc.category,
-                composition: calc.composition
-            };
-        };
-        let migrationOldCategory = null;
-        if (storedCategory && storedCategory !== calc.category) { migrationOldCategory = storedCategory; }
-        // 1. Check Root Data
+        // 1. Root Data Check
         const rootCheck = checkRootDependencies(calc, rootDataStatus);
         if (!rootCheck.canRun) {
-            const missingStr = rootCheck.missing.join(', ');
-            if (!isTargetToday) {
-                markImpossible(`Missing Root Data: ${missingStr} (Historical)`, 'NO_DATA');
+            if (dateStr !== new Date().toISOString().slice(0, 10)) {
+                report.impossible.push({ name: cName, reason: `Missing Root: ${rootCheck.missing.join(', ')}` });
+                simulationStatus[cName] = { hash: `${STATUS_IMPOSSIBLE_PREFIX}:NO_DATA` };
             } else {
-                report.blocked.push({ name: cName, reason: `Missing Root Data: ${missingStr} (Waiting)` });
+                report.blocked.push({ name: cName, reason: `Waiting for Root Data` });
             }
             continue;
         }
-        // 2. Check Dependencies
-        let dependencyIsImpossible = false;
-        let impossibleDepCause = null;
+        // 2. Dependency & Temporal Check
         const missingDeps = [];
+        let isBlockedByHistory = false;
+        // A. Standard Upstream Dependencies
         if (calc.dependencies) {
             for (const dep of calc.dependencies) {
-                const normDep   = normalizeName(dep);
-                const depStored = simulationStatus[normDep];
-                if (depStored && typeof depStored.hash === 'string' && depStored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) {
-                    dependencyIsImpossible = true;
-                    impossibleDepCause = dep;
-                    break;
-                }
-                // Pass 'stored' (this calc's status) to check short-circuiting
-                if (!isDepSatisfied(dep, simulationStatus, manifestMap, stored)) { missingDeps.push(dep); }
+                const check = isDependencyReady(dep, false, simulationStatus, null, manifestMap, stored);
+                if (!check.ready) missingDeps.push(dep);
             }
         }
-        if (dependencyIsImpossible) {
-            markImpossible(`Dependency is Impossible (${impossibleDepCause})`, 'UPSTREAM');
-            continue;
-        }
-        if (missingDeps.length > 0) { report.failedDependency.push({ name: cName, missing: missingDeps }); continue; }
-        // 3. Check Historical Continuity
-        if (calc.isHistorical && prevDailyStatus) {
+        // B. [UPGRADED] Temporal Dependency (Yesterday's Self)
+        if (calc.isHistorical) {
             const yesterday = new Date(dateStr + 'T00:00:00Z');
             yesterday.setUTCDate(yesterday.getUTCDate() - 1);
+            // Only block if yesterday is a valid data date.
             if (yesterday >= DEFINITIVE_EARLIEST_DATES.absoluteEarliest) {
-                const prevStored = prevDailyStatus[cName];
-                if (!prevStored || prevStored.hash !== currentHash) {
-                    report.blocked.push({ name: cName, reason: `Waiting for historical continuity (Yesterday ${!prevStored ? 'Missing' : 'Hash Mismatch'})` });
-                    continue;
-                }
+                const check = isDependencyReady(calc.name, true, null, prevDailyStatus, manifestMap, stored);
+                if (!check.ready) isBlockedByHistory = true;
             }
         }
-        // 4. Check Hash / Composition (The Audit Gate)
-        if (!storedHash) {
-            markRunnable(false, { reason: "New Calculation" });
-        }
-        else if (storedHash !== currentHash) {
-            // [NEW] Check if Dependencies caused this, and if their content is actually same
-            // Note: If we are here, it means code changed.
-            // Short-circuiting logic was handled in 'isDepSatisfied' for upstream checks.
-            // But if *my* code changed, I must re-run unless I implement output-caching which is dangerous.
-            // So we strictly re-run if code changes.
-            let changeReason = "Hash Mismatch (Unknown)";
-            const oldComp = stored.composition;
-            const newComp = calc.composition;
-            if (oldComp && newComp) {
-                if (oldComp.code !== newComp.code) {
-                    changeReason = "Code Changed";
-                }
-                else if (JSON.stringify(oldComp.layers) !== JSON.stringify(newComp.layers)) {
-                    const changedLayers = [];
-                    for(const lKey in newComp.layers) {
-                        if (newComp.layers[lKey] !== oldComp.layers[lKey]) changedLayers.push(lKey);
-                    }
-                    changeReason = `Layer Update: [${changedLayers.join(', ')}]`;
-                }
-                else if (JSON.stringify(oldComp.deps) !== JSON.stringify(newComp.deps)) {
-                    // Dependency Hash Mismatch.
-                    // This is where we COULD have short-circuited if we weren't enforcing code-hash strictness here.
-                    // But typically if code hash mismatches, we re-run.
-                    // The "Short-Circuit" benefit is mainly that *dependents* of this calculation
-                    // won't need to re-run if *this* calculation produces the same output.
-                    const changedDeps = [];
-                    for(const dKey in newComp.deps) {
-                        if (newComp.deps[dKey] !== oldComp.deps[dKey]) changedDeps.push(dKey);
-                    }
-                    changeReason = `Upstream Change: [${changedDeps.join(', ')}]`;
-                }
-                else {
-                    changeReason = "Logic/Epoch Change";
-                }
+        if (missingDeps.length > 0) {
+            const isImpossible = missingDeps.some(d => simulationStatus[normalizeName(d)]?.hash?.startsWith(STATUS_IMPOSSIBLE_PREFIX));
+            if (isImpossible) {
+                report.impossible.push({ name: cName, reason: 'Upstream Impossible' });
+                simulationStatus[cName] = { hash: `${STATUS_IMPOSSIBLE_PREFIX}:UPSTREAM` };
             } else {
-                changeReason = "Hash Mismatch (No prior composition)";
+                report.failedDependency.push({ name: cName, missing: missingDeps });
             }
+            continue;
+        }
-            markRunnable(true, {
-                name: cName,
-                oldHash: storedHash,
-                newHash: currentHash,
-                previousCategory: migrationOldCategory,
-                reason: changeReason
-            });
-        }
-        else if (migrationOldCategory) {
-            markRunnable(true, { name: cName, reason: 'Category Migration', previousCategory: migrationOldCategory, newCategory: calc.category });
-        }
-        else if (!stored.composition) {
-            markRunnable(true, {
-                name: cName,
-                oldHash: storedHash,
-                newHash: currentHash,
-                reason: 'Audit Upgrade (Populating Composition Metadata)'
+        if (isBlockedByHistory) {
+            report.blocked.push({ name: cName, reason: 'Waiting for Yesterday' });
+            continue;
+        }
+        // 3. Runnable / Skip Logic
+        const currentDependencyResultHashes = {};
+        if (calc.dependencies) {
+            calc.dependencies.forEach(d => {
+                const resHash = simulationStatus[normalizeName(d)]?.resultHash;
+                if (resHash) currentDependencyResultHashes[d] = resHash;
             });
         }
-        else {
-            report.skipped.push({ name: cName, reason: "Up To Date" });
-            simulationStatus[cName] = { hash: currentHash, category: calc.category, composition: calc.composition };
+        const taskPayload = { name: cName, dependencyResultHashes: currentDependencyResultHashes };
+        if (!stored?.hash) {
+            report.runnable.push({ ...taskPayload, reason: "New Calculation" });
+            simulationStatus[cName] = { hash: currentHash, resultHash: 'SIMULATED' };
+        } else if (stored.hash !== currentHash) {
+            report.reRuns.push({ ...taskPayload, oldHash: stored.hash, newHash: currentHash, reason: "Hash Mismatch" });
+            simulationStatus[cName] = { hash: currentHash, resultHash: 'SIMULATED' };
+        } else {
+            report.skipped.push({ name: cName, reason: "Up To Date" });
         }
     }
     return report;
 }
-/**
- * DIRECT EXECUTION PIPELINE (For Workers)
- * [UPDATED] Accepts dependencyResultHashes
- */
 async function executeDispatchTask(dateStr, pass, targetComputation, config, dependencies, computationManifest, previousCategory = null, dependencyResultHashes = {}) {
     const { logger } = dependencies;
-    const pid = generateProcessId(PROCESS_TYPES.EXECUTOR, targetComputation, dateStr);
     const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
     const calcManifest = manifestMap.get(normalizeName(targetComputation));
-    if (!calcManifest) { throw new Error(`Calculation '${targetComputation}' not found in manifest.`); }
-    // [NEW] Attach the dependency result hashes to the manifest so ResultCommitter can save them
+    if (!calcManifest) throw new Error(`Calc '${targetComputation}' not found.`);
     calcManifest.dependencyResultHashes = dependencyResultHashes;
-    if (previousCategory) {
-        calcManifest.previousCategory = previousCategory;
-        logger.log('INFO', `[Executor] Migration detected for ${calcManifest.name}. Old data will be cleaned from: ${previousCategory}`);
-    }
     const rootData = await checkRootDataAvailability(dateStr, config, dependencies, DEFINITIVE_EARLIEST_DATES);
-    if (!rootData) {
-        logger.log('ERROR', `[Executor] FATAL: Root data check failed for ${targetComputation} on ${dateStr}.`);
-        return;
-    }
     const calcsToRun = [calcManifest];
-    const existingResults = await fetchExistingResults(dateStr, calcsToRun, computationManifest, config, dependencies, false);
+    const existingResults = await fetchExistingResults(dateStr, calcsToRun, computationManifest, config, dependencies, false);
     let previousResults = {};
     if (calcManifest.isHistorical) {
-        const prevDate = new Date(dateStr + 'T00:00:00Z');
-        prevDate.setUTCDate(prevDate.getUTCDate() - 1);
-        const prevDateStr = prevDate.toISOString().slice(0, 10);
-        previousResults = await fetchExistingResults(prevDateStr, calcsToRun, computationManifest, config, dependencies, true);
+        const prev = new Date(dateStr + 'T00:00:00Z'); prev.setUTCDate(prev.getUTCDate() - 1);
+        previousResults = await fetchExistingResults(prev.toISOString().slice(0, 10), calcsToRun, computationManifest, config, dependencies, true);
     }
-    logger.log('INFO', `[Executor] Running ${calcManifest.name} for ${dateStr}`, { processId: pid });
-    let resultUpdates = {};
+    const execDate = new Date(dateStr + 'T00:00:00Z');
+    const updates = (calcManifest.type === 'standard')
+        ? await StandardExecutor.run(execDate, calcsToRun, `Pass ${pass}`, config, dependencies, rootData, existingResults, previousResults)
+        : await MetaExecutor.run(execDate, calcsToRun, `Pass ${pass}`, config, dependencies, existingResults, previousResults, rootData);
-    try {
-        if (calcManifest.type === 'standard')    { resultUpdates = await StandardExecutor.run(new Date(dateStr + 'T00:00:00Z'), [calcManifest], `Pass ${pass}`, config, dependencies, rootData, existingResults, previousResults);
-        } else if (calcManifest.type === 'meta') { resultUpdates = await MetaExecutor.run    (new Date(dateStr + 'T00:00:00Z'), [calcManifest], `Pass ${pass}`, config, dependencies, existingResults, previousResults, rootData);
-        }
-        logger.log('INFO', `[Executor] Success: ${calcManifest.name} for ${dateStr}`);
-        return { date: dateStr, updates: resultUpdates };
-    } catch (err) {
-        logger.log('ERROR', `[Executor] Failed ${calcManifest.name}: ${err.message}`, { processId: pid, stack: err.stack });
-        throw err;
-    }
+    return { date: dateStr, updates };
 }
-module.exports = { executeDispatchTask, groupByPass, analyzeDateExecution };
+module.exports = { executeDispatchTask, analyzeDateExecution, groupByPass };