bulltrackers-module 1.0.275 → 1.0.277

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  /**
  * @fileoverview Main Orchestrator. Coordinates the topological execution.
- * UPDATED: Removed 'Permanently Impossible' optimization to ensure full visibility/recovery.
+ * UPDATED: Includes Content-Based Dependency Short-Circuiting.
  * UPDATED: Includes 'Audit Upgrade' check.
  * UPDATED: Detailed Dependency Reporting for Impossible Chains.
  */
@@ -18,21 +18,41 @@ function groupByPass(manifest) { return manifest.reduce((acc, calc) => { (acc[c

  /**
  * Analyzes whether calculations should run, be skipped, or are blocked.
+ * [NEW] Implements ResultHash short-circuit logic.
  */
  function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus = null) {
  const report = { runnable: [], blocked: [], impossible: [], failedDependency: [], reRuns: [], skipped: [] };
  const simulationStatus = { ...dailyStatus };
  const isTargetToday = (dateStr === new Date().toISOString().slice(0, 10));

- const isDepSatisfied = (depName, currentStatusMap, manifestMap) => {
+ // Helper: Validates if a dependency is satisfied, either by Code Match OR Content Match
+ const isDepSatisfied = (depName, currentStatusMap, manifestMap, dependentStoredStatus) => {
  const norm = normalizeName(depName);
- const stored = currentStatusMap[norm];
+ const storedDep = currentStatusMap[norm];
  const depManifest = manifestMap.get(norm);
- if (!stored) return false;
- if (typeof stored.hash === 'string' && stored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) return false;
- if (!depManifest) return false;
- if (stored.hash !== depManifest.hash) return false;
- return true;
+
+ // 1. Basic Existence Checks
+ if (!storedDep) return false;
+ if (typeof storedDep.hash === 'string' && storedDep.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) return false;
+ if (!depManifest) return false;
+
+ // 2. Code Hash Check (The Standard Check)
+ if (storedDep.hash === depManifest.hash) return true;
+
+ // 3. [NEW] Content-Based Short-Circuit Check
+ // If Code Hash mismatch, check if the *Result Hash* is identical to what we used last time.
+ // dependentStoredStatus = The status of the calculation (B) that depends on this (A).
+ // dependentStoredStatus.dependencyResultHashes[depName] = The ResultHash of A when B last ran.
+ // storedDep.resultHash = The current ResultHash of A.
+ if (dependentStoredStatus &&
+ dependentStoredStatus.dependencyResultHashes &&
+ dependentStoredStatus.dependencyResultHashes[depName] &&
+ storedDep.resultHash &&
+ storedDep.resultHash === dependentStoredStatus.dependencyResultHashes[depName]) {
+ return true; // Short-circuit: The output didn't change, so we are safe.
+ }
+
+ return false;
  };

  for (const calc of calcsInPass) {
@@ -42,6 +62,17 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  const storedCategory = stored ? stored.category : null;
  const currentHash = calc.hash;

+ // Collect current result hashes of dependencies for the next run
+ const currentDependencyResultHashes = {};
+ if (calc.dependencies) {
+ calc.dependencies.forEach(d => {
+ const normD = normalizeName(d);
+ if (simulationStatus[normD] && simulationStatus[normD].resultHash) {
+ currentDependencyResultHashes[d] = simulationStatus[normD].resultHash;
+ }
+ });
+ }
+
  const markImpossible = (reason, type = 'GENERIC') => {
  report.impossible.push({ name: cName, reason });
  const statusHash = `${STATUS_IMPOSSIBLE_PREFIX}:${type}`;
@@ -49,25 +80,31 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  };

  const markRunnable = (isReRun = false, reRunDetails = null) => {
- if (isReRun) report.reRuns.push(reRunDetails);
- else report.runnable.push({ name: cName, ...reRunDetails });
+ const payload = {
+ name: cName,
+ ...reRunDetails,
+ dependencyResultHashes: currentDependencyResultHashes // Pass forward
+ };
+ if (isReRun) report.reRuns.push(payload);
+ else report.runnable.push(payload);
  // Simulate success so dependents can pass their check
- simulationStatus[cName] = { hash: currentHash, category: calc.category, composition: calc.composition };
+ simulationStatus[cName] = {
+ hash: currentHash,
+ resultHash: 'SIMULATED',
+ category: calc.category,
+ composition: calc.composition
+ };
  };

  let migrationOldCategory = null;
  if (storedCategory && storedCategory !== calc.category) { migrationOldCategory = storedCategory; }

- // [REMOVED] The "Permanently Impossible" optimization block was here.
- // Removal ensures we re-check Root Data every time, allowing for visibility and recovery.
-
- // 1. Check Root Data (The Primary Gate)
+ // 1. Check Root Data
  const rootCheck = checkRootDependencies(calc, rootDataStatus);

  if (!rootCheck.canRun) {
  const missingStr = rootCheck.missing.join(', ');
  if (!isTargetToday) {
- // If previously impossible, this confirms it. If previously run, this is a regression.
  markImpossible(`Missing Root Data: ${missingStr} (Historical)`, 'NO_DATA');
  } else {
  report.blocked.push({ name: cName, reason: `Missing Root Data: ${missingStr} (Waiting)` });
@@ -85,15 +122,15 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  const depStored = simulationStatus[normDep];
  if (depStored && typeof depStored.hash === 'string' && depStored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) {
  dependencyIsImpossible = true;
- impossibleDepCause = dep; // Capture the culprit
+ impossibleDepCause = dep;
  break;
  }
- if (!isDepSatisfied(dep, simulationStatus, manifestMap)) { missingDeps.push(dep); }
+ // Pass 'stored' (this calc's status) to check short-circuiting
+ if (!isDepSatisfied(dep, simulationStatus, manifestMap, stored)) { missingDeps.push(dep); }
  }
  }

  if (dependencyIsImpossible) {
- // [UPDATED] Include the name of the failing dependency in the reason string
  markImpossible(`Dependency is Impossible (${impossibleDepCause})`, 'UPSTREAM');
  continue;
  }
@@ -117,7 +154,12 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  markRunnable(false, { reason: "New Calculation" });
  }
  else if (storedHash !== currentHash) {
- // Smart Audit Logic
+ // [NEW] Check if Dependencies caused this, and if their content is actually same
+ // Note: If we are here, it means code changed.
+ // Short-circuiting logic was handled in 'isDepSatisfied' for upstream checks.
+ // But if *my* code changed, I must re-run unless I implement output-caching which is dangerous.
+ // So we strictly re-run if code changes.
+
  let changeReason = "Hash Mismatch (Unknown)";
  const oldComp = stored.composition;
  const newComp = calc.composition;
@@ -134,6 +176,11 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  changeReason = `Layer Update: [${changedLayers.join(', ')}]`;
  }
  else if (JSON.stringify(oldComp.deps) !== JSON.stringify(newComp.deps)) {
+ // Dependency Hash Mismatch.
+ // This is where we COULD have short-circuited if we weren't enforcing code-hash strictness here.
+ // But typically if code hash mismatches, we re-run.
+ // The "Short-Circuit" benefit is mainly that *dependents* of this calculation
+ // won't need to re-run if *this* calculation produces the same output.
  const changedDeps = [];
  for(const dKey in newComp.deps) {
  if (newComp.deps[dKey] !== oldComp.deps[dKey]) changedDeps.push(dKey);
@@ -158,7 +205,6 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  else if (migrationOldCategory) {
  markRunnable(true, { name: cName, reason: 'Category Migration', previousCategory: migrationOldCategory, newCategory: calc.category });
  }
- // Audit Upgrade Check
  else if (!stored.composition) {
  markRunnable(true, {
  name: cName,
@@ -177,8 +223,9 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,

  /**
  * DIRECT EXECUTION PIPELINE (For Workers)
+ * [UPDATED] Accepts dependencyResultHashes
  */
- async function executeDispatchTask(dateStr, pass, targetComputation, config, dependencies, computationManifest, previousCategory = null) {
+ async function executeDispatchTask(dateStr, pass, targetComputation, config, dependencies, computationManifest, previousCategory = null, dependencyResultHashes = {}) {
  const { logger } = dependencies;
  const pid = generateProcessId(PROCESS_TYPES.EXECUTOR, targetComputation, dateStr);

@@ -187,6 +234,9 @@ async function executeDispatchTask(dateStr, pass, targetComputation, config, dep

  if (!calcManifest) { throw new Error(`Calculation '${targetComputation}' not found in manifest.`); }

+ // [NEW] Attach the dependency result hashes to the manifest so ResultCommitter can save them
+ calcManifest.dependencyResultHashes = dependencyResultHashes;
+
  if (previousCategory) {
  calcManifest.previousCategory = previousCategory;
  logger.log('INFO', `[Executor] Migration detected for ${calcManifest.name}. Old data will be cleaned from: ${previousCategory}`);
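
The orchestrator changes above hinge on each calculation's daily status document carrying a content hash of its output (resultHash) alongside its code hash, plus a record of the dependency output hashes it consumed (dependencyResultHashes). The following is a minimal sketch of that decision, using invented hash values and a hypothetical helper name (dependencyStillSatisfied) rather than the package's own isDepSatisfied:

// Illustrative status shapes; field names match the diff, values are invented.
const statusOfA = { hash: 'code-hash-A-v2', resultHash: 'result-hash-A-1' };
const statusOfB = { hash: 'code-hash-B-v1', dependencyResultHashes: { A: 'result-hash-A-1' } };

// Distilled decision: a code match OR a content match satisfies the dependency.
function dependencyStillSatisfied(depStatus, depManifestHash, dependentStatus, depName) {
  if (!depStatus) return false;
  if (depStatus.hash === depManifestHash) return true;               // code unchanged
  const remembered = dependentStatus && dependentStatus.dependencyResultHashes
    ? dependentStatus.dependencyResultHashes[depName] : null;
  return Boolean(remembered && depStatus.resultHash === remembered); // output unchanged
}

// A's code hash moved to v3, but its output hash is still what B consumed last run:
console.log(dependencyStillSatisfied(statusOfA, 'code-hash-A-v3', statusOfB, 'A')); // true
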
@@ -1,5 +1,6 @@
  /**
  * @fileoverview Execution-scoped data loader with caching.
+ * UPDATED: Handles Decompression of Shards.
  */
  const {
  loadDailyInsights,
@@ -7,6 +8,7 @@ const {
  getRelevantShardRefs,
  getPriceShardRefs
  } = require('../utils/data_loader');
+ const zlib = require('zlib'); // [NEW]

  class CachedDataLoader {
  constructor(config, dependencies) {
@@ -19,6 +21,19 @@ class CachedDataLoader {
  };
  }

+ // [NEW] Decompression Helper
+ _tryDecompress(data) {
+ if (data && data._compressed === true && data.payload) {
+ try {
+ return JSON.parse(zlib.gunzipSync(data.payload).toString());
+ } catch (e) {
+ console.error('[CachedDataLoader] Decompression failed', e);
+ return {};
+ }
+ }
+ return data;
+ }
+
  async loadMappings() {
  if (this.cache.mappings) return this.cache.mappings;
  const { calculationUtils } = this.deps;
@@ -52,7 +67,8 @@ class CachedDataLoader {
  try {
  const snap = await docRef.get();
  if (!snap.exists) return {};
- return snap.data();
+ // [UPDATED] Use decompression helper
+ return this._tryDecompress(snap.data());
  } catch (e) {
  console.error(`Error loading shard ${docRef.path}:`, e);
  return {};
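
The loaders above rely on a shared storage convention: a document that was gzipped on write carries _compressed: true and a payload Bytes field. Below is a small round-trip sketch using Node's built-in zlib; the document shape mirrors the diff, while the sample data is invented:

const zlib = require('zlib');

const original = { AAPL: { score: 1.2 }, TSLA: { score: -0.4 } }; // invented sample result

// What the writer stores when a result fits in one gzipped document:
const storedDoc = {
  _compressed: true,
  _completed: true,
  payload: zlib.gzipSync(Buffer.from(JSON.stringify(original))),
};

// What a reader such as _tryDecompress() recovers; plain docs pass through untouched:
function tryDecompress(data) {
  if (data && data._compressed === true && data.payload) {
    return JSON.parse(zlib.gunzipSync(data.payload).toString());
  }
  return data;
}

console.log(tryDecompress(storedDoc));       // { AAPL: { score: 1.2 }, TSLA: { score: -0.4 } }
console.log(tryDecompress({ plain: true })); // { plain: true }
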
@@ -1,7 +1,8 @@
  /**
- * @fileoverview Fetches results from previous computations, handling auto-sharding hydration.
+ * @fileoverview Fetches results from previous computations, handling auto-sharding and decompression.
  */
  const { normalizeName } = require('../utils/utils');
+ const zlib = require('zlib'); // [NEW]

  async function fetchExistingResults(dateStr, calcsInPass, fullManifest, config, { db }, includeSelf = false) {
  const manifestMap = new Map(fullManifest.map(c => [normalizeName(c.name), c]));
@@ -39,7 +40,20 @@ async function fetchExistingResults(dateStr, calcsInPass, fullManifest, config,
  const name = names[i];
  if (!doc.exists) return;
  const data = doc.data();
- if (data._sharded === true) {
+
+ // --- [NEW] DECOMPRESSION LOGIC ---
+ if (data._compressed === true && data.payload) {
+ try {
+ // Firestore returns Buffers automatically
+ const unzipped = zlib.gunzipSync(data.payload);
+ fetched[name] = JSON.parse(unzipped.toString());
+ } catch (e) {
+ console.error(`[Hydration] Failed to decompress ${name}:`, e);
+ fetched[name] = {};
+ }
+ }
+ // --- END NEW LOGIC ---
+ else if (data._sharded === true) {
  hydrationPromises.push(hydrateAutoShardedResult(doc.ref, name));
  } else if (data._completed) {
  fetched[name] = data;
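
With this hunk a stored result document can arrive in one of three shapes, and the hydration loop checks them in a fixed order. A condensed sketch of that branching follows; classifyResultDoc is an illustrative name, not part of the package:

// Order matters: compressed blobs are handled before the sharding pointer check.
function classifyResultDoc(data) {
  if (data._compressed === true && data.payload) return 'GZIP_BLOB'; // gunzip the payload in place
  if (data._sharded === true) return 'SHARDED';                      // hydrate shard_N documents
  if (data._completed) return 'INLINE';                              // use the document as-is
  return 'UNKNOWN';
}
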
@@ -1,7 +1,7 @@
  /**
  * FILENAME: computation-system/helpers/computation_dispatcher.js
  * PURPOSE: "Smart Dispatcher" - Analyzes state and only dispatches valid, runnable tasks.
- * UPDATED: Adds 'dispatchId' to payloads for precise tracing.
+ * UPDATED: Implements Zombie Task Recovery & Dependency Result Hash Passing.
  */

  const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -11,7 +11,7 @@ const { fetchComputationStatus, updateComputationStatus } = require('../persiste
  const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
  const { generateCodeHash } = require('../topology/HashManager');
  const pLimit = require('p-limit');
- const crypto = require('crypto'); // REQUIRED for UUID
+ const crypto = require('crypto');

  const TOPIC_NAME = 'computation-tasks';
  const STATUS_IMPOSSIBLE = 'IMPOSSIBLE';
@@ -105,18 +105,19 @@ async function dispatchComputationPass(config, dependencies, computationManifest

  const validToRun = [...report.runnable, ...report.reRuns];
  validToRun.forEach(item => {
- // [NEW] Generate Unique ID
  const uniqueDispatchId = crypto.randomUUID();

  tasksToDispatch.push({
  action: 'RUN_COMPUTATION_DATE',
- dispatchId: uniqueDispatchId, // <--- TRACKING ID
+ dispatchId: uniqueDispatchId,
  date: dateStr,
  pass: passToRun,
  computation: normalizeName(item.name),
  hash: item.hash || item.newHash,
  previousCategory: item.previousCategory || null,
- triggerReason: item.reason || "Unknown",
+ triggerReason: item.reason || "Unknown",
+ // [NEW] Pass Content-Based hashes provided by analyzeDateExecution
+ dependencyResultHashes: item.dependencyResultHashes || {},
  timestamp: Date.now()
  });
  });
@@ -141,16 +142,24 @@ async function dispatchComputationPass(config, dependencies, computationManifest
  await db.runTransaction(async (t) => {
  const doc = await t.get(ledgerRef);

- // If task is PENDING, we assume it's running.
- // However, we now OVERWRITE if it's been pending for > 1 hour (stuck state)
- // For safety on your budget, we stick to strict "PENDING" check.
- if (doc.exists && doc.data().status === 'PENDING') {
- return false;
+ // [NEW] Zombie Task Recovery Check
+ if (doc.exists) {
+ const data = doc.data();
+ const now = Date.now();
+ const isPending = data.status === 'PENDING';
+ // A task is a zombie if it is PENDING and the lease has expired (or lease is missing but it's been > 1h)
+ const isLeaseExpired = data.leaseExpiresAt && data.leaseExpiresAt < now;
+ // Fallback: If no lease exists, assume 1 hour timeout for legacy zombie detection
+ const isLegacyZombie = !data.leaseExpiresAt && data.createdAt && (now - data.createdAt.toMillis() > 3600000);
+
+ if (isPending && !isLeaseExpired && !isLegacyZombie) {
+ return false; // Valid active pending task, do not double dispatch
+ }
  }

  t.set(ledgerRef, {
  status: 'PENDING',
- dispatchId: task.dispatchId, // <--- Store ID in Ledger
+ dispatchId: task.dispatchId,
  computation: task.computation,
  expectedHash: task.hash || 'unknown',
  createdAt: new Date(),
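
The transaction above treats a PENDING ledger entry as live only while its lease holds. Here is a distilled sketch of that predicate, using plain millisecond timestamps (the real code reads a Firestore Timestamp via createdAt.toMillis(); isZombie is an illustrative name):

function isZombie(ledgerEntry, nowMs = Date.now()) {
  if (ledgerEntry.status !== 'PENDING') return false;   // only PENDING entries can be zombies
  if (ledgerEntry.leaseExpiresAt) {
    return ledgerEntry.leaseExpiresAt < nowMs;          // lease-based detection
  }
  const ageMs = ledgerEntry.createdAt != null ? nowMs - ledgerEntry.createdAt : Infinity;
  return ageMs > 60 * 60 * 1000;                        // legacy fallback: stuck for over an hour
}

// A PENDING task whose lease lapsed five minutes ago gets re-dispatched:
console.log(isZombie({ status: 'PENDING', leaseExpiresAt: Date.now() - 5 * 60 * 1000 })); // true
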
@@ -1,8 +1,7 @@
  /**
  * FILENAME: computation-system/helpers/computation_worker.js
  * PURPOSE: Consumes computation tasks from Pub/Sub.
- * UPDATED: Logs 'dispatchId' for tracing.
- * UPDATED: Includes Deterministic Error Short-Circuit (Poison Pill Protection).
+ * UPDATED: Implements Lease Claiming and passes Dependency Hashes.
  */

  const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
@@ -14,7 +13,7 @@ let calculationPackage;
  try { calculationPackage = require('aiden-shared-calculations-unified');
  } catch (e) {console.error("FATAL: Could not load 'aiden-shared-calculations-unified'."); throw e; }
  const calculations = calculationPackage.calculations;
- const MAX_RETRIES = 0; // <--- CHANGED TO 0 (Application level check, though Pub/Sub config is better)
+ const MAX_RETRIES = 0;

  async function handleComputationTask(message, config, dependencies) {
  const systemLogger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
@@ -31,17 +30,31 @@ async function handleComputationTask(message, config, dependencies) {

  if (!data || data.action !== 'RUN_COMPUTATION_DATE') { return; }

- // Extract Trigger Reason and Dispatch ID
- const { date, pass, computation, previousCategory, triggerReason, dispatchId } = data;
+ // Extract fields including new dependencyResultHashes
+ const { date, pass, computation, previousCategory, triggerReason, dispatchId, dependencyResultHashes } = data;

  if (!date || !pass || !computation) { logger.log('ERROR', `[Worker] Invalid payload.`, data); return; }

- // LOG THE ID FOR TRACING
  logger.log('INFO', `[Worker] 📥 Received Task: ${computation} (${date})`, {
  dispatchId: dispatchId || 'legacy',
  reason: triggerReason
  });

+ // [NEW] LEASE CLAIMING
+ // Mark task as IN_PROGRESS and set a lease timeout (e.g., 20 minutes) to prevent Zombies
+ try {
+ const leaseTimeMs = (config.workerLeaseMinutes || 20) * 60 * 1000;
+ await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
+ status: 'IN_PROGRESS',
+ workerId: process.env.K_REVISION || 'unknown',
+ startedAt: new Date(),
+ leaseExpiresAt: Date.now() + leaseTimeMs,
+ dispatchId: dispatchId
+ }, { merge: true });
+ } catch (leaseErr) {
+ logger.log('WARN', `[Worker] Failed to claim lease for ${computation}. Continuing anyway...`, leaseErr);
+ }
+
  let computationManifest;
  try { computationManifest = getManifest(config.activeProductLines || [], calculations, runDependencies);
  } catch (manifestError) {
@@ -59,7 +72,8 @@ async function handleComputationTask(message, config, dependencies) {
  config,
  runDependencies,
  computationManifest,
- previousCategory
+ previousCategory,
+ dependencyResultHashes // [NEW] Pass hashes to executor
  );
  const duration = Date.now() - startTime;

@@ -79,6 +93,13 @@ async function handleComputationTask(message, config, dependencies) {
  const metrics = successData.metrics || {};
  metrics.durationMs = duration;
  logger.log('INFO', `[Worker] ✅ Stored: ${computation}. ID: ${dispatchId}`);
+
+ // Mark Ledger as COMPLETED
+ await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).update({
+ status: 'COMPLETED',
+ completedAt: new Date()
+ }).catch(() => {});
+
  await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, metrics, triggerReason);
  }
  else {
@@ -86,7 +107,6 @@ async function handleComputationTask(message, config, dependencies) {
  await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', { message: 'Empty Result' }, { durationMs: duration }, triggerReason);
  }
  } catch (err) {
- // --- DETERMINISTIC ERROR SHORT-CIRCUIT ---
  const isDeterministicError = err.stage === 'SHARDING_LIMIT_EXCEEDED' ||
  err.stage === 'QUALITY_CIRCUIT_BREAKER' ||
  (err.message && (err.message.includes('INVALID_ARGUMENT') || err.message.includes('Transaction too big')));
@@ -101,15 +121,12 @@ async function handleComputationTask(message, config, dependencies) {
  finalAttemptAt: new Date(),
  failureReason: 'PERMANENT_DETERMINISTIC_ERROR'
  });
- // Return success to Pub/Sub to STOP retries
  await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'PERMANENT_FAIL' }, { durationMs: 0 }, triggerReason);
  return;
  } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
  }

- // --- STANDARD RETRY ---
  const retryCount = message.deliveryAttempt || 0;
- // NOTE: If you configure Pub/Sub Max Attempts = 1, this logic is redundant but safe.
  if (retryCount >= MAX_RETRIES) {
  logger.log('ERROR', `[Worker] ☠️ Task POISONED. Moved to DLQ: ${computation}`);
  try {
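
End to end, the dispatcher and worker now coordinate through the task payload and the audit-ledger document. The sketch below shows both with sample values only; the field names appear in the diff, while the computation name and hashes are invented:

// Pub/Sub payload built by the dispatcher:
const taskPayload = {
  action: 'RUN_COMPUTATION_DATE',
  dispatchId: 'a1b2c3d4-uuid',                         // crypto.randomUUID()
  date: '2024-06-01',
  pass: 'pass_2',
  computation: 'portfolio_momentum',                   // invented calculation name
  hash: 'code-hash-v7',
  previousCategory: null,
  triggerReason: 'New Calculation',
  dependencyResultHashes: { price_history: 'abc123' }, // consumed later by ResultCommitter
  timestamp: Date.now(),
};

// Ledger lifecycle for the matching task document:
// PENDING (dispatcher) -> IN_PROGRESS + leaseExpiresAt (worker claims the lease)
// -> COMPLETED on success, or dispatchable again once the lease expires (zombie recovery).
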
@@ -1,26 +1,23 @@
  /**
  * @fileoverview Handles saving computation results with observability and Smart Cleanup.
- * UPDATED: Added "Strategy 4" (50 keys) to handle 'too many index entries' errors.
- * UPDATED: Supports Incremental (Flush) Commits to prevent OOM.
- * FIX: Throws proper Error objects.
+ * UPDATED: Implements GZIP Compression for efficient storage.
+ * UPDATED: Implements Content-Based Hashing (ResultHash) for dependency short-circuiting.
  */
- const { commitBatchInChunks } = require('./FirestoreUtils');
+ const { commitBatchInChunks, generateDataHash } = require('../utils/utils');
  const { updateComputationStatus } = require('./StatusRepository');
  const { batchStoreSchemas } = require('../utils/schema_capture');
  const { generateProcessId, PROCESS_TYPES } = require('../logger/logger');
  const { HeuristicValidator } = require('./ResultsValidator');
  const validationOverrides = require('../config/validation_overrides');
  const pLimit = require('p-limit');
+ const zlib = require('zlib'); // [NEW] Compression Lib

  const NON_RETRYABLE_ERRORS = [
  'PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION'
- // removed INVALID_ARGUMENT from here as it covers 'too many index entries' which IS retryable via sharding
  ];

  /**
  * Commits results to Firestore.
- * @param {Object} options.flushMode - 'STANDARD', 'INTERMEDIATE', 'FINAL'
- * @param {Object} options.shardIndexes - Map of { calcName: currentShardIndex }
  */
  async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false, options = {}) {
  const successUpdates = {};
@@ -52,7 +49,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  const result = await calc.getResult();
  const overrides = validationOverrides[calc.manifest.name] || {};

- // Only validate if we have data or if it's the final flush
+ // Validation
  if (result && Object.keys(result).length > 0) {
  const healthCheck = HeuristicValidator.analyze(calc.manifest.name, result, overrides);
  if (!healthCheck.valid) {
@@ -66,16 +63,20 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW

  const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0);

- // If empty and standard mode, record 0-byte success.
- // If empty and INTERMEDIATE flush, just skip this calc for this flush.
+ // Calculate Result Hash (Content-Based)
+ const resultHash = isEmpty ? 'empty' : generateDataHash(result);
+
+ // Handle Empty Results
  if (isEmpty) {
  if (flushMode === 'INTERMEDIATE') {
- nextShardIndexes[name] = currentShardIndex; // No change
+ nextShardIndexes[name] = currentShardIndex;
  continue;
  }
  if (calc.manifest.hash) {
  successUpdates[name] = {
- hash: calc.manifest.hash,
+ hash: calc.manifest.hash,
+ resultHash: resultHash,
+ dependencyResultHashes: calc.manifest.dependencyResultHashes || {},
  category: calc.manifest.category,
  composition: calc.manifest.composition,
  metrics: runMetrics
@@ -90,7 +91,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  const isMultiDate = resultKeys.length > 0 && resultKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));

  if (isMultiDate) {
- // Multi-Date Fan-Out Logic (Not optimized for incremental yet, falls back to standard per-date write)
  const datePromises = resultKeys.map((historicalDate) => fanOutLimit(async () => {
  const dailyData = result[historicalDate];
  if (!dailyData || Object.keys(dailyData).length === 0) return;
@@ -102,7 +102,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  .collection(config.computationsSubcollection)
  .doc(name);

- // For historical Fan-Out, we assume standard flush mode (not incremental) for now
  await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD');
  }));
  await Promise.all(datePromises);
@@ -110,6 +109,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  if (calc.manifest.hash) {
  successUpdates[name] = {
  hash: calc.manifest.hash,
+ resultHash: resultHash,
+ dependencyResultHashes: calc.manifest.dependencyResultHashes || {},
  category: calc.manifest.category,
  composition: calc.manifest.composition,
  metrics: runMetrics
@@ -117,7 +118,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  }
  }

  } else {
- // --- STANDARD / INCREMENTAL MODE ---
  const mainDocRef = db.collection(config.resultsCollection)
  .doc(dStr)
  .collection(config.resultsSubcollection)
@@ -131,12 +131,13 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  runMetrics.storage.isSharded = writeStats.isSharded;
  runMetrics.storage.shardCount = writeStats.shardCount;

- // Track next index for subsequent flushes
  nextShardIndexes[name] = writeStats.nextShardIndex;

  if (calc.manifest.hash) {
  successUpdates[name] = {
  hash: calc.manifest.hash,
+ resultHash: resultHash,
+ dependencyResultHashes: calc.manifest.dependencyResultHashes || {},
  category: calc.manifest.category,
  composition: calc.manifest.composition,
  metrics: runMetrics
@@ -171,10 +172,44 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  }

  async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD') {
- // Strategy 1: Standard (900KB, no key limit)
- // Strategy 2: Aggressive Bytes (450KB, 10k keys)
- // Strategy 3: Very Aggressive (200KB, 2k keys)
- // Strategy 4: [NEW] Index Explosion Protection (100KB, 50 keys) - Handles "too many index entries"
+
+ // --- [NEW] COMPRESSION STRATEGY ---
+ // Try to compress before falling back to complex sharding
+ try {
+ const jsonString = JSON.stringify(result);
+ const rawBuffer = Buffer.from(jsonString);
+
+ // Only attempt if meaningful size (> 50KB)
+ if (rawBuffer.length > 50 * 1024) {
+ const compressedBuffer = zlib.gzipSync(rawBuffer);
+
+ // If compressed fits in one document (< 900KB safety limit)
+ if (compressedBuffer.length < 900 * 1024) {
+ logger.log('INFO', `[Compression] ${name}: Compressed ${(rawBuffer.length/1024).toFixed(0)}KB -> ${(compressedBuffer.length/1024).toFixed(0)}KB. Saved as Blob.`);
+
+ const compressedPayload = {
+ _compressed: true,
+ _completed: true,
+ _lastUpdated: new Date().toISOString(),
+ payload: compressedBuffer
+ };
+
+ // Write immediately
+ await docRef.set(compressedPayload, { merge: true });
+
+ return {
+ totalSize: compressedBuffer.length,
+ isSharded: false,
+ shardCount: 1,
+ nextShardIndex: startShardIndex
+ };
+ }
+ }
+ } catch (compErr) {
+ logger.log('WARN', `[Compression] Failed to compress ${name}. Falling back to standard sharding.`, compErr);
+ }
+ // --- END COMPRESSION STRATEGY ---
+
  const strategies = [
  { bytes: 900 * 1024, keys: null },
  { bytes: 450 * 1024, keys: 10000 },
@@ -190,14 +225,10 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
  const constraints = strategies[attempt];
  try {
  const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys, startShardIndex, flushMode);
+ const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined);

- // Analyze the update batch
- const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined); // Pointer is on the main doc
-
- // Calculate stats
  finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);

- // Logic to determine next shard index
  let maxIndex = startShardIndex;
  updates.forEach(u => {
  const segs = u.ref.path.split('/');
@@ -224,8 +255,6 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
  lastError = commitErr;
  const msg = commitErr.message || '';
  const code = commitErr.code || '';
-
- // Check for explicit "too many index entries" or transaction size issues
  const isIndexError = msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT');
  const isSizeError = msg.includes('Transaction too big') || msg.includes('payload is too large');

@@ -233,14 +262,11 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
  logger.log('ERROR', `[SelfHealing] ${name} FATAL error: ${msg}.`);
  throw commitErr;
  }
-
  if (isIndexError || isSizeError) {
  logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}/${strategies.length}. Strategy: ${JSON.stringify(constraints)}. Error: ${msg}. Retrying with stricter limits...`);
  continue;
- }
- else {
+ } else {
  logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} unknown error. Retrying...`, { error: msg });
- // We typically retry same strategy for unknown transient errors, but here we iterate to be safe.
  continue;
  }
  }
@@ -261,32 +287,28 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
  let currentChunk = {}; let currentChunkSize = 0; let currentKeyCount = 0;
  let shardIndex = startShardIndex;

- // Small Data Optimization (Only valid if we are not in an incremental flow or if it's the first standard run)
  if (!maxKeys && (totalSize + docPathSize) < CHUNK_LIMIT && flushMode === 'STANDARD' && startShardIndex === 0) {
  const data = { ...result, _completed: true, _sharded: false, _lastUpdated: new Date().toISOString() };
  return [{ ref: docRef, data, options: { merge: true } }];
  }

- // Sharding Logic
  for (const [key, value] of Object.entries(result)) {
  if (key.startsWith('_')) continue;
  const keySize = Buffer.byteLength(key, 'utf8') + 1; const valueSize = calculateFirestoreBytes(value); const itemSize = keySize + valueSize;
  const byteLimitReached = (currentChunkSize + itemSize > CHUNK_LIMIT); const keyLimitReached = (maxKeys && currentKeyCount + 1 >= maxKeys);

  if (byteLimitReached || keyLimitReached) {
- writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); // Overwrite shard doc
+ writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
  shardIndex++; currentChunk = {}; currentChunkSize = 0; currentKeyCount = 0;
  }
  currentChunk[key] = value; currentChunkSize += itemSize; currentKeyCount++;
  }

- // Push remaining chunk
  if (Object.keys(currentChunk).length > 0) {
  writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
- shardIndex++; // Increment so count is correct (0-based index means count is index+1)
+ shardIndex++;
  }

- // Pointer Logic
  if (flushMode !== 'INTERMEDIATE') {
  const pointerData = {
  _completed: true,
@@ -294,13 +316,12 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
  _shardCount: shardIndex,
  _lastUpdated: new Date().toISOString()
  };
- writes.push({ ref: docRef, data: pointerData, options: { merge: true } }); // Merge pointer
+ writes.push({ ref: docRef, data: pointerData, options: { merge: true } });
  }

  return writes;
  }

- // ... (Rest of file: deleteOldCalculationData, calculateFirestoreBytes remains unchanged) ...
  async function deleteOldCalculationData(dateStr, oldCategory, calcName, config, deps) {
  const { db, logger, calculationUtils } = deps;
  const { withRetry } = calculationUtils || { withRetry: (fn) => fn() };
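
The committer now tries a single gzipped blob before falling back to the sharding strategies. A simplified sketch of that decision follows; the thresholds are taken from the diff, while chooseWriteMode is an illustrative name and the real code also handles intermediate flushes and shard indexes:

const zlib = require('zlib');

function chooseWriteMode(result) {
  const raw = Buffer.from(JSON.stringify(result));
  if (raw.length <= 50 * 1024) return { mode: 'PLAIN' };          // small: write as-is
  const gz = zlib.gzipSync(raw);
  if (gz.length < 900 * 1024) {                                   // fits one Firestore document
    return { mode: 'COMPRESSED', payload: gz, savedBytes: raw.length - gz.length };
  }
  return { mode: 'SHARDED' };                                     // fall back to shard_N writes
}

// Example: a 5 MB result that gzips to roughly 400 KB would be stored as one compressed blob.
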
@@ -4,7 +4,22 @@
  * --- NEW: Added streamPortfolioData async generator ---
  * --- FIXED: streamPortfolioData and streamHistoryData now accept optional 'providedRefs' ---
  * --- UPDATE: Added Smart Shard Indexing for specific ticker lookups ---
+ * --- UPDATE: Added GZIP Decompression Support for robust data loading ---
  */
+ const zlib = require('zlib'); // [NEW]
+
+ // [NEW] Helper for decompressing any doc if needed
+ function tryDecompress(data) {
+ if (data && data._compressed === true && data.payload) {
+ try {
+ return JSON.parse(zlib.gunzipSync(data.payload).toString());
+ } catch (e) {
+ console.error('[DataLoader] Decompression failed', e);
+ return {};
+ }
+ }
+ return data;
+ }

  /** --- Data Loader Sub-Pipes (Stateless, Dependency-Injection) --- */

@@ -39,7 +54,10 @@ async function loadDataByRefs(config, deps, refs) {
  const snapshots = await withRetry(() => db.getAll(...batchRefs), `getAll(batch ${Math.floor(i / batchSize)})`);
  for (const doc of snapshots) {
  if (!doc.exists) continue;
- const data = doc.data();
+ const rawData = doc.data();
+ // [UPDATED] Decompress if needed
+ const data = tryDecompress(rawData);
+
  if (data && typeof data === 'object') Object.assign(mergedPortfolios, data);
  else logger.log('WARN', `Doc ${doc.id} exists but data is not an object`, data);
  }
@@ -68,7 +86,8 @@ async function loadDailyInsights(config, deps, dateString) {
  const docSnap = await withRetry(() => docRef.get(), `getInsights(${dateString})`);
  if (!docSnap.exists) { logger.log('WARN', `Insights not found for ${dateString}`); return null; }
  logger.log('TRACE', `Successfully loaded insights for ${dateString}`);
- return docSnap.data();
+ // [UPDATED] Decompress
+ return tryDecompress(docSnap.data());
  } catch (error) {
  logger.log('ERROR', `Failed to load daily insights for ${dateString}`, { errorMessage: error.message });
  return null;
@@ -86,7 +105,10 @@ async function loadDailySocialPostInsights(config, deps, dateString) {
  const querySnapshot = await withRetry(() => postsCollectionRef.get(), `getSocialPosts(${dateString})`);
  if (querySnapshot.empty) { logger.log('WARN', `No social post insights for ${dateString}`); return null; }
  const postsMap = {};
- querySnapshot.forEach(doc => { postsMap[doc.id] = doc.data(); });
+ querySnapshot.forEach(doc => {
+ // [UPDATED] Decompress individual posts if needed
+ postsMap[doc.id] = tryDecompress(doc.data());
+ });
  logger.log('TRACE', `Loaded ${Object.keys(postsMap).length} social post insights`);
  return postsMap;
  } catch (error) {
@@ -168,12 +190,6 @@ async function getPriceShardRefs(config, deps) {
  * when only specific tickers are needed.
  */

- /**
- * Ensures the Price Shard Index exists. If not, builds it by scanning all shards.
- * @param {object} config
- * @param {object} deps
- * @returns {Promise<Object>} The lookup map { "instrumentId": "shardDocId" }
- */
  /**
  * Ensures the Price Shard Index exists. If not, builds it by scanning all shards.
  * [FIX] Added TTL check to ensure new instruments are discovered.
@@ -205,7 +221,10 @@ async function ensurePriceShardIndex(config, deps) {

  snapshot.forEach(doc => {
  shardCount++;
- const data = doc.data();
+ // [UPDATED] Robustly handle compressed shards during indexing
+ const rawData = doc.data();
+ const data = tryDecompress(rawData);
+
  if (data.history) {
  Object.keys(data.history).forEach(instId => {
  index[instId] = doc.id;
@@ -273,4 +292,4 @@ module.exports = {
  getPriceShardRefs,
  ensurePriceShardIndex,
  getRelevantShardRefs
- };
+ };
@@ -28,6 +28,34 @@ function generateCodeHash(codeString) {
  return crypto.createHash('sha256').update(clean).digest('hex');
  }

+ /**
+ * [NEW] Generates a stable SHA-256 hash of a data object.
+ * Keys are sorted to ensure determinism.
+ */
+ function generateDataHash(data) {
+ if (data === undefined) return 'undefined';
+
+ // Recursive stable stringify
+ const stableStringify = (obj) => {
+ if (typeof obj !== 'object' || obj === null) {
+ return JSON.stringify(obj);
+ }
+ if (Array.isArray(obj)) {
+ return '[' + obj.map(stableStringify).join(',') + ']';
+ }
+ return '{' + Object.keys(obj).sort().map(k =>
+ JSON.stringify(k) + ':' + stableStringify(obj[k])
+ ).join(',') + '}';
+ };
+
+ try {
+ const str = stableStringify(data);
+ return crypto.createHash('sha256').update(str).digest('hex');
+ } catch (e) {
+ return 'hash_error';
+ }
+ }
+
  /**
  * Executes a function with exponential backoff retry logic.
  * @param {Function} fn - Async function to execute
@@ -49,7 +77,7 @@ async function withRetry(fn, operationName, maxRetries = 3) {
  }
  }

- /** * Stage 2: Commit a batch of writes in chunks
+ /** Stage 2: Commit a batch of writes in chunks
  * FIXED: Now respects write.options (e.g. { merge: false }) to allow overwrites/deletes.
  */
  async function commitBatchInChunks(config, deps, writes, operationName) {
@@ -220,6 +248,7 @@ module.exports = {
  getExpectedDateStrings,
  getEarliestDataDates,
  generateCodeHash,
+ generateDataHash, // Exported
  withRetry,
  DEFINITIVE_EARLIEST_DATES
  };
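
generateDataHash is what makes the resultHash comparisons meaningful across runs: the stable stringify sorts object keys recursively, so two results with the same content hash identically regardless of key order. A small check of that property, assuming Node's crypto module as used above:

const crypto = require('crypto');

// Same stable-stringify idea as generateDataHash, reduced to the essentials.
const stableStringify = (obj) => {
  if (typeof obj !== 'object' || obj === null) return JSON.stringify(obj);
  if (Array.isArray(obj)) return '[' + obj.map(stableStringify).join(',') + ']';
  return '{' + Object.keys(obj).sort()
    .map((k) => JSON.stringify(k) + ':' + stableStringify(obj[k])).join(',') + '}';
};
const hashOf = (data) => crypto.createHash('sha256').update(stableStringify(data)).digest('hex');

const a = { ticker: 'AAPL', score: 1.2 };
const b = { score: 1.2, ticker: 'AAPL' };          // same content, different key order
console.log(hashOf(a) === hashOf(b));              // true
console.log(hashOf(a) === hashOf({ score: 1.3 })); // false: any content change flips the hash
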
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "bulltrackers-module",
- "version": "1.0.275",
+ "version": "1.0.277",
  "description": "Helper Functions for Bulltrackers.",
  "main": "index.js",
  "files": [