bulltrackers-module 1.0.280 → 1.0.281
This diff shows the publicly released content of these package versions as they appear in their public registry; it is provided for informational purposes only.
- package/functions/computation-system/executors/MetaExecutor.js +6 -8
- package/functions/computation-system/executors/StandardExecutor.js +20 -43
- package/functions/computation-system/persistence/ResultCommitter.js +67 -17
- package/functions/computation-system/utils/utils.js +13 -2
- package/package.json +1 -1
--- package/functions/computation-system/executors/MetaExecutor.js
+++ package/functions/computation-system/executors/MetaExecutor.js
@@ -2,7 +2,7 @@
  * @fileoverview Executor for "Meta" (global) calculations.
  * UPDATED: Uses CachedDataLoader for all data access.
  * UPDATED: Tracks processed shard/item counts.
- * UPDATED:
+ * UPDATED: Sends 'isInitialWrite: true' for robust cleanup.
  */
 const { normalizeName } = require('../utils/utils');
 const { CachedDataLoader } = require('../data/CachedDataLoader');
@@ -27,7 +27,9 @@ class MetaExecutor {
         deps.logger.log('ERROR', `Meta calc failed ${mCalc.name}: ${e.message}`);
       }
     }
-
+
+    // [UPDATED] Meta Calcs run once per day, so isInitialWrite is always true
+    return await commitResults(state, dStr, passName, config, deps, skipStatusWrite, { isInitialWrite: true });
   }
 
   static async executeOncePerDay(calcInstance, metadata, dateStr, computedDeps, prevDeps, config, deps, loader) {
@@ -35,7 +37,6 @@ class MetaExecutor {
     const { logger } = deps;
     const stats = { processedShards: 0, processedItems: 0 };
 
-    // Lazy fetch insights/social using the loader
     const insights = metadata.rootDataDependencies?.includes('insights') ? { today: await loader.loadInsights(dateStr) } : null;
     const social = metadata.rootDataDependencies?.includes('social') ? { today: await loader.loadSocial(dateStr) } : null;
 
@@ -59,12 +60,9 @@ class MetaExecutor {
 
         stats.processedShards++;
         stats.processedItems += Object.keys(shardData).length;
-
-        // Removed global.gc()
       }
       logger.log('INFO', `[Executor] Finished Batched Execution for ${metadata.name} (${processedCount} shards).`);
 
-      // Attach stats
       calcInstance._executionStats = stats;
       return calcInstance.getResult ? await calcInstance.getResult() : {};
     } else {
@@ -75,7 +73,7 @@ class MetaExecutor {
       });
       const res = await calcInstance.process(context);
 
-      stats.processedItems = 1;
+      stats.processedItems = 1;
       calcInstance._executionStats = stats;
 
       return res;
@@ -83,4 +81,4 @@ class MetaExecutor {
   }
 }
 
-module.exports = { MetaExecutor };
+module.exports = { MetaExecutor };
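Note: the change above means every meta-pass commit now carries the isInitialWrite flag. A minimal usage sketch follows; it is illustrative only. The commitResults function and its parameter list come from ResultCommitter.js in this diff, while the wrapper function itself is hypothetical.

// Illustrative wrapper (not part of the package): a meta pass always signals a fresh run.
async function runMetaPass(state, dStr, passName, config, deps, skipStatusWrite, commitResults) {
  // Meta calculations execute once per day, so every commit is the first write for that date,
  // which is why isInitialWrite is hard-coded to true and lets the committer wipe stale shards.
  return commitResults(state, dStr, passName, config, deps, skipStatusWrite, { isInitialWrite: true });
}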
--- package/functions/computation-system/executors/StandardExecutor.js
+++ package/functions/computation-system/executors/StandardExecutor.js
@@ -3,7 +3,7 @@
  * UPDATED: Implements Batch Flushing to prevent OOM on large datasets.
  * UPDATED: Removes manual global.gc() calls.
  * UPDATED: Manages incremental sharding states.
- * UPDATED
+ * UPDATED: Implements 'isInitialWrite' flag for robust cleanup.
  */
 const { normalizeName } = require('../utils/utils');
 const { streamPortfolioData, streamHistoryData, getPortfolioPartRefs } = require('../utils/data_loader');
@@ -42,7 +42,6 @@ class StandardExecutor {
     }
 
     // 3. Stream, Process & Batch Flush
-    // The return value contains the aggregated success/failure reports from all flushes
     return await StandardExecutor.streamAndProcess(dStr, state, passName, config, deps, fullRoot, rootData.portfolioRefs, rootData.historyRefs, fetchedDeps, previousFetchedDeps, skipStatusWrite);
   }
 
@@ -55,29 +54,28 @@
 
     logger.log('INFO', `[${passName}] Streaming for ${streamingCalcs.length} computations...`);
 
-    // [IDEA 2] Metrics & State Tracking
     const executionStats = {};
-    const shardIndexMap = {};
+    const shardIndexMap = {};
     const aggregatedSuccess = {};
     const aggregatedFailures = [];
 
-    // Initialize Timing Stats per calculation
     Object.keys(state).forEach(name => {
       executionStats[name] = {
         processedUsers: 0,
         skippedUsers: 0,
-        timings: { setup: 0, stream: 0, processing: 0 }
+        timings: { setup: 0, stream: 0, processing: 0 }
       };
       shardIndexMap[name] = 0;
     });
 
-    //
+    // Track if we have performed a flush yet (for cleanup logic)
+    let hasFlushed = false;
+
     const startSetup = performance.now();
     const cachedLoader = new CachedDataLoader(config, deps);
     await cachedLoader.loadMappings();
     const setupDuration = performance.now() - startSetup;
 
-    // Distribute setup time
     Object.keys(executionStats).forEach(name => executionStats[name].timings.setup += setupDuration);
 
     const prevDate = new Date(dateStr + 'T00:00:00Z'); prevDate.setUTCDate(prevDate.getUTCDate() - 1);
@@ -91,14 +89,11 @@
 
     let yP_chunk = {}, tH_chunk = {};
 
-
-    const BATCH_SIZE = 5000; // Flush every 5000 users
+    const BATCH_SIZE = 5000;
     let usersSinceLastFlush = 0;
 
     try {
-      // [IDEA 2] Loop wrapper for profiling
       for await (const tP_chunk of tP_iter) {
-        // [IDEA 2] Measure Streaming Time (Gap between processing chunks)
         const startStream = performance.now();
         if (yP_iter) yP_chunk = (await yP_iter.next()).value || {};
         if (tH_iter) tH_chunk = (await tH_iter.next()).value || {};
@@ -107,7 +102,6 @@
 
         const chunkSize = Object.keys(tP_chunk).length;
 
-        // [IDEA 2] Measure Processing Time
         const startProcessing = performance.now();
         const promises = streamingCalcs.map(calc =>
           StandardExecutor.executePerUser(
@@ -119,20 +113,18 @@
         await Promise.all(promises);
         const procDuration = performance.now() - startProcessing;
 
-        // Assign processing time (Note: Parallel execution means total wall time is shared)
         Object.keys(executionStats).forEach(name => executionStats[name].timings.processing += procDuration);
 
         usersSinceLastFlush += chunkSize;
 
-        // --- BATCH FLUSH CHECK ---
         if (usersSinceLastFlush >= BATCH_SIZE) {
           logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users...`);
 
-
+          // [UPDATED] Pass isInitialWrite: true only on the first flush
+          const flushResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'INTERMEDIATE', true, !hasFlushed);
 
-
+          hasFlushed = true;
           StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, flushResult);
-
           usersSinceLastFlush = 0;
         }
       }
@@ -141,34 +133,27 @@
       if (tH_iter && tH_iter.return) await tH_iter.return();
     }
 
-    // --- FINAL FLUSH ---
     logger.log('INFO', `[${passName}] Streaming complete. Performing final commit.`);
-
+    // [UPDATED] If we never flushed in the loop, this is the initial write
+    const finalResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'FINAL', skipStatusWrite, !hasFlushed);
 
     StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, finalResult);
 
     return { successUpdates: aggregatedSuccess, failureReport: aggregatedFailures };
   }
 
-  static async flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, mode, skipStatusWrite) {
+  static async flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, mode, skipStatusWrite, isInitialWrite = false) {
     const transformedState = {};
-
-
-    // 1. Prepare and Clear Instances
+
     for (const [name, inst] of Object.entries(state)) {
-      // Get data from the standard storage location
      const rawResult = inst.results || {};
 
-      // Handle Multi-Date Fan-Out (Transposition)
-      // Logic: Checks if result is { userId: { date: data } }
      const firstUser = Object.keys(rawResult)[0];
      let dataToCommit = rawResult;
-      let isMultiDate = false;
 
      if (firstUser && rawResult[firstUser] && typeof rawResult[firstUser] === 'object') {
        const innerKeys = Object.keys(rawResult[firstUser]);
        if (innerKeys.length > 0 && innerKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k))) {
-          isMultiDate = true;
          const transposed = {};
          for (const [userId, dateMap] of Object.entries(rawResult)) {
            for (const [dateKey, dailyData] of Object.entries(dateMap)) {
@@ -180,24 +165,22 @@
          }
        }
 
-      // Create a mock instance for the committer that returns just this batch
      transformedState[name] = {
        manifest: inst.manifest,
        getResult: async () => dataToCommit,
-        _executionStats: executionStats[name]
+        _executionStats: executionStats[name]
      };
 
-      // ⚠️ CRITICAL: CLEAR MEMORY
      inst.results = {};
    }
 
-    //
+    // [UPDATED] Pass isInitialWrite to ResultCommitter
    const result = await commitResults(transformedState, dateStr, passName, config, deps, skipStatusWrite, {
-      flushMode: mode,
-      shardIndexes: shardIndexMap
+      flushMode: mode,
+      shardIndexes: shardIndexMap,
+      isInitialWrite: isInitialWrite
    });
 
-    // 3. Update Shard Indexes from result
    if (result.shardIndexes) {
      Object.assign(shardIndexMap, result.shardIndexes);
    }
@@ -208,19 +191,16 @@
  static mergeReports(successAcc, failureAcc, newResult) {
    if (!newResult) return;
 
-    // Merge Success Updates (Sums metrics)
    for (const [name, update] of Object.entries(newResult.successUpdates)) {
      if (!successAcc[name]) {
        successAcc[name] = update;
      } else {
-        // Sum storage metrics
        if (update.metrics?.storage) {
          successAcc[name].metrics.storage.sizeBytes += (update.metrics.storage.sizeBytes || 0);
          successAcc[name].metrics.storage.keys += (update.metrics.storage.keys || 0);
          successAcc[name].metrics.storage.shardCount = Math.max(successAcc[name].metrics.storage.shardCount, update.metrics.storage.shardCount || 1);
        }
 
-        // [IDEA 2] Sum timing metrics
        if (update.metrics?.execution?.timings) {
          if (!successAcc[name].metrics.execution) successAcc[name].metrics.execution = { timings: { setup:0, stream:0, processing:0 }};
          const tDest = successAcc[name].metrics.execution.timings;
@@ -230,13 +210,10 @@
          tDest.stream += (tSrc.stream || 0);
          tDest.processing += (tSrc.processing || 0);
        }
-
-        // Keep the latest hash/composition info
        successAcc[name].hash = update.hash;
      }
    }
 
-    // Merge Failures
    if (newResult.failureReport) {
      failureAcc.push(...newResult.failureReport);
    }
@@ -278,4 +255,4 @@
  }
}
 
-module.exports = { StandardExecutor };
+module.exports = { StandardExecutor };
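Note: the executor change above boils down to a "first flush wins" rule. Below is a minimal sketch of that pattern; only the BATCH_SIZE value, the 'INTERMEDIATE'/'FINAL' modes, and the trailing isInitialWrite argument come from the diff, and the two-argument flushBuffer here is a simplified stand-in for StandardExecutor.flushBuffer.

// Illustrative pattern only: the real loop lives in StandardExecutor.streamAndProcess.
async function streamWithBatchedFlushes(chunks, flushBuffer) {
  const BATCH_SIZE = 5000;   // flush threshold used in the diff above
  let usersSinceLastFlush = 0;
  let hasFlushed = false;    // has any flush happened yet in this run?

  for (const chunk of chunks) {
    usersSinceLastFlush += chunk.length;
    if (usersSinceLastFlush >= BATCH_SIZE) {
      // Only the very first flush of the run is allowed to trigger initial-write cleanup.
      await flushBuffer('INTERMEDIATE', /* isInitialWrite */ !hasFlushed);
      hasFlushed = true;
      usersSinceLastFlush = 0;
    }
  }

  // Final commit: if the loop never flushed, this is still the initial write.
  await flushBuffer('FINAL', !hasFlushed);
}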
--- package/functions/computation-system/persistence/ResultCommitter.js
+++ package/functions/computation-system/persistence/ResultCommitter.js
@@ -2,7 +2,8 @@
  * @fileoverview Handles saving computation results with observability and Smart Cleanup.
  * UPDATED: Implements GZIP Compression for efficient storage.
  * UPDATED: Implements Content-Based Hashing (ResultHash) for dependency short-circuiting.
- * UPDATED: Auto-enforces Weekend Mode validation
+ * UPDATED: Auto-enforces Weekend Mode validation.
+ * UPDATED: Implements "Initial Write" logic to wipe stale data/shards on a fresh run.
  */
 const { commitBatchInChunks, generateDataHash } = require('../utils/utils');
 const { updateComputationStatus } = require('./StatusRepository');
@@ -30,6 +31,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 
   // Options defaults
   const flushMode = options.flushMode || 'STANDARD';
+  const isInitialWrite = options.isInitialWrite === true; // [NEW] Flag for clean run
   const shardIndexes = options.shardIndexes || {};
   const nextShardIndexes = {};
 
@@ -50,29 +52,22 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
     const result = await calc.getResult();
     const configOverrides = validationOverrides[calc.manifest.name] || {};
 
-    // --- [NEW] AUTO-ENFORCE WEEKEND MODE FOR PRICE-ONLY CALCS ---
-    // If a calculation depends SOLELY on 'price', we assume market closures
-    // will cause 0s/Flatlines on weekends, so we enforce lenient validation.
     const dataDeps = calc.manifest.rootDataDependencies || [];
     const isPriceOnly = (dataDeps.length === 1 && dataDeps[0] === 'price');
 
     let effectiveOverrides = { ...configOverrides };
 
     if (isPriceOnly && !effectiveOverrides.weekend) {
-      // Apply strict leniency for weekend/holiday price actions
       effectiveOverrides.weekend = {
         maxZeroPct: 100,
         maxFlatlinePct: 100,
         maxNullPct: 100
       };
     }
-    // -----------------------------------------------------------
 
     // Validation
     if (result && Object.keys(result).length > 0) {
-      // [FIX] Added 'dStr' as 3rd argument to match HeuristicValidator signature
       const healthCheck = HeuristicValidator.analyze(calc.manifest.name, result, dStr, effectiveOverrides);
-
       if (!healthCheck.valid) {
         runMetrics.validation.isValid = false;
         runMetrics.validation.anomalies.push(healthCheck.reason);
@@ -83,11 +78,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
     }
 
     const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0);
-
-    // Calculate Result Hash (Content-Based)
     const resultHash = isEmpty ? 'empty' : generateDataHash(result);
 
-    // Handle Empty Results
     if (isEmpty) {
       if (flushMode === 'INTERMEDIATE') {
         nextShardIndexes[name] = currentShardIndex;
@@ -123,7 +115,9 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
           .collection(config.computationsSubcollection)
           .doc(name);
 
-
+        // Note: Multi-date fan-out rarely hits sharding, and tracking isInitialWrite per-date is complex.
+        // We assume standard merging here.
+        await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD', false);
       }));
       await Promise.all(datePromises);
 
@@ -146,7 +140,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
       .collection(config.computationsSubcollection)
       .doc(name);
 
-    const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps, currentShardIndex, flushMode);
+    const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps, currentShardIndex, flushMode, isInitialWrite);
 
     runMetrics.storage.sizeBytes = writeStats.totalSize;
     runMetrics.storage.isSharded = writeStats.isSharded;
@@ -192,8 +186,32 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
   return { successUpdates, failureReport, shardIndexes: nextShardIndexes };
 }
 
-async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD') {
+async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false) {
 
+  // [NEW] Transition & Cleanup Logic
+  // If this is the initial write of a run, we verify the existing state to prevent "Ghost Data".
+  let wasSharded = false;
+  let hadRootData = false;
+  let shouldWipeShards = false;
+
+  // Default: Merge updates. But if Initial Write, overwrite (merge: false) to clear stale fields.
+  let rootMergeOption = !isInitialWrite;
+
+  if (isInitialWrite) {
+    try {
+      const currentSnap = await docRef.get();
+      if (currentSnap.exists) {
+        const d = currentSnap.data();
+        wasSharded = (d._sharded === true);
+        // If it was sharded, we MUST wipe the old shards because we are re-writing from scratch.
+        // Even if we write new shards, we want to ensure shard_10 doesn't persist if we only write up to shard_5.
+        if (wasSharded) shouldWipeShards = true;
+
+        // If it wasn't sharded, it had root data. overwriting (merge: false) handles that automatically.
+      }
+    } catch (e) { /* ignore read error */ }
+  }
+
   // --- COMPRESSION STRATEGY ---
   try {
     const jsonString = JSON.stringify(result);
@@ -212,7 +230,22 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
       payload: compressedBuffer
     };
 
-
+    // Cleanup: If it was sharded, or if we are wiping shards on initial write
+    if (shouldWipeShards) {
+      logger.log('INFO', `[Cleanup] ${name}: Wiping old shards before Compressed Write.`);
+      const updates = [];
+      const shardCol = docRef.collection('_shards');
+      const shardDocs = await shardCol.listDocuments();
+      shardDocs.forEach(d => updates.push({ type: 'DELETE', ref: d }));
+
+      // Root update with merge: false (overwrites everything)
+      updates.push({ ref: docRef, data: compressedPayload, options: { merge: false } });
+
+      await commitBatchInChunks(config, deps, updates, `${name}::Cleanup+Compress`);
+    } else {
+      // Standard update (respecting calculated rootMergeOption)
+      await docRef.set(compressedPayload, { merge: rootMergeOption });
+    }
 
     return {
       totalSize: compressedBuffer.length,
@@ -242,12 +275,29 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
     const constraints = strategies[attempt];
     try {
       const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys, startShardIndex, flushMode);
-      const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined);
 
+      // [NEW] Inject Cleanup Ops
+      if (shouldWipeShards) {
+        logger.log('INFO', `[Cleanup] ${name}: Wiping old shards before Write (Initial).`);
+        const shardCol = docRef.collection('_shards');
+        const shardDocs = await shardCol.listDocuments();
+        // Prepend DELETEs
+        shardDocs.forEach(d => updates.unshift({ type: 'DELETE', ref: d }));
+        shouldWipeShards = false; // Done for this loop
+      }
+
+      // Ensure the root document write respects our merge option
+      const rootUpdate = updates.find(u => u.ref.path === docRef.path && u.type !== 'DELETE');
+      if (rootUpdate) {
+        rootUpdate.options = { merge: rootMergeOption };
+      }
+
+      const pointer = updates.find(u => u.data && (u.data._completed !== undefined || u.data._sharded !== undefined));
      finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);
 
      let maxIndex = startShardIndex;
      updates.forEach(u => {
+        if (u.type === 'DELETE') return;
        const segs = u.ref.path.split('/');
        const last = segs[segs.length - 1];
        if (last.startsWith('shard_')) {
@@ -360,4 +410,4 @@ function calculateFirestoreBytes(value) {
   if (typeof value === 'object') { let sum = 0; for (const k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]); } } return sum; } return 0;
 }
 
-module.exports = { commitResults };
+module.exports = { commitResults };
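Note: the committer change above decides, on the first write of a run, whether stale shards must be wiped before the new payload lands. A condensed sketch of that decision follows, assuming the Firestore Admin SDK (docRef.get(), collection('_shards').listDocuments()) and the write-object shape used by commitBatchInChunks; the helper name is hypothetical.

// Condensed sketch of the initial-write decision (not the package's exact code).
async function planInitialWrite(docRef, payload, isInitialWrite) {
  const updates = [];
  const merge = !isInitialWrite; // merge on incremental writes, overwrite on a fresh run

  if (isInitialWrite) {
    const snap = await docRef.get();
    // A previous sharded run leaves a `_shards` subcollection behind; queue DELETEs so
    // e.g. shard_10 cannot outlive a smaller re-write.
    if (snap.exists && snap.data()._sharded === true) {
      const shardDocs = await docRef.collection('_shards').listDocuments();
      shardDocs.forEach(ref => updates.push({ type: 'DELETE', ref }));
    }
  }

  // The root document write carries the computed merge option.
  updates.push({ ref: docRef, data: payload, options: { merge } });
  return updates; // consumable by commitBatchInChunks, which now understands DELETE ops
}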
--- package/functions/computation-system/utils/utils.js
+++ package/functions/computation-system/utils/utils.js
@@ -78,7 +78,7 @@ async function withRetry(fn, operationName, maxRetries = 3) {
 }
 
 /** Stage 2: Commit a batch of writes in chunks
- *
+ * UPDATED: Now supports { type: 'DELETE' } in the write object.
  */
 async function commitBatchInChunks(config, deps, writes, operationName) {
   const { db, logger } = deps;
@@ -118,6 +118,17 @@ async function commitBatchInChunks(config, deps, writes, operationName) {
   };
 
   for (const write of writes) {
+    // [NEW] Handle DELETE operations
+    if (write.type === 'DELETE') {
+      if ((currentOpsCount + 1 > MAX_BATCH_OPS)) {
+        await commitAndReset();
+      }
+      currentBatch.delete(write.ref);
+      currentOpsCount++;
+      continue;
+    }
+
+    // Standard SET/UPDATE operations
     let docSize = 100;
     try { if (write.data) docSize = JSON.stringify(write.data).length; } catch (e) { }
 
@@ -251,4 +262,4 @@ module.exports = {
   generateDataHash, // Exported
   withRetry,
   DEFINITIVE_EARLIEST_DATES
-};
+};
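Note: with DELETE support in commitBatchInChunks, a caller can mix shard deletions and a root overwrite in one chunked commit. An illustrative sketch follows; commitBatchInChunks, config, and deps are the real utils.js export and parameters, while the helper, docRef, and payload names are hypothetical.

// Hypothetical caller showing the mixed write shapes commitBatchInChunks now accepts.
async function wipeShardsAndRewrite(config, deps, commitBatchInChunks, docRef, payload) {
  const shardDocs = await docRef.collection('_shards').listDocuments();
  const writes = [
    // DELETE entries carry only { type, ref } and are applied with batch.delete(ref).
    ...shardDocs.map(ref => ({ type: 'DELETE', ref })),
    // Regular entries carry data (and optionally per-write options).
    { ref: docRef, data: payload, options: { merge: false } },
  ];
  return commitBatchInChunks(config, deps, writes, 'example::Cleanup+Rewrite');
}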