bulltrackers-module 1.0.660 → 1.0.662

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,9 +5,12 @@
 
 const { FieldValue } = require('@google-cloud/firestore');
 const zlib = require('zlib');
+const { Storage } = require('@google-cloud/storage');
 const { getAlertTypeByComputation, generateAlertMessage } = require('./alert_type_registry');
 // Migration helpers removed - write directly to new path
 
+const storage = new Storage(); // Singleton GCS Client
+
 /**
  * Process alerts for a specific PI from computation results
  */
@@ -474,10 +477,48 @@ function readComputationResults(docData) {
 }
 
 /**
- * Read computation results, handling sharded data
+ * Read computation results, handling GCS pointers, sharded data, and compressed data
+ * UPDATED: Added GCS pointer support to read from GCS when data is offloaded
  */
-async function readComputationResultsWithShards(db, docData, docRef) {
+async function readComputationResultsWithShards(db, docData, docRef, logger = null) {
   try {
+    // -------------------------------------------------------------------------
+    // 1. GCS POINTER HANDLER (Check first - highest priority)
+    // -------------------------------------------------------------------------
+    if (docData.gcsUri || (docData._gcs && docData.gcsBucket && docData.gcsPath)) {
+      try {
+        const bucketName = docData.gcsBucket || docData.gcsUri.split('/')[2];
+        const fileName = docData.gcsPath || docData.gcsUri.split('/').slice(3).join('/');
+
+        if (logger) {
+          logger.log('INFO', `[AlertSystem] Reading computation results from GCS: ${fileName}`);
+        }
+
+        // download() buffers the full object contents into memory
+        const [fileContent] = await storage.bucket(bucketName).file(fileName).download();
+
+        // Assume gzip (the writer compresses by default); if that fails, try plain JSON
+        let decompressedData;
+        try {
+          decompressedData = JSON.parse(zlib.gunzipSync(fileContent).toString('utf8'));
+        } catch (gzipErr) {
+          // Fallback for uncompressed GCS files
+          decompressedData = JSON.parse(fileContent.toString('utf8'));
+        }
+
+        // Process the decompressed data through readComputationResults
+        return readComputationResults(decompressedData);
+      } catch (gcsErr) {
+        if (logger) {
+          logger.log('ERROR', `[AlertSystem] GCS fetch failed, falling back to Firestore: ${gcsErr.message}`);
+        }
+        // Fall through to Firestore logic below
+      }
+    }
+
+    // -------------------------------------------------------------------------
+    // 2. FIRESTORE SHARDED HANDLER
+    // -------------------------------------------------------------------------
    if (docData._sharded === true && docData._shardCount) {
      const shardsCol = docRef.collection('_shards');
      const shardsSnapshot = await shardsCol.get();
@@ -492,9 +533,17 @@ async function readComputationResultsWithShards(db, docData, docRef) {
        return readComputationResults(mergedData);
      }
    }
+
+    // -------------------------------------------------------------------------
+    // 3. FIRESTORE COMPRESSED OR DIRECT DATA HANDLER
+    // -------------------------------------------------------------------------
    return readComputationResults(docData);
  } catch (error) {
-   console.error('[readComputationResultsWithShards] Error reading sharded results', error);
+   if (logger) {
+     logger.log('ERROR', `[AlertSystem] Error reading computation results: ${error.message}`);
+   } else {
+     console.error('[readComputationResultsWithShards] Error reading sharded results', error);
+   }
    return { cids: [], metadata: {}, perUserData: {} };
  }
 }
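For orientation, here is a minimal sketch of the writer side this reader implies: a hypothetical offloadResultsToGcs helper that gzips the results, uploads them, and leaves behind a pointer document carrying exactly the fields the reader checks (_gcs, gcsBucket, gcsPath, gcsUri). The helper name and call site are assumptions; only the pointer field names and the gzip convention come from the diff above, and it reuses the zlib and storage singletons required at the top of this file.

// Hypothetical writer-side counterpart (not part of this diff)
async function offloadResultsToGcs(docRef, results, bucketName, filePath) {
  // Gzip the payload so the reader's gunzip fast path succeeds
  const compressed = zlib.gzipSync(Buffer.from(JSON.stringify(results), 'utf8'));
  // contentEncoding metadata is deliberately omitted so download() returns raw gzip bytes
  await storage.bucket(bucketName).file(filePath).save(compressed, { contentType: 'application/gzip' });
  // Pointer fields mirror what readComputationResultsWithShards checks
  await docRef.set({
    _gcs: true,
    gcsBucket: bucketName,
    gcsPath: filePath,
    gcsUri: `gs://${bucketName}/${filePath}`
  }, { merge: true });
}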
@@ -72,9 +72,9 @@ async function handleAlertTrigger(message, context, config, dependencies) {
     return;
   }
 
-  // 3. Read and decompress computation results
+  // 3. Read and decompress computation results (handling GCS, shards, and compression)
   const docData = docSnapshot.data();
-  const results = readComputationResults(docData);
+  const results = await readComputationResultsWithShards(db, docData, docRef, logger);
 
   if (!results.cids || results.cids.length === 0) {
     logger.log('INFO', `[AlertTrigger] No PIs found in computation results for ${computationName}`);
@@ -188,7 +188,7 @@ async function handleComputationResultWrite(change, context, config, dependencie
   // If it's PopularInvestorProfileMetrics, check for all-clear notifications only
   if (isProfileMetrics) {
     const docData = change.after.data();
-    const results = await readComputationResultsWithShards(db, docData, change.after.ref);
+    const results = await readComputationResultsWithShards(db, docData, change.after.ref, logger);
     if (results.cids && results.cids.length > 0) {
       await checkAndSendAllClearNotifications(db, logger, results.cids, date, config, dependencies);
     }
@@ -203,9 +203,9 @@ async function handleComputationResultWrite(change, context, config, dependencie
 
   logger.log('INFO', `[AlertTrigger] Processing alert computation: ${computationName} for date ${date}`);
 
-  // 2. Read and decompress computation results (handling shards)
+  // 2. Read and decompress computation results (handling GCS, shards, and compression)
   const docData = change.after.data();
-  const results = await readComputationResultsWithShards(db, docData, change.after.ref);
+  const results = await readComputationResultsWithShards(db, docData, change.after.ref, logger);
 
   if (!results.cids || results.cids.length === 0) {
     logger.log('INFO', `[AlertTrigger] No PIs found in computation results for ${computationName}`);
@@ -1,10 +1,13 @@
 // Firestore helper functions for fetching data from collections
 const { FieldValue, Timestamp } = require('@google-cloud/firestore');
+const { Storage } = require('@google-cloud/storage');
 const { dispatchSyncRequest } = require('../task_engine_helper.js');
 const { sanitizeCid, sanitizeDocId } = require('../security_utils.js');
 const crypto = require('crypto');
 const zlib = require('zlib');
 
+const storage = new Storage(); // Singleton GCS Client
+
 // 1. Fetch latest stored snapshots of user data from a user-centric collection
 
 // Examples
@@ -1210,14 +1213,40 @@ const getComputationResults = async (db, computationName, dateStr, userId = null
 
   const pointerData = pointerSnap.data();
 
-  // 2. Strategy: Compressed Data
+  // 2. Strategy: GCS Pointer (Check first - highest priority)
+  // If _gcs is true or gcsUri exists, the data is stored in GCS
+  // Note: Page mode is exempt from GCS logic (handled separately below)
+  if (pointerData._isPageMode !== true && (pointerData.gcsUri || (pointerData._gcs && pointerData.gcsBucket && pointerData.gcsPath))) {
+    try {
+      const bucketName = pointerData.gcsBucket || pointerData.gcsUri.split('/')[2];
+      const fileName = pointerData.gcsPath || pointerData.gcsUri.split('/').slice(3).join('/');
+
+      console.log(`[Computation] Reading from GCS: ${fileName} for ${computationName}`);
+
+      // download() buffers the full object contents into memory
+      const [fileContent] = await storage.bucket(bucketName).file(fileName).download();
+
+      // Assume gzip (the writer compresses by default); if that fails, try plain JSON
+      try {
+        return JSON.parse(zlib.gunzipSync(fileContent).toString('utf8'));
+      } catch (gzipErr) {
+        // Fallback for uncompressed GCS files
+        return JSON.parse(fileContent.toString('utf8'));
+      }
+    } catch (gcsErr) {
+      console.error(`[Computation] GCS fetch failed for ${computationName}, falling back to Firestore: ${gcsErr.message}`);
+      // Fall through to Firestore strategies below
+    }
+  }
+
+  // 3. Strategy: Compressed Data
   // If _compressed is true, the data is inside the payload field, just zipped.
   if (pointerData._compressed === true) {
     console.log(`[Computation] Reading compressed data for ${computationName}`);
     return tryDecompress(pointerData);
   }
 
-  // 3. Strategy: Sharded Data
+  // 4. Strategy: Sharded Data
   // If _sharded is true, we must fetch N documents from the _shards subcollection.
   if (pointerData._sharded === true) {
     const shardCount = pointerData._shardCount || 0;
@@ -1260,14 +1289,15 @@ const getComputationResults = async (db, computationName, dateStr, userId = null
     return reassembledData;
   }
 
-  // 4. Strategy: Page Mode (User Centric)
+  // 5. Strategy: Page Mode (User Centric)
   // If _isPageMode is true, we delegate to the pageCollection helper.
+  // Note: Page mode is exempt from GCS logic (uses individual user documents)
   if (pointerData._isPageMode === true) {
     console.log(`[Computation] Fetching page mode data for ${computationName} / User: ${userId}`);
     return await pageCollection(db, dateStr, computationName, userId);
   }
 
-  // 5. Strategy: Standard (Direct Read)
+  // 6. Strategy: Standard (Direct Read)
   // If no flags are set, the data is in the pointer document itself.
   console.log(`[Computation] Returning direct pointer data for ${computationName}`);
   return pointerData;
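The gcsUri fallback derives the bucket and object path positionally from a gs:// URI. A worked example with an invented URI shows why the indices are 2 and 3:

const gcsUri = 'gs://my-bucket/computations/2024-01-01/results.json.gz'; // invented example
const parts = gcsUri.split('/');
// parts = ['gs:', '', 'my-bucket', 'computations', '2024-01-01', 'results.json.gz']
const bucketName = parts[2];               // 'my-bucket'
const fileName = parts.slice(3).join('/'); // 'computations/2024-01-01/results.json.gz'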
@@ -133,15 +133,70 @@ async function getStableDateSession(config, dependencies, pass, dateLimitStr, fo
 // 2. NEW SNAPSHOT HANDLER
 async function handleSnapshot(config, dependencies, reqBody) {
   const { logger } = dependencies;
-  const date = reqBody.date;
+  const targetDate = reqBody.date; // Optional: if provided, only process up to this date
 
-  if (!date) throw new Error('Snapshot action requires a "date"');
-
   try {
-    logger.log('INFO', `[Dispatcher] 📸 Triggering Snapshot Service for ${date}`);
-    // Calls the service we created earlier
-    const result = await generateDailySnapshots(date, config, dependencies);
-    return result;
+    // Get earliest available root data date
+    const earliestDates = await getEarliestDataDates(config, dependencies);
+    const earliestDate = earliestDates.absoluteEarliest;
+
+    if (!earliestDate) {
+      throw new Error('Could not determine earliest available root data date');
+    }
+
+    // Determine end date: use targetDate if provided, otherwise use today
+    const endDate = targetDate ? new Date(targetDate + 'T00:00:00Z') : new Date();
+    endDate.setUTCHours(0, 0, 0, 0);
+
+    // Generate all dates from earliest to end date
+    const startDate = new Date(earliestDate);
+    startDate.setUTCHours(0, 0, 0, 0);
+
+    const dateStrings = getExpectedDateStrings(startDate, endDate);
+
+    if (dateStrings.length === 0) {
+      logger.log('WARN', '[Dispatcher] No dates to process for snapshot');
+      return { status: 'OK', processed: 0, skipped: 0 };
+    }
+
+    logger.log('INFO', `[Dispatcher] 📸 Processing snapshots for ${dateStrings.length} dates from ${dateStrings[0]} to ${dateStrings[dateStrings.length - 1]}`);
+
+    // Process each date (snapshot service will skip if already exists)
+    const results = [];
+    const BATCH_SIZE = 5; // Process 5 dates in parallel to avoid overwhelming the system
+
+    for (let i = 0; i < dateStrings.length; i += BATCH_SIZE) {
+      const batch = dateStrings.slice(i, i + BATCH_SIZE);
+      const batchResults = await Promise.allSettled(
+        batch.map(dateStr => generateDailySnapshots(dateStr, config, dependencies))
+      );
+
+      batchResults.forEach((result, idx) => {
+        const dateStr = batch[idx];
+        if (result.status === 'fulfilled') {
+          const value = result.value;
+          results.push({ date: dateStr, status: value.status || 'OK' });
+        } else {
+          logger.log('ERROR', `[Dispatcher] Snapshot failed for ${dateStr}: ${result.reason?.message || result.reason}`);
+          results.push({ date: dateStr, status: 'ERROR', error: result.reason?.message || String(result.reason) });
+        }
+      });
+    }
+
+    const successful = results.filter(r => r.status === 'OK').length;
+    const skipped = results.filter(r => r.status === 'SKIPPED').length;
+    const failed = results.filter(r => r.status === 'ERROR').length;
+
+    logger.log('INFO', `[Dispatcher] 📸 Snapshot batch complete: ${successful} processed, ${skipped} skipped, ${failed} failed out of ${results.length} total`);
+
+    return {
+      status: failed === 0 ? 'OK' : 'PARTIAL',
+      processed: successful,
+      skipped: skipped,
+      failed: failed,
+      total: results.length,
+      results: results
+    };
   } catch (e) {
     logger.log('ERROR', `[Dispatcher] Snapshot failed: ${e.message}`);
     // Return error object so workflow can see failure
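getExpectedDateStrings is called above but not shown in this diff. Here is a minimal sketch of the behaviour the handler appears to assume (an inclusive list of UTC YYYY-MM-DD strings from startDate to endDate); the implementation details are guesses:

function getExpectedDateStrings(startDate, endDate) {
  // Walk day by day in UTC, inclusive of both endpoints
  const dates = [];
  const cursor = new Date(startDate);
  while (cursor <= endDate) {
    dates.push(cursor.toISOString().slice(0, 10)); // 'YYYY-MM-DD'
    cursor.setUTCDate(cursor.getUTCDate() + 1);
  }
  return dates;
}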
@@ -247,6 +247,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 
   if (calc.manifest.class.getSchema && flushMode !== 'INTERMEDIATE') {
     const { class: _cls, ...safeMetadata } = calc.manifest;
+    // Ensure ttlDays is set to the resolved value (defaults to 90 if undefined)
+    safeMetadata.ttlDays = ttlDays;
     schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata });
   }
   if (calc.manifest.previousCategory && calc.manifest.previousCategory !== calc.manifest.category && flushMode !== 'INTERMEDIATE') {
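The hunk above writes a ttlDays value that is resolved outside this hunk. Based only on the comment "(defaults to 90 if undefined)", the resolution presumably reduces to something like the line below; this is an assumption, not code from the package:

const ttlDays = calc.manifest.ttlDays ?? 90; // assumed: manifest value with a 90-day default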
@@ -10,11 +10,30 @@ const dataLoader = require('../utils/data_loader');
 
 async function generateDailySnapshots(dateStr, config, deps) {
   const { logger } = deps;
-  logger.log('INFO', `[SnapshotService] 📸 Starting Full System Snapshot for ${dateStr}`);
-
   const bucketName = config.gcsBucketName || 'bulltrackers';
   const bucket = storage.bucket(bucketName);
 
+  // Quick check: if all main snapshots exist, skip entirely
+  const mainFiles = [
+    `${dateStr}/snapshots/portfolios.json.gz`,
+    `${dateStr}/snapshots/social.json.gz`,
+    `${dateStr}/snapshots/history.jsonl.gz`,
+    `${dateStr}/snapshots/ratings.json.gz`,
+    `${dateStr}/snapshots/rankings.json.gz`
+  ];
+
+  if (!config.forceSnapshot) {
+    const existenceChecks = await Promise.all(mainFiles.map(path => bucket.file(path).exists()));
+    const allExist = existenceChecks.every(([exists]) => exists);
+
+    if (allExist) {
+      logger.log('INFO', `[SnapshotService] ⏭️ All snapshots already exist for ${dateStr}, skipping`);
+      return { status: 'SKIPPED', date: dateStr, reason: 'all_exist' };
+    }
+  }
+
+  logger.log('INFO', `[SnapshotService] 📸 Starting Full System Snapshot for ${dateStr}`);
+
   // parallelize independent fetches
   await Promise.all([
     snapshotPortfolios(dateStr, bucket, config, deps), // Heavy
@@ -26,7 +45,7 @@ async function generateDailySnapshots(dateStr, config, deps) {
     snapshotMetadata(dateStr, bucket, config, deps) // Small Docs (Insights, Alerts, Watchlist)
   ]);
 
-  logger.log('INFO', `[SnapshotService] ✅ Full System Snapshot Complete.`);
+  logger.log('INFO', `[SnapshotService] ✅ Full System Snapshot Complete for ${dateStr}`);
   return { status: 'OK', date: dateStr };
 }
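A usage sketch for the new skip logic: setting forceSnapshot on the config bypasses the existence check and regenerates everything. The spread-into-config call shape below is an assumption:

// Force regeneration for one date even if all snapshot files already exist
const forced = await generateDailySnapshots('2024-01-01', { ...config, forceSnapshot: true }, deps);
// Without forceSnapshot, a fully populated date short-circuits to:
// { status: 'SKIPPED', date: '2024-01-01', reason: 'all_exist' }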
 
@@ -109,10 +128,10 @@ async function snapshotRankings(dateStr, bucket, config, deps) {
 async function snapshotMetadata(dateStr, bucket, config, deps) {
   // Bundle small files into one "metadata.json" or keep separate. Separate is safer for loaders.
   const ops = [
-    { name: 'insights', fn: () => dataLoader.loadDailyInsights(config, deps, dateStr) },
-    { name: 'page_views', fn: () => dataLoader.loadPIPageViews(config, deps, dateStr) },
-    { name: 'watchlist', fn: () => dataLoader.loadWatchlistMembership(config, deps, dateStr) },
-    { name: 'alerts', fn: () => dataLoader.loadPIAlertHistory(config, deps, dateStr) },
+    { name: 'insights',    fn: () => dataLoader.loadDailyInsights(config, deps, dateStr) },
+    { name: 'page_views',  fn: () => dataLoader.loadPIPageViews(config, deps, dateStr) },
+    { name: 'watchlist',   fn: () => dataLoader.loadWatchlistMembership(config, deps, dateStr) },
+    { name: 'alerts',      fn: () => dataLoader.loadPIAlertHistory(config, deps, dateStr) },
     { name: 'master_list', fn: () => dataLoader.loadPopularInvestorMasterList(config, deps) } // Not date bound usually, but good to snapshot state
   ];
 
@@ -4,6 +4,35 @@
  * UPDATED: Added schema validation to prevent silent batch failures.
  */
 
+/**
+ * Recursively removes undefined values from an object.
+ * Firestore doesn't allow undefined values, so we filter them out entirely.
+ * @param {any} data - Data to sanitize
+ * @returns {any} Sanitized data with undefined values removed
+ */
+function removeUndefinedValues(data) {
+  if (data === undefined) return undefined; // Will be filtered out
+  if (data === null) return null;
+  if (data instanceof Date) return data;
+
+  if (Array.isArray(data)) {
+    return data.map(item => removeUndefinedValues(item)).filter(item => item !== undefined);
+  }
+
+  if (typeof data === 'object') {
+    const sanitized = {};
+    for (const [key, value] of Object.entries(data)) {
+      const sanitizedValue = removeUndefinedValues(value);
+      if (sanitizedValue !== undefined) {
+        sanitized[key] = sanitizedValue;
+      }
+    }
+    return sanitized;
+  }
+
+  return data;
+}
+
 /**
  * Validates a schema object before storage.
  * Checks for circular references and size limits.
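A quick worked example of what removeUndefinedValues produces:

removeUndefinedValues({ a: 1, b: undefined, c: [1, undefined, 2], d: { e: undefined }, t: null });
// => { a: 1, c: [1, 2], d: {}, t: null }
// undefined is stripped from objects and arrays; null and Date instances pass through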
@@ -58,13 +87,17 @@ async function batchStoreSchemas(dependencies, config, schemas) {
     const docRef = db.collection(schemaCollection).doc(item.name);
 
     // Critical: Always overwrite 'lastUpdated' to now
-    batch.set(docRef, {
+    // Sanitize metadata to remove undefined values (Firestore doesn't allow undefined)
+    const sanitizedMetadata = item.metadata ? removeUndefinedValues(item.metadata) : {};
+    const docData = removeUndefinedValues({
       computationName: item.name,
       category: item.category,
       schema: item.schema,
-      metadata: item.metadata || {},
+      metadata: sanitizedMetadata,
       lastUpdated: new Date()
-    }, { merge: true });
+    });
+
+    batch.set(docRef, docData, { merge: true });
 
     validCount++;
 
@@ -42,14 +42,21 @@ class PubSubUtils {
 
   /**
    * [NEW] Publishes a single JSON message to a topic.
+   * Includes timeout to prevent hanging on network issues.
    */
-  async publish(topicName, message) {
+  async publish(topicName, message, timeoutMs = 10000) {
     const { pubsub, logger } = this.dependencies;
     const topic = pubsub.topic(topicName);
     const dataBuffer = Buffer.from(JSON.stringify(message));
 
     try {
-      await topic.publishMessage({ data: dataBuffer });
+      // Wrap publish in a timeout promise
+      const publishPromise = topic.publishMessage({ data: dataBuffer });
+      const timeoutPromise = new Promise((_, reject) =>
+        setTimeout(() => reject(new Error(`Publish timeout after ${timeoutMs}ms`)), timeoutMs)
+      );
+
+      await Promise.race([publishPromise, timeoutPromise]);
     } catch (error) {
       logger.log('ERROR', `[Core Utils] Failed to publish message to ${topicName}`, { error: error.message });
       throw error;
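A usage sketch for the timeout-wrapped publish; the instance and topic names here are invented:

// Fail fast after 5 seconds instead of the 10-second default
await pubSubUtils.publish('alert-triggers', { cid: 123, type: 'drawdown' }, 5000);

One design note: the setTimeout is never cleared, so even a successful publish leaves a timer pending for up to timeoutMs; storing the timer handle and calling clearTimeout in a finally block would release it immediately.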
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "bulltrackers-module",
-  "version": "1.0.660",
+  "version": "1.0.662",
   "description": "Helper Functions for Bulltrackers.",
   "main": "index.js",
   "files": [