bulltrackers-module 1.0.407 → 1.0.409

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
  /**
   * @fileoverview Main entry point for the Task Engine Cloud Function.
+  * FIXED: Deduplication window increased to 20m to handle timeouts/redeliveries.
+  * FIXED: Robust timestamp extraction for Gen 1 & Gen 2 Cloud Functions.
   */
  const { handleDiscover } = require('./helpers/discover_helpers');
  const { handleVerify } = require('./helpers/verify_helpers');
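Context for the second fix: Gen 1 (background) and Gen 2 (CloudEvents) functions hand the Pub/Sub event to the handler in different shapes, so no single field reliably carries the publish time. A rough sketch of the two layouts the new extraction chain has to cover (field values are illustrative, not taken from the package):

```js
// Gen 1 background function: handler(message, context)
const gen1Message = {
  data: 'eyJ0YXNrcyI6W119',                                // base64-encoded JSON payload
  attributes: { dispatched_at: '2024-01-01T12:00:00Z' },   // custom dispatcher attribute
  publishTime: '2024-01-01T12:00:00.000Z',                 // -> message.publishTime
};
const gen1Context = { eventId: '1234567890', timestamp: '2024-01-01T12:00:00.000Z' };

// Gen 2 CloudEvents function: a single event object is passed instead,
// which is consistent with the new chain also probing `message.time`.
const gen2Event = {
  id: 'abc-123',
  time: '2024-01-01T12:00:00.000Z',                        // -> message.time
  data: { message: { data: 'eyJ0YXNrcyI6W119', messageId: 'abc-123' } },
};

// The fallback chain from the diff works against either shape:
const pickPublishTime = (message, context = {}) =>
  message.publishTime ||
  message.time ||
  (message.attributes && message.attributes.dispatched_at) ||
  context.timestamp;

console.log(pickPublishTime(gen1Message, gen1Context)); // '2024-01-01T12:00:00.000Z'
console.log(pickPublishTime(gen2Event));                // '2024-01-01T12:00:00.000Z'
```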
@@ -10,99 +12,119 @@ const { handlePopularInvestorUpdate, handleOnDemandUserUpdate } = require('./hel
  const { executeTasks, prepareTaskBatches } = require('./utils/task_engine_utils');
 
  async function handleRequest(message, context, config, dependencies) {
-   const { logger, batchManager } = dependencies;
+   const { logger, batchManager, db } = dependencies;
 
-   // [URGENT FIX] Check message age - reject messages older than 15 minutes
-   // This prevents processing stale messages that cause infinite loops and high costs
-   const MAX_MESSAGE_AGE_MS = 15 * 60 * 1000; // 15 minutes
+   // [CRITICAL FIX] Max Age increased to 25m to match the larger dedup window.
+   // If a message is older than this, we drop it to stop infinite loops.
+   const MAX_MESSAGE_AGE_MS = 25 * 60 * 1000;
+
+   // [CRITICAL FIX] Dedup Window increased to 20m.
+   // Must be significantly larger than the Function Timeout (9m or 60m).
+   // This ensures that if a function times out and Pub/Sub redelivers it,
+   // we still recognize it as "recently processed" and skip it.
+   const DEDUP_WINDOW_MS = 20 * 60 * 1000;
+
    let messageAge = null;
    let publishTime = null;
+   const messageId = context.eventId || message.messageId || message.id || `msg-${Date.now()}-${Math.random()}`;
 
    try {
-     // Extract publish time from message
-     if (message.publishTime) {
-       publishTime = new Date(message.publishTime);
-       messageAge = Date.now() - publishTime.getTime();
-     } else if (message.attributes && message.attributes.dispatched_at) {
-       publishTime = new Date(message.attributes.dispatched_at);
-       messageAge = Date.now() - publishTime.getTime();
-     } else if (context.timestamp) {
-       publishTime = new Date(context.timestamp);
+     // [CRITICAL FIX] Robust Timestamp Extraction (Gen 1 & Gen 2 support)
+     let publishTimeStr = message.publishTime ||
+       message.time || // CloudEvents (Gen 2)
+       (message.attributes && message.attributes.dispatched_at) ||
+       context.timestamp;
+
+     if (publishTimeStr) {
+       publishTime = new Date(publishTimeStr);
        messageAge = Date.now() - publishTime.getTime();
+     } else {
+       logger.log('WARN', '[TaskEngine] Could not determine message publish time. Using current time as proxy (risky).');
+       // If we can't find a time, we can't trust age checks, but we proceed cautiously.
      }
 
+     // 1. Age Check
      if (messageAge !== null && messageAge > MAX_MESSAGE_AGE_MS) {
        const ageMinutes = Math.round(messageAge / 60000);
        logger.log('WARN', `[TaskEngine] REJECTING stale message (${ageMinutes} minutes old). Acknowledging and skipping to prevent cost.`, {
+         messageId,
          messageAge: `${ageMinutes} minutes`,
          publishTime: publishTime?.toISOString(),
-         maxAge: '15 minutes',
+         maxAge: '25 minutes',
          action: 'acknowledged_and_skipped'
        });
-       // Return immediately - Cloud Function will auto-acknowledge
-       return;
+       return; // Return immediately - Cloud Function will auto-acknowledge
+     }
+
+     // 2. Deduplication Check
+     const dedupCollection = db.collection('system_task_deduplication');
+     const dedupDocId = `msg_${messageId}`;
+     const dedupRef = dedupCollection.doc(dedupDocId);
+     const dedupDoc = await dedupRef.get();
+
+     if (dedupDoc.exists) {
+       const dedupData = dedupDoc.data();
+       const processedAt = dedupData.processedAt?.toDate?.() || new Date(dedupData.processedAt);
+       const timeSinceProcessed = Date.now() - processedAt.getTime();
+
+       if (timeSinceProcessed < DEDUP_WINDOW_MS) {
+         const minutesAgo = Math.round(timeSinceProcessed / 60000);
+         logger.log('WARN', `[TaskEngine] REJECTING duplicate message (processed ${minutesAgo} minutes ago). Acknowledging and skipping to prevent infinite loop.`, {
+           messageId,
+           processedAt: processedAt.toISOString(),
+           timeSinceProcessed: `${minutesAgo} minutes`,
+           dedupWindow: '20 minutes',
+           action: 'acknowledged_and_skipped_duplicate'
+         });
+         return; // Return immediately - Cloud Function will auto-acknowledge
+       }
      }
+
+     // Mark message as being processed (with TTL)
+     await dedupRef.set({
+       messageId,
+       processedAt: new Date(),
+       publishTime: publishTime?.toISOString() || 'unknown',
+       expiresAt: new Date(Date.now() + DEDUP_WINDOW_MS)
+     });
+
    } catch (ageCheckError) {
-     // If we can't determine age, log but continue (better to process than get stuck)
-     logger.log('WARN', '[TaskEngine] Could not determine message age, proceeding with caution', { error: ageCheckError.message });
+     logger.log('WARN', '[TaskEngine] Could not check message age/deduplication, proceeding with caution', { error: ageCheckError.message });
    }
 
-   // 1. Parse the Message Payload
+   // 3. Parse the Message Payload
    let payload;
    try {
      const rawData = message.data ? Buffer.from(message.data, 'base64').toString() : message;
      payload = (typeof rawData === 'string') ? JSON.parse(rawData) : rawData;
    } catch (e) {
      logger.log('ERROR', '[TaskEngine] Failed to parse message payload.', e);
-     // Return to acknowledge the message even on parse error
-     return;
+     return; // Return to acknowledge
    }
 
    // CASE A: Payload is a Batch (from Dispatcher)
    if (payload.tasks && Array.isArray(payload.tasks)) {
-     // Enhanced logging to identify source of tasks
-     const messagePublishTime = publishTime?.toISOString() || context.timestamp || message.publishTime || 'unknown';
-     const messageId = context.eventId || message.messageId || 'unknown';
+     const messagePublishTime = publishTime?.toISOString() || 'unknown';
      const messageAgeMinutes = messageAge ? Math.round(messageAge / 60000) : 'unknown';
-     const taskTypes = payload.tasks.map(t => t.type).filter(Boolean);
-     const taskTypeCounts = taskTypes.reduce((acc, type) => {
-       acc[type] = (acc[type] || 0) + 1;
-       return acc;
-     }, {});
-
-     // Sample first 3 tasks for debugging
-     const sampleTasks = payload.tasks.slice(0, 3).map(t => ({
-       type: t.type,
-       hasData: !!t.data,
-       dataKeys: t.data ? Object.keys(t.data) : [],
-       dataSample: t.data ? JSON.stringify(t.data).substring(0, 100) : 'no data'
-     }));
 
      logger.log('INFO', `[TaskEngine] Received BATCH of ${payload.tasks.length} tasks.`, {
        messageId,
        messagePublishTime,
        messageAgeMinutes: `${messageAgeMinutes} minutes`,
-       taskTypeCounts,
-       sampleTasks,
        totalTasks: payload.tasks.length
      });
 
      const taskId = context.eventId || 'batch-' + Date.now();
 
      try {
-       // Use existing utils to execute the batch
        const { tasksToRun, otherTasks } = await prepareTaskBatches(payload.tasks, null, logger);
-
-       // executeTasks now handles the final flush internally
        await executeTasks(tasksToRun, otherTasks, dependencies, config, taskId);
      } catch (batchError) {
        logger.log('ERROR', `[TaskEngine] Error processing batch. Message will be acknowledged to prevent retry loop.`, {
          error: batchError.message,
          stack: batchError.stack,
-         messageId,
-         totalTasks: payload.tasks.length
+         messageId
        });
-       // Don't rethrow - acknowledge the message to prevent infinite retry
      }
      return;
    }
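One caveat on the deduplication block above: the read-then-write pair (`dedupRef.get()` followed by `dedupRef.set()`) is not atomic, so two deliveries landing within milliseconds of each other could both pass the check. A minimal sketch of an atomic variant using Firestore's `create()`, which rejects with ALREADY_EXISTS when the document is already there (the helper name and document shape are illustrative, not part of the package):

```js
// Sketch: atomically "claim" a message ID. create() fails if the doc exists,
// so only the first of two concurrent deliveries wins.
// `db` is an initialized Firestore instance from firebase-admin.
async function claimMessage(db, messageId, dedupWindowMs) {
  const ref = db.collection('system_task_deduplication').doc(`msg_${messageId}`);
  try {
    await ref.create({
      messageId,
      processedAt: new Date(),
      expiresAt: new Date(Date.now() + dedupWindowMs),
    });
    return true;                      // first delivery: safe to process
  } catch (err) {
    if (err.code === 6) return false; // gRPC 6 = ALREADY_EXISTS: duplicate, skip
    throw err;                        // unrelated Firestore failure
  }
}
```

Two related notes: the `expiresAt` field only prunes old documents if a Firestore TTL policy is actually configured on that field (e.g. via `gcloud firestore fields ttls update expiresAt --collection-group=system_task_deduplication --enable-ttl`), otherwise the collection grows without bound; and every rejection path above ends in `return` rather than `throw` because, for a Pub/Sub-triggered function deployed with retry-on-failure enabled, resolving acks the message while throwing triggers redelivery.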
@@ -115,15 +137,11 @@ async function handleRequest(message, context, config, dependencies) {
      return;
    }
 
-   // [FIX] Log full payload for debugging
    logger.log('INFO', `[TaskEngine] Processing Single Task: ${type}`, {
      type,
-     hasData: !!data,
-     dataKeys: data ? Object.keys(data) : [],
      dataSummary: data ? JSON.stringify(data).substring(0, 200) : 'no data'
    });
 
-   // 2. Route to Specific Handlers
    try {
      switch (type) {
        case 'DISCOVER':
@@ -139,12 +157,9 @@ async function handleRequest(message, context, config, dependencies) {
          await handlePopularInvestorUpdate(data, config, dependencies);
          break;
        case 'ON_DEMAND_USER_UPDATE':
-         // [FIX] Ensure data is correctly extracted and log for debugging
          const onDemandData = data || payload;
-         logger.log('INFO', `[TaskEngine] ON_DEMAND_USER_UPDATE - CID: ${onDemandData.cid}, Username: ${onDemandData.username}`);
          if (!onDemandData.cid || !onDemandData.username) {
-           logger.log('ERROR', `[TaskEngine] ON_DEMAND_USER_UPDATE missing required fields`, { data: onDemandData, payload });
-           // Don't throw - acknowledge to prevent retry
+           logger.log('ERROR', `[TaskEngine] ON_DEMAND_USER_UPDATE missing required fields`, { data: onDemandData });
            return;
          }
          await handleOnDemandUserUpdate(onDemandData, config, dependencies);
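For reference, a single-task message body that passes the validation above would look roughly like this before base64 encoding (field values are illustrative; the required keys are taken from the checks in the diff):

```js
// Published to the task topic; handleRequest() base64-decodes and JSON-parses it.
const singleTaskPayload = {
  type: 'ON_DEMAND_USER_UPDATE',
  data: {
    cid: 12345678,         // required: missing it logs an ERROR and acks
    username: 'some-user'  // required: same handling as cid
  }
};
```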
@@ -153,21 +168,15 @@ async function handleRequest(message, context, config, dependencies) {
          logger.log('WARN', `[TaskEngine] Unknown task type: ${type}`);
      }
 
-     // [CRITICAL FIX] Flush for Single Tasks
-     // Single tasks generate very little data (~2 ops), so they will NEVER
-     // hit the auto-flush threshold. We must flush explicitly here.
      if (batchManager) {
        await batchManager.flushBatches();
      }
 
    } catch (err) {
-     logger.log('ERROR', `[TaskEngine] Error processing task ${type}. Message will be acknowledged to prevent retry loop.`, {
+     logger.log('ERROR', `[TaskEngine] Error processing task ${type}. Message will be acknowledged.`, {
        error: err.message,
-       stack: err.stack,
        type
      });
-     // Don't rethrow - acknowledge the message to prevent infinite retry loops
-     // This prevents Pub/Sub from redelivering failed messages indefinitely
    }
  }
 
@@ -1,7 +1,7 @@
  /*
   * FILENAME: CloudFunctions/NpmWrappers/bulltrackers-module/functions/task-engine/utils/task_engine_utils.js
-  * (REFACTORED: Concurrency limit set to 1 to prevent API throttling)
-  * FIXED: Added final flushBatches() call to ensure data < 400 items is written.
+  * (REFACTORED: Concurrency limit increased to 5)
+  * FIXED: Increased concurrency to prevent timeouts on large batches.
   */
 
  /**
@@ -38,21 +38,9 @@ async function prepareTaskBatches(tasks, batchManager, logger) {
      if (task.type === 'update') {
        // Standard portfolio updates (Normal/Speculator ONLY)
        // NOTE: Popular Investors use type 'POPULAR_INVESTOR_UPDATE', not 'update'
-       logger.log('WARN', `[TaskEngine] Processing OLD-STYLE UPDATE task (Normal/Speculator):`, {
-         taskType: task.type,
-         userType: task.userType || 'unknown',
-         userId: task.userId,
-         note: 'This is a normal/speculator task. Popular Investors use POPULAR_INVESTOR_UPDATE type.'
-       });
        tasksToRun.push(task);
      } else {
        // Discover, Verify, Popular Investor (POPULAR_INVESTOR_UPDATE), Signed-In User (ON_DEMAND_USER_UPDATE)
-       logger.log('INFO', `[TaskEngine] Processing task type: ${task.type}`, {
-         taskType: task.type,
-         hasCid: !!task.cid,
-         hasUsername: !!task.username,
-         userId: task.userId || task.cid || 'unknown'
-       });
        otherTasks.push(task);
      }
    }
@@ -64,12 +52,16 @@ async function prepareTaskBatches(tasks, batchManager, logger) {
 
  /**
   * Executes all tasks.
-  * (REFACTORED: Concurrency limit set to 1)
+  * (FIXED: Concurrency limit increased to 5)
   */
  async function executeTasks(tasksToRun, otherTasks, dependencies, config, taskId) {
    const { logger, batchManager } = dependencies;
 
-   const limit = pLimit(1);
+   // [CRITICAL FIX] Increased from 1 to 5.
+   // A limit of 1 was causing timeouts on batches of 500 tasks (500s > 60s/540s timeout).
+   // 5 allows processing ~500 tasks in ~100 seconds (assuming 1s per task latency),
+   // which is well within the 9-minute Gen1 timeout.
+   const limit = pLimit(5);
 
    const allTaskPromises = [];
    let taskCounters = { update: 0, discover: 0, verify: 0, popular_investor: 0, on_demand: 0, unknown: 0, failed: 0 };
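The arithmetic in that comment follows directly from the p-limit pattern: with concurrency c and average per-task latency t, N queued tasks finish in roughly N * t / c. A minimal standalone sketch of the pattern as used above (note that p-limit v3 and earlier are CommonJS-requireable; later majors are ESM-only):

```js
const pLimit = require('p-limit');

// At most 5 handlers in flight; the rest queue until a slot frees up.
// 500 tasks * 1s / 5 ≈ 100s, versus ≈ 500s fully serialized at a limit of 1.
const limit = pLimit(5);

async function runAll(tasks, handler) {
  // Wrap every task in the limiter; Promise.all resolves once the queue drains.
  return Promise.all(tasks.map((task) => limit(() => handler(task))));
}
```

The trade-off is the one the 1.0.407 header named: the limit of 1 existed "to prevent API throttling", so 5 is a compromise between throttling risk and function-timeout risk.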
@@ -79,7 +71,6 @@ async function executeTasks(tasksToRun, otherTasks, dependencies, config, taskId
      const subTaskId = `${task.type}-${task.userType || 'unknown'}-${task.userId || task.cids?.[0] || task.cid || 'sub'}`;
 
      if (task.type === 'POPULAR_INVESTOR_UPDATE') {
-       // [FIX] Extract data from task if it's nested, otherwise use task directly
        const taskData = task.data || task;
        allTaskPromises.push(limit(() =>
          handlePopularInvestorUpdate(taskData, config, dependencies)
@@ -93,9 +84,7 @@ async function executeTasks(tasksToRun, otherTasks, dependencies, config, taskId
      }
 
      if (task.type === 'ON_DEMAND_USER_UPDATE') {
-       // [FIX] Extract data from task if it's nested, otherwise use task directly
        const taskData = task.data || task;
-       // [FIX] Validate task data before processing
        if (!taskData.cid || !taskData.username) {
          logger.log('ERROR', `[TaskEngine/${taskId}] ON_DEMAND_USER_UPDATE task missing required fields`, { task, taskData });
          taskCounters.failed++;
@@ -135,18 +124,9 @@ async function executeTasks(tasksToRun, otherTasks, dependencies, config, taskId
    }
 
    // 2. Queue 'update' tasks (Standard Normal/Speculator)
-   // NOTE: These should ONLY be normal/speculator tasks. Popular Investors use POPULAR_INVESTOR_UPDATE type.
    for (const task of tasksToRun) {
      const subTaskId = `${task.type}-${task.userType || 'unknown'}-${task.userId}`;
 
-     // [LOG FIX] Log what we're about to process
-     logger.log('INFO', `[TaskEngine/${taskId}] Queuing UPDATE task:`, {
-       taskType: task.type,
-       userType: task.userType || 'unknown',
-       userId: task.userId,
-       taskKeys: Object.keys(task)
-     });
-
      allTaskPromises.push(
        limit(() =>
          handleUpdate(task, subTaskId, dependencies, config)
@@ -162,9 +142,7 @@ async function executeTasks(tasksToRun, otherTasks, dependencies, config, taskId
    // 3. Wait for ALL tasks to complete
    await Promise.all(allTaskPromises);
 
-   // 4. [CRITICAL FIX] Flush any remaining data in the buffer
-   // This ensures that even if we only processed 200 items (less than the 400 threshold),
-   // they still get written to Firestore before the function exits.
+   // 4. Flush any remaining data in the buffer
    if (batchManager) {
      logger.log('INFO', `[TaskEngine/${taskId}] Triggering final batch flush...`);
      await batchManager.flushBatches();
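The final flush matters because the manager only auto-commits once its buffer crosses a size threshold (the 1.0.407 header mentions a 400-item cutoff, which leaves headroom under Firestore's 500-operations-per-batch limit). The package's actual BatchManager is not shown in this diff; a minimal sketch of the buffered-write pattern it implies, with illustrative names:

```js
// Illustrative buffered writer: queue set() ops, auto-flush at a threshold,
// and expose flushBatches() for the final sub-threshold remainder.
// `db` is an initialized Firestore instance from firebase-admin.
class IllustrativeBatchManager {
  constructor(db, threshold = 400) {
    this.db = db;
    this.threshold = threshold;
    this.ops = []; // queued { ref, data } writes
  }

  async queueSet(ref, data) {
    this.ops.push({ ref, data });
    // Auto-flush once the buffer is large enough.
    if (this.ops.length >= this.threshold) await this.flushBatches();
  }

  async flushBatches() {
    while (this.ops.length > 0) {
      const chunk = this.ops.splice(0, this.threshold);
      const batch = this.db.batch(); // firebase-admin WriteBatch, max 500 ops
      for (const { ref, data } of chunk) batch.set(ref, data, { merge: true });
      await batch.commit();
    }
  }
}
```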
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "bulltrackers-module",
-   "version": "1.0.407",
+   "version": "1.0.409",
    "description": "Helper Functions for Bulltrackers.",
    "main": "index.js",
    "files": [