bulltrackers-module 1.0.407 → 1.0.409

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
  /**
   * @fileoverview Main entry point for the Task Engine Cloud Function.
+  * FIXED: Deduplication window increased to 20m to handle timeouts/redeliveries.
+  * FIXED: Robust timestamp extraction for Gen 1 & Gen 2 Cloud Functions.
   */
  const { handleDiscover } = require('./helpers/discover_helpers');
  const { handleVerify } = require('./helpers/verify_helpers');
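Context for the second fix: Gen 1 (background) and Gen 2 (CloudEvents) functions hand the Pub/Sub event to the handler in different shapes, so no single field reliably carries the publish time. A rough sketch of the two layouts the new extraction chain has to cover (field values are illustrative, not taken from the package):

```js
// Gen 1 background function: handler(message, context)
const gen1Message = {
  data: 'eyJ0YXNrcyI6W119',                                // base64-encoded JSON payload
  attributes: { dispatched_at: '2024-01-01T12:00:00Z' },   // custom dispatcher attribute
  publishTime: '2024-01-01T12:00:00.000Z',                 // -> message.publishTime
};
const gen1Context = { eventId: '1234567890', timestamp: '2024-01-01T12:00:00.000Z' };

// Gen 2 CloudEvents function: a single event object is passed instead,
// which is consistent with the new chain also probing `message.time`.
const gen2Event = {
  id: 'abc-123',
  time: '2024-01-01T12:00:00.000Z',                        // -> message.time
  data: { message: { data: 'eyJ0YXNrcyI6W119', messageId: 'abc-123' } },
};

// The fallback chain from the diff works against either shape:
const pickPublishTime = (message, context = {}) =>
  message.publishTime ||
  message.time ||
  (message.attributes && message.attributes.dispatched_at) ||
  context.timestamp;

console.log(pickPublishTime(gen1Message, gen1Context)); // '2024-01-01T12:00:00.000Z'
console.log(pickPublishTime(gen2Event));                // '2024-01-01T12:00:00.000Z'
```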
@@ -10,99 +12,119 @@ const { handlePopularInvestorUpdate, handleOnDemandUserUpdate } = require('./hel
  const { executeTasks, prepareTaskBatches } = require('./utils/task_engine_utils');
 
  async function handleRequest(message, context, config, dependencies) {
-   const { logger, batchManager } = dependencies;
+   const { logger, batchManager, db } = dependencies;
 
-   // [URGENT FIX] Check message age - reject messages older than 15 minutes
-   // This prevents processing stale messages that cause infinite loops and high costs
-   const MAX_MESSAGE_AGE_MS = 15 * 60 * 1000; // 15 minutes
+   // [CRITICAL FIX] Max Age increased to 25m to match the larger dedup window.
+   // If a message is older than this, we drop it to stop infinite loops.
+   const MAX_MESSAGE_AGE_MS = 25 * 60 * 1000;
+
+   // [CRITICAL FIX] Dedup Window increased to 20m.
+   // Must be significantly larger than the Function Timeout (9m or 60m).
+   // This ensures that if a function times out and Pub/Sub redelivers it,
+   // we still recognize it as "recently processed" and skip it.
+   const DEDUP_WINDOW_MS = 20 * 60 * 1000;
+
    let messageAge = null;
    let publishTime = null;
+   const messageId = context.eventId || message.messageId || message.id || `msg-${Date.now()}-${Math.random()}`;
 
    try {
-     // Extract publish time from message
-     if (message.publishTime) {
-       publishTime = new Date(message.publishTime);
-       messageAge = Date.now() - publishTime.getTime();
-     } else if (message.attributes && message.attributes.dispatched_at) {
-       publishTime = new Date(message.attributes.dispatched_at);
-       messageAge = Date.now() - publishTime.getTime();
-     } else if (context.timestamp) {
-       publishTime = new Date(context.timestamp);
+     // [CRITICAL FIX] Robust Timestamp Extraction (Gen 1 & Gen 2 support)
+     let publishTimeStr = message.publishTime ||
+       message.time || // CloudEvents (Gen 2)
+       (message.attributes && message.attributes.dispatched_at) ||
+       context.timestamp;
+
+     if (publishTimeStr) {
+       publishTime = new Date(publishTimeStr);
        messageAge = Date.now() - publishTime.getTime();
+     } else {
+       logger.log('WARN', '[TaskEngine] Could not determine message publish time. Using current time as proxy (risky).');
+       // If we can't find a time, we can't trust age checks, but we proceed cautiously.
      }
 
+     // 1. Age Check
      if (messageAge !== null && messageAge > MAX_MESSAGE_AGE_MS) {
        const ageMinutes = Math.round(messageAge / 60000);
        logger.log('WARN', `[TaskEngine] REJECTING stale message (${ageMinutes} minutes old). Acknowledging and skipping to prevent cost.`, {
+         messageId,
          messageAge: `${ageMinutes} minutes`,
          publishTime: publishTime?.toISOString(),
-         maxAge: '15 minutes',
+         maxAge: '25 minutes',
          action: 'acknowledged_and_skipped'
        });
-       // Return immediately - Cloud Function will auto-acknowledge
-       return;
+       return; // Return immediately - Cloud Function will auto-acknowledge
+     }
+
+     // 2. Deduplication Check
+     const dedupCollection = db.collection('system_task_deduplication');
+     const dedupDocId = `msg_${messageId}`;
+     const dedupRef = dedupCollection.doc(dedupDocId);
+     const dedupDoc = await dedupRef.get();
+
+     if (dedupDoc.exists) {
+       const dedupData = dedupDoc.data();
+       const processedAt = dedupData.processedAt?.toDate?.() || new Date(dedupData.processedAt);
+       const timeSinceProcessed = Date.now() - processedAt.getTime();
+
+       if (timeSinceProcessed < DEDUP_WINDOW_MS) {
+         const minutesAgo = Math.round(timeSinceProcessed / 60000);
+         logger.log('WARN', `[TaskEngine] REJECTING duplicate message (processed ${minutesAgo} minutes ago). Acknowledging and skipping to prevent infinite loop.`, {
+           messageId,
+           processedAt: processedAt.toISOString(),
+           timeSinceProcessed: `${minutesAgo} minutes`,
+           dedupWindow: '20 minutes',
+           action: 'acknowledged_and_skipped_duplicate'
+         });
+         return; // Return immediately - Cloud Function will auto-acknowledge
+       }
      }
+
+     // Mark message as being processed (with TTL)
+     await dedupRef.set({
+       messageId,
+       processedAt: new Date(),
+       publishTime: publishTime?.toISOString() || 'unknown',
+       expiresAt: new Date(Date.now() + DEDUP_WINDOW_MS)
+     });
+
    } catch (ageCheckError) {
-     // If we can't determine age, log but continue (better to process than get stuck)
-     logger.log('WARN', '[TaskEngine] Could not determine message age, proceeding with caution', { error: ageCheckError.message });
+     logger.log('WARN', '[TaskEngine] Could not check message age/deduplication, proceeding with caution', { error: ageCheckError.message });
    }
 
-   // 1. Parse the Message Payload
+   // 3. Parse the Message Payload
    let payload;
    try {
      const rawData = message.data ? Buffer.from(message.data, 'base64').toString() : message;
      payload = (typeof rawData === 'string') ? JSON.parse(rawData) : rawData;
    } catch (e) {
      logger.log('ERROR', '[TaskEngine] Failed to parse message payload.', e);
-     // Return to acknowledge the message even on parse error
-     return;
+     return; // Return to acknowledge
    }
 
    // CASE A: Payload is a Batch (from Dispatcher)
    if (payload.tasks && Array.isArray(payload.tasks)) {
-     // Enhanced logging to identify source of tasks
-     const messagePublishTime = publishTime?.toISOString() || context.timestamp || message.publishTime || 'unknown';
-     const messageId = context.eventId || message.messageId || 'unknown';
+     const messagePublishTime = publishTime?.toISOString() || 'unknown';
      const messageAgeMinutes = messageAge ? Math.round(messageAge / 60000) : 'unknown';
-     const taskTypes = payload.tasks.map(t => t.type).filter(Boolean);
-     const taskTypeCounts = taskTypes.reduce((acc, type) => {
-       acc[type] = (acc[type] || 0) + 1;
-       return acc;
-     }, {});
-
-     // Sample first 3 tasks for debugging
-     const sampleTasks = payload.tasks.slice(0, 3).map(t => ({
-       type: t.type,
-       hasData: !!t.data,
-       dataKeys: t.data ? Object.keys(t.data) : [],
-       dataSample: t.data ? JSON.stringify(t.data).substring(0, 100) : 'no data'
-     }));
 
      logger.log('INFO', `[TaskEngine] Received BATCH of ${payload.tasks.length} tasks.`, {
        messageId,
        messagePublishTime,
        messageAgeMinutes: `${messageAgeMinutes} minutes`,
-       taskTypeCounts,
-       sampleTasks,
        totalTasks: payload.tasks.length
      });
 
      const taskId = context.eventId || 'batch-' + Date.now();
 
      try {
-       // Use existing utils to execute the batch
        const { tasksToRun, otherTasks } = await prepareTaskBatches(payload.tasks, null, logger);
-
-       // executeTasks now handles the final flush internally
        await executeTasks(tasksToRun, otherTasks, dependencies, config, taskId);
      } catch (batchError) {
        logger.log('ERROR', `[TaskEngine] Error processing batch. Message will be acknowledged to prevent retry loop.`, {
          error: batchError.message,
          stack: batchError.stack,
-         messageId,
-         totalTasks: payload.tasks.length
+         messageId
        });
-       // Don't rethrow - acknowledge the message to prevent infinite retry
      }
      return;
    }
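One caveat on the deduplication block above: the read-then-write pair (`dedupRef.get()` followed by `dedupRef.set()`) is not atomic, so two deliveries landing within milliseconds of each other could both pass the check. A minimal sketch of an atomic variant using Firestore's `create()`, which rejects with ALREADY_EXISTS when the document is already there (the helper name and document shape are illustrative, not part of the package):

```js
// Sketch: atomically "claim" a message ID. create() fails if the doc exists,
// so only the first of two concurrent deliveries wins.
// `db` is an initialized Firestore instance from firebase-admin.
async function claimMessage(db, messageId, dedupWindowMs) {
  const ref = db.collection('system_task_deduplication').doc(`msg_${messageId}`);
  try {
    await ref.create({
      messageId,
      processedAt: new Date(),
      expiresAt: new Date(Date.now() + dedupWindowMs),
    });
    return true;                      // first delivery: safe to process
  } catch (err) {
    if (err.code === 6) return false; // gRPC 6 = ALREADY_EXISTS: duplicate, skip
    throw err;                        // unrelated Firestore failure
  }
}
```

Two related notes: the `expiresAt` field only prunes old documents if a Firestore TTL policy is actually configured on that field (e.g. via `gcloud firestore fields ttls update expiresAt --collection-group=system_task_deduplication --enable-ttl`), otherwise the collection grows without bound; and every rejection path above ends in `return` rather than `throw` because, for a Pub/Sub-triggered function deployed with retry-on-failure enabled, resolving acks the message while throwing triggers redelivery.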
@@ -115,15 +137,11 @@ async function handleRequest(message, context, config, dependencies) {
      return;
    }
 
-   // [FIX] Log full payload for debugging
    logger.log('INFO', `[TaskEngine] Processing Single Task: ${type}`, {
      type,
-     hasData: !!data,
-     dataKeys: data ? Object.keys(data) : [],
      dataSummary: data ? JSON.stringify(data).substring(0, 200) : 'no data'
    });
 
-   // 2. Route to Specific Handlers
    try {
      switch (type) {
        case 'DISCOVER':
@@ -139,12 +157,9 @@ async function handleRequest(message, context, config, dependencies) {
          await handlePopularInvestorUpdate(data, config, dependencies);
          break;
        case 'ON_DEMAND_USER_UPDATE':
-         // [FIX] Ensure data is correctly extracted and log for debugging
          const onDemandData = data || payload;
-         logger.log('INFO', `[TaskEngine] ON_DEMAND_USER_UPDATE - CID: ${onDemandData.cid}, Username: ${onDemandData.username}`);
          if (!onDemandData.cid || !onDemandData.username) {
-           logger.log('ERROR', `[TaskEngine] ON_DEMAND_USER_UPDATE missing required fields`, { data: onDemandData, payload });
-           // Don't throw - acknowledge to prevent retry
+           logger.log('ERROR', `[TaskEngine] ON_DEMAND_USER_UPDATE missing required fields`, { data: onDemandData });
            return;
          }
          await handleOnDemandUserUpdate(onDemandData, config, dependencies);
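For reference, a single-task message body that passes the validation above would look roughly like this before base64 encoding (field values are illustrative; the required keys are taken from the checks in the diff):

```js
// Published to the task topic; handleRequest() base64-decodes and JSON-parses it.
const singleTaskPayload = {
  type: 'ON_DEMAND_USER_UPDATE',
  data: {
    cid: 12345678,         // required: missing it logs an ERROR and acks
    username: 'some-user'  // required: same handling as cid
  }
};
```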
@@ -153,21 +168,15 @@ async function handleRequest(message, context, config, dependencies) {
          logger.log('WARN', `[TaskEngine] Unknown task type: ${type}`);
      }
 
-     // [CRITICAL FIX] Flush for Single Tasks
-     // Single tasks generate very little data (~2 ops), so they will NEVER
-     // hit the auto-flush threshold. We must flush explicitly here.
      if (batchManager) {
        await batchManager.flushBatches();
      }
 
    } catch (err) {
-     logger.log('ERROR', `[TaskEngine] Error processing task ${type}. Message will be acknowledged to prevent retry loop.`, {
+     logger.log('ERROR', `[TaskEngine] Error processing task ${type}. Message will be acknowledged.`, {
        error: err.message,
-       stack: err.stack,
        type
      });
-     // Don't rethrow - acknowledge the message to prevent infinite retry loops
-     // This prevents Pub/Sub from redelivering failed messages indefinitely
    }
  }
 
@@ -1,7 +1,7 @@
  /*
   * FILENAME: CloudFunctions/NpmWrappers/bulltrackers-module/functions/task-engine/utils/task_engine_utils.js
-  * (REFACTORED: Concurrency limit set to 1 to prevent API throttling)
-  * FIXED: Added final flushBatches() call to ensure data < 400 items is written.
+  * (REFACTORED: Concurrency limit increased to 5)
+  * FIXED: Increased concurrency to prevent timeouts on large batches.
   */
 
  /**
@@ -38,21 +38,9 @@ async function prepareTaskBatches(tasks, batchManager, logger) {
      if (task.type === 'update') {
        // Standard portfolio updates (Normal/Speculator ONLY)
        // NOTE: Popular Investors use type 'POPULAR_INVESTOR_UPDATE', not 'update'
-       logger.log('WARN', `[TaskEngine] Processing OLD-STYLE UPDATE task (Normal/Speculator):`, {
-         taskType: task.type,
-         userType: task.userType || 'unknown',
-         userId: task.userId,
-         note: 'This is a normal/speculator task. Popular Investors use POPULAR_INVESTOR_UPDATE type.'
-       });
        tasksToRun.push(task);
      } else {
        // Discover, Verify, Popular Investor (POPULAR_INVESTOR_UPDATE), Signed-In User (ON_DEMAND_USER_UPDATE)
-       logger.log('INFO', `[TaskEngine] Processing task type: ${task.type}`, {
-         taskType: task.type,
-         hasCid: !!task.cid,
-         hasUsername: !!task.username,
-         userId: task.userId || task.cid || 'unknown'
-       });
        otherTasks.push(task);
      }
    }
@@ -64,12 +52,16 @@ async function prepareTaskBatches(tasks, batchManager, logger) {
 
  /**
   * Executes all tasks.
-  * (REFACTORED: Concurrency limit set to 1)
+  * (FIXED: Concurrency limit increased to 5)
   */
  async function executeTasks(tasksToRun, otherTasks, dependencies, config, taskId) {
    const { logger, batchManager } = dependencies;
 
-   const limit = pLimit(1);
+   // [CRITICAL FIX] Increased from 1 to 5.
+   // A limit of 1 was causing timeouts on batches of 500 tasks (500s > 60s/540s timeout).
+   // 5 allows processing ~500 tasks in ~100 seconds (assuming 1s per task latency),
+   // which is well within the 9-minute Gen1 timeout.
+   const limit = pLimit(5);
 
    const allTaskPromises = [];
    let taskCounters = { update: 0, discover: 0, verify: 0, popular_investor: 0, on_demand: 0, unknown: 0, failed: 0 };
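The arithmetic in that comment follows directly from the p-limit pattern: with concurrency c and average per-task latency t, N queued tasks finish in roughly N * t / c. A minimal standalone sketch of the pattern as used above (note that p-limit v3 and earlier are CommonJS-requireable; later majors are ESM-only):

```js
const pLimit = require('p-limit');

// At most 5 handlers in flight; the rest queue until a slot frees up.
// 500 tasks * 1s / 5 ≈ 100s, versus ≈ 500s fully serialized at a limit of 1.
const limit = pLimit(5);

async function runAll(tasks, handler) {
  // Wrap every task in the limiter; Promise.all resolves once the queue drains.
  return Promise.all(tasks.map((task) => limit(() => handler(task))));
}
```

The trade-off is the one the 1.0.407 header named: the limit of 1 existed "to prevent API throttling", so 5 is a compromise between throttling risk and function-timeout risk.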
@@ -79,7 +71,6 @@ async function executeTasks(tasksToRun, otherTasks, dependencies, config, taskId
      const subTaskId = `${task.type}-${task.userType || 'unknown'}-${task.userId || task.cids?.[0] || task.cid || 'sub'}`;
 
      if (task.type === 'POPULAR_INVESTOR_UPDATE') {
-       // [FIX] Extract data from task if it's nested, otherwise use task directly
        const taskData = task.data || task;
        allTaskPromises.push(limit(() =>
          handlePopularInvestorUpdate(taskData, config, dependencies)
@@ -93,9 +84,7 @@ async function executeTasks(tasksToRun, otherTasks, dependencies, config, taskId
      }
 
      if (task.type === 'ON_DEMAND_USER_UPDATE') {
-       // [FIX] Extract data from task if it's nested, otherwise use task directly
        const taskData = task.data || task;
-       // [FIX] Validate task data before processing
        if (!taskData.cid || !taskData.username) {
          logger.log('ERROR', `[TaskEngine/${taskId}] ON_DEMAND_USER_UPDATE task missing required fields`, { task, taskData });
          taskCounters.failed++;
@@ -135,18 +124,9 @@ async function executeTasks(tasksToRun, otherTasks, dependencies, config, taskId
    }
 
    // 2. Queue 'update' tasks (Standard Normal/Speculator)
-   // NOTE: These should ONLY be normal/speculator tasks. Popular Investors use POPULAR_INVESTOR_UPDATE type.
    for (const task of tasksToRun) {
      const subTaskId = `${task.type}-${task.userType || 'unknown'}-${task.userId}`;
 
-     // [LOG FIX] Log what we're about to process
-     logger.log('INFO', `[TaskEngine/${taskId}] Queuing UPDATE task:`, {
-       taskType: task.type,
-       userType: task.userType || 'unknown',
-       userId: task.userId,
-       taskKeys: Object.keys(task)
-     });
-
      allTaskPromises.push(
        limit(() =>
          handleUpdate(task, subTaskId, dependencies, config)
@@ -162,9 +142,7 @@ async function executeTasks(tasksToRun, otherTasks, dependencies, config, taskId
    // 3. Wait for ALL tasks to complete
    await Promise.all(allTaskPromises);
 
-   // 4. [CRITICAL FIX] Flush any remaining data in the buffer
-   // This ensures that even if we only processed 200 items (less than the 400 threshold),
-   // they still get written to Firestore before the function exits.
+   // 4. Flush any remaining data in the buffer
    if (batchManager) {
      logger.log('INFO', `[TaskEngine/${taskId}] Triggering final batch flush...`);
      await batchManager.flushBatches();
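The final flush matters because the manager only auto-commits once its buffer crosses a size threshold (the 1.0.407 header mentions a 400-item cutoff, which leaves headroom under Firestore's 500-operations-per-batch limit). The package's actual BatchManager is not shown in this diff; a minimal sketch of the buffered-write pattern it implies, with illustrative names:

```js
// Illustrative buffered writer: queue set() ops, auto-flush at a threshold,
// and expose flushBatches() for the final sub-threshold remainder.
// `db` is an initialized Firestore instance from firebase-admin.
class IllustrativeBatchManager {
  constructor(db, threshold = 400) {
    this.db = db;
    this.threshold = threshold;
    this.ops = []; // queued { ref, data } writes
  }

  async queueSet(ref, data) {
    this.ops.push({ ref, data });
    // Auto-flush once the buffer is large enough.
    if (this.ops.length >= this.threshold) await this.flushBatches();
  }

  async flushBatches() {
    while (this.ops.length > 0) {
      const chunk = this.ops.splice(0, this.threshold);
      const batch = this.db.batch(); // firebase-admin WriteBatch, max 500 ops
      for (const { ref, data } of chunk) batch.set(ref, data, { merge: true });
      await batch.commit();
    }
  }
}
```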
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "bulltrackers-module",
-   "version": "1.0.407",
+   "version": "1.0.409",
    "description": "Helper Functions for Bulltrackers.",
    "main": "index.js",
    "files": [