bulltrackers-module 1.0.660 → 1.0.662

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,9 +5,12 @@
 
 const { FieldValue } = require('@google-cloud/firestore');
 const zlib = require('zlib');
+const { Storage } = require('@google-cloud/storage');
 const { getAlertTypeByComputation, generateAlertMessage } = require('./alert_type_registry');
 // Migration helpers removed - write directly to new path
 
+const storage = new Storage(); // Singleton GCS Client
+
 /**
  * Process alerts for a specific PI from computation results
  */
@@ -474,10 +477,48 @@ function readComputationResults(docData) {
 }
 
 /**
- * Read computation results, handling sharded data
+ * Read computation results, handling GCS pointers, sharded data, and compressed data
+ * UPDATED: Added GCS pointer support to read from GCS when data is offloaded
  */
-async function readComputationResultsWithShards(db, docData, docRef) {
+async function readComputationResultsWithShards(db, docData, docRef, logger = null) {
   try {
+    // -------------------------------------------------------------------------
+    // 1. GCS POINTER HANDLER (Check first - highest priority)
+    // -------------------------------------------------------------------------
+    if (docData.gcsUri || (docData._gcs && docData.gcsBucket && docData.gcsPath)) {
+      try {
+        const bucketName = docData.gcsBucket || docData.gcsUri.split('/')[2];
+        const fileName = docData.gcsPath || docData.gcsUri.split('/').slice(3).join('/');
+
+        if (logger) {
+          logger.log('INFO', `[AlertSystem] Reading computation results from GCS: ${fileName}`);
+        }
+
+        // download() buffers the full object contents into memory
+        const [fileContent] = await storage.bucket(bucketName).file(fileName).download();
+
+        // Assume gzip (the writer compresses by default); if that fails, try plain JSON
+        let decompressedData;
+        try {
+          decompressedData = JSON.parse(zlib.gunzipSync(fileContent).toString('utf8'));
+        } catch (gzipErr) {
+          // Fallback for uncompressed GCS files
+          decompressedData = JSON.parse(fileContent.toString('utf8'));
+        }
+
+        // Process the decompressed data through readComputationResults
+        return readComputationResults(decompressedData);
+      } catch (gcsErr) {
+        if (logger) {
+          logger.log('ERROR', `[AlertSystem] GCS fetch failed, falling back to Firestore: ${gcsErr.message}`);
+        }
+        // Fall through to Firestore logic below
+      }
+    }
+
+    // -------------------------------------------------------------------------
+    // 2. FIRESTORE SHARDED HANDLER
+    // -------------------------------------------------------------------------
    if (docData._sharded === true && docData._shardCount) {
      const shardsCol = docRef.collection('_shards');
      const shardsSnapshot = await shardsCol.get();
@@ -492,9 +533,17 @@ async function readComputationResultsWithShards(db, docData, docRef) {
        return readComputationResults(mergedData);
      }
    }
+
+    // -------------------------------------------------------------------------
+    // 3. FIRESTORE COMPRESSED OR DIRECT DATA HANDLER
+    // -------------------------------------------------------------------------
    return readComputationResults(docData);
  } catch (error) {
-   console.error('[readComputationResultsWithShards] Error reading sharded results', error);
+   if (logger) {
+     logger.log('ERROR', `[AlertSystem] Error reading computation results: ${error.message}`);
+   } else {
+     console.error('[readComputationResultsWithShards] Error reading sharded results', error);
+   }
    return { cids: [], metadata: {}, perUserData: {} };
  }
 }
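For orientation, here is a minimal sketch of the writer side this reader implies: a hypothetical offloadResultsToGcs helper that gzips the results, uploads them, and leaves behind a pointer document carrying exactly the fields the reader checks (_gcs, gcsBucket, gcsPath, gcsUri). The helper name and call site are assumptions; only the pointer field names and the gzip convention come from the diff above, and it reuses the zlib and storage singletons required at the top of this file.

// Hypothetical writer-side counterpart (not part of this diff)
async function offloadResultsToGcs(docRef, results, bucketName, filePath) {
  // Gzip the payload so the reader's gunzip fast path succeeds
  const compressed = zlib.gzipSync(Buffer.from(JSON.stringify(results), 'utf8'));
  // contentEncoding metadata is deliberately omitted so download() returns raw gzip bytes
  await storage.bucket(bucketName).file(filePath).save(compressed, { contentType: 'application/gzip' });
  // Pointer fields mirror what readComputationResultsWithShards checks
  await docRef.set({
    _gcs: true,
    gcsBucket: bucketName,
    gcsPath: filePath,
    gcsUri: `gs://${bucketName}/${filePath}`
  }, { merge: true });
}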
@@ -72,9 +72,9 @@ async function handleAlertTrigger(message, context, config, dependencies) {
     return;
   }
 
-  // 3. Read and decompress computation results
+  // 3. Read and decompress computation results (handling GCS, shards, and compression)
   const docData = docSnapshot.data();
-  const results = readComputationResults(docData);
+  const results = await readComputationResultsWithShards(db, docData, docRef, logger);
 
   if (!results.cids || results.cids.length === 0) {
     logger.log('INFO', `[AlertTrigger] No PIs found in computation results for ${computationName}`);
@@ -188,7 +188,7 @@ async function handleComputationResultWrite(change, context, config, dependencie
   // If it's PopularInvestorProfileMetrics, check for all-clear notifications only
   if (isProfileMetrics) {
     const docData = change.after.data();
-    const results = await readComputationResultsWithShards(db, docData, change.after.ref);
+    const results = await readComputationResultsWithShards(db, docData, change.after.ref, logger);
     if (results.cids && results.cids.length > 0) {
       await checkAndSendAllClearNotifications(db, logger, results.cids, date, config, dependencies);
     }
@@ -203,9 +203,9 @@ async function handleComputationResultWrite(change, context, config, dependencie
 
   logger.log('INFO', `[AlertTrigger] Processing alert computation: ${computationName} for date ${date}`);
 
-  // 2. Read and decompress computation results (handling shards)
+  // 2. Read and decompress computation results (handling GCS, shards, and compression)
   const docData = change.after.data();
-  const results = await readComputationResultsWithShards(db, docData, change.after.ref);
+  const results = await readComputationResultsWithShards(db, docData, change.after.ref, logger);
 
   if (!results.cids || results.cids.length === 0) {
     logger.log('INFO', `[AlertTrigger] No PIs found in computation results for ${computationName}`);
@@ -1,10 +1,13 @@
 // Firestore helper functions for fetching data from collections
 const { FieldValue, Timestamp } = require('@google-cloud/firestore');
+const { Storage } = require('@google-cloud/storage');
 const { dispatchSyncRequest } = require('../task_engine_helper.js');
 const { sanitizeCid, sanitizeDocId } = require('../security_utils.js');
 const crypto = require('crypto');
 const zlib = require('zlib');
 
+const storage = new Storage(); // Singleton GCS Client
+
 // 1. Fetch latest stored snapshots of user data from a user-centric collection
 
 // Examples
@@ -1210,14 +1213,40 @@ const getComputationResults = async (db, computationName, dateStr, userId = null
 
   const pointerData = pointerSnap.data();
 
-  // 2. Strategy: Compressed Data
+  // 2. Strategy: GCS Pointer (Check first - highest priority)
+  // If _gcs is true or gcsUri exists, the data is stored in GCS
+  // Note: Page mode is exempt from GCS logic (handled separately below)
+  if (pointerData._isPageMode !== true && (pointerData.gcsUri || (pointerData._gcs && pointerData.gcsBucket && pointerData.gcsPath))) {
+    try {
+      const bucketName = pointerData.gcsBucket || pointerData.gcsUri.split('/')[2];
+      const fileName = pointerData.gcsPath || pointerData.gcsUri.split('/').slice(3).join('/');
+
+      console.log(`[Computation] Reading from GCS: ${fileName} for ${computationName}`);
+
+      // download() buffers the full object contents into memory
+      const [fileContent] = await storage.bucket(bucketName).file(fileName).download();
+
+      // Assume gzip (the writer compresses by default); if that fails, try plain JSON
+      try {
+        return JSON.parse(zlib.gunzipSync(fileContent).toString('utf8'));
+      } catch (gzipErr) {
+        // Fallback for uncompressed GCS files
+        return JSON.parse(fileContent.toString('utf8'));
+      }
+    } catch (gcsErr) {
+      console.error(`[Computation] GCS fetch failed for ${computationName}, falling back to Firestore: ${gcsErr.message}`);
+      // Fall through to Firestore strategies below
+    }
+  }
+
+  // 3. Strategy: Compressed Data
   // If _compressed is true, the data is inside the payload field, just zipped.
   if (pointerData._compressed === true) {
     console.log(`[Computation] Reading compressed data for ${computationName}`);
     return tryDecompress(pointerData);
   }
 
-  // 3. Strategy: Sharded Data
+  // 4. Strategy: Sharded Data
   // If _sharded is true, we must fetch N documents from the _shards subcollection.
   if (pointerData._sharded === true) {
     const shardCount = pointerData._shardCount || 0;
@@ -1260,14 +1289,15 @@ const getComputationResults = async (db, computationName, dateStr, userId = null
     return reassembledData;
   }
 
-  // 4. Strategy: Page Mode (User Centric)
+  // 5. Strategy: Page Mode (User Centric)
   // If _isPageMode is true, we delegate to the pageCollection helper.
+  // Note: Page mode is exempt from GCS logic (uses individual user documents)
   if (pointerData._isPageMode === true) {
     console.log(`[Computation] Fetching page mode data for ${computationName} / User: ${userId}`);
     return await pageCollection(db, dateStr, computationName, userId);
   }
 
-  // 5. Strategy: Standard (Direct Read)
+  // 6. Strategy: Standard (Direct Read)
   // If no flags are set, the data is in the pointer document itself.
   console.log(`[Computation] Returning direct pointer data for ${computationName}`);
   return pointerData;
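The gcsUri fallback derives the bucket and object path positionally from a gs:// URI. A worked example with an invented URI shows why the indices are 2 and 3:

const gcsUri = 'gs://my-bucket/computations/2024-01-01/results.json.gz'; // invented example
const parts = gcsUri.split('/');
// parts = ['gs:', '', 'my-bucket', 'computations', '2024-01-01', 'results.json.gz']
const bucketName = parts[2];               // 'my-bucket'
const fileName = parts.slice(3).join('/'); // 'computations/2024-01-01/results.json.gz'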
@@ -133,15 +133,70 @@ async function getStableDateSession(config, dependencies, pass, dateLimitStr, fo
 // 2. NEW SNAPSHOT HANDLER
 async function handleSnapshot(config, dependencies, reqBody) {
   const { logger } = dependencies;
-  const date = reqBody.date;
+  const targetDate = reqBody.date; // Optional: if provided, only process up to this date
 
-  if (!date) throw new Error('Snapshot action requires a "date"');
-
   try {
-    logger.log('INFO', `[Dispatcher] 📸 Triggering Snapshot Service for ${date}`);
-    // Calls the service we created earlier
-    const result = await generateDailySnapshots(date, config, dependencies);
-    return result;
+    // Get earliest available root data date
+    const earliestDates = await getEarliestDataDates(config, dependencies);
+    const earliestDate = earliestDates.absoluteEarliest;
+
+    if (!earliestDate) {
+      throw new Error('Could not determine earliest available root data date');
+    }
+
+    // Determine end date: use targetDate if provided, otherwise use today
+    const endDate = targetDate ? new Date(targetDate + 'T00:00:00Z') : new Date();
+    endDate.setUTCHours(0, 0, 0, 0);
+
+    // Generate all dates from earliest to end date
+    const startDate = new Date(earliestDate);
+    startDate.setUTCHours(0, 0, 0, 0);
+
+    const dateStrings = getExpectedDateStrings(startDate, endDate);
+
+    if (dateStrings.length === 0) {
+      logger.log('WARN', '[Dispatcher] No dates to process for snapshot');
+      return { status: 'OK', processed: 0, skipped: 0 };
+    }
+
+    logger.log('INFO', `[Dispatcher] 📸 Processing snapshots for ${dateStrings.length} dates from ${dateStrings[0]} to ${dateStrings[dateStrings.length - 1]}`);
+
+    // Process each date (snapshot service will skip if already exists)
+    const results = [];
+    const BATCH_SIZE = 5; // Process 5 dates in parallel to avoid overwhelming the system
+
+    for (let i = 0; i < dateStrings.length; i += BATCH_SIZE) {
+      const batch = dateStrings.slice(i, i + BATCH_SIZE);
+      const batchResults = await Promise.allSettled(
+        batch.map(dateStr => generateDailySnapshots(dateStr, config, dependencies))
+      );
+
+      batchResults.forEach((result, idx) => {
+        const dateStr = batch[idx];
+        if (result.status === 'fulfilled') {
+          const value = result.value;
+          results.push({ date: dateStr, status: value.status || 'OK' });
+        } else {
+          logger.log('ERROR', `[Dispatcher] Snapshot failed for ${dateStr}: ${result.reason?.message || result.reason}`);
+          results.push({ date: dateStr, status: 'ERROR', error: result.reason?.message || String(result.reason) });
+        }
+      });
+    }
+
+    const successful = results.filter(r => r.status === 'OK').length;
+    const skipped = results.filter(r => r.status === 'SKIPPED').length;
+    const failed = results.filter(r => r.status === 'ERROR').length;
+
+    logger.log('INFO', `[Dispatcher] 📸 Snapshot batch complete: ${successful} processed, ${skipped} skipped, ${failed} failed out of ${results.length} total`);
+
+    return {
+      status: failed === 0 ? 'OK' : 'PARTIAL',
+      processed: successful,
+      skipped: skipped,
+      failed: failed,
+      total: results.length,
+      results: results
+    };
   } catch (e) {
     logger.log('ERROR', `[Dispatcher] Snapshot failed: ${e.message}`);
     // Return error object so workflow can see failure
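getExpectedDateStrings is called above but not shown in this diff. Here is a minimal sketch of the behaviour the handler appears to assume (an inclusive list of UTC YYYY-MM-DD strings from startDate to endDate); the implementation details are guesses:

function getExpectedDateStrings(startDate, endDate) {
  // Walk day by day in UTC, inclusive of both endpoints
  const dates = [];
  const cursor = new Date(startDate);
  while (cursor <= endDate) {
    dates.push(cursor.toISOString().slice(0, 10)); // 'YYYY-MM-DD'
    cursor.setUTCDate(cursor.getUTCDate() + 1);
  }
  return dates;
}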
@@ -247,6 +247,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 
   if (calc.manifest.class.getSchema && flushMode !== 'INTERMEDIATE') {
     const { class: _cls, ...safeMetadata } = calc.manifest;
+    // Ensure ttlDays is set to the resolved value (defaults to 90 if undefined)
+    safeMetadata.ttlDays = ttlDays;
     schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata });
   }
   if (calc.manifest.previousCategory && calc.manifest.previousCategory !== calc.manifest.category && flushMode !== 'INTERMEDIATE') {
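The hunk above writes a ttlDays value that is resolved outside this hunk. Based only on the comment "(defaults to 90 if undefined)", the resolution presumably reduces to something like the line below; this is an assumption, not code from the package:

const ttlDays = calc.manifest.ttlDays ?? 90; // assumed: manifest value with a 90-day default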
@@ -10,11 +10,30 @@ const dataLoader = require('../utils/data_loader');
 
 async function generateDailySnapshots(dateStr, config, deps) {
   const { logger } = deps;
-  logger.log('INFO', `[SnapshotService] 📸 Starting Full System Snapshot for ${dateStr}`);
-
   const bucketName = config.gcsBucketName || 'bulltrackers';
   const bucket = storage.bucket(bucketName);
 
+  // Quick check: if all main snapshots exist, skip entirely
+  const mainFiles = [
+    `${dateStr}/snapshots/portfolios.json.gz`,
+    `${dateStr}/snapshots/social.json.gz`,
+    `${dateStr}/snapshots/history.jsonl.gz`,
+    `${dateStr}/snapshots/ratings.json.gz`,
+    `${dateStr}/snapshots/rankings.json.gz`
+  ];
+
+  if (!config.forceSnapshot) {
+    const existenceChecks = await Promise.all(mainFiles.map(path => bucket.file(path).exists()));
+    const allExist = existenceChecks.every(([exists]) => exists);
+
+    if (allExist) {
+      logger.log('INFO', `[SnapshotService] ⏭️ All snapshots already exist for ${dateStr}, skipping`);
+      return { status: 'SKIPPED', date: dateStr, reason: 'all_exist' };
+    }
+  }
+
+  logger.log('INFO', `[SnapshotService] 📸 Starting Full System Snapshot for ${dateStr}`);
+
   // parallelize independent fetches
   await Promise.all([
     snapshotPortfolios(dateStr, bucket, config, deps), // Heavy
@@ -26,7 +45,7 @@ async function generateDailySnapshots(dateStr, config, deps) {
     snapshotMetadata(dateStr, bucket, config, deps) // Small Docs (Insights, Alerts, Watchlist)
   ]);
 
-  logger.log('INFO', `[SnapshotService] ✅ Full System Snapshot Complete.`);
+  logger.log('INFO', `[SnapshotService] ✅ Full System Snapshot Complete for ${dateStr}`);
   return { status: 'OK', date: dateStr };
 }
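A usage sketch for the new skip logic: setting forceSnapshot on the config bypasses the existence check and regenerates everything. The spread-into-config call shape below is an assumption:

// Force regeneration for one date even if all snapshot files already exist
const forced = await generateDailySnapshots('2024-01-01', { ...config, forceSnapshot: true }, deps);
// Without forceSnapshot, a fully populated date short-circuits to:
// { status: 'SKIPPED', date: '2024-01-01', reason: 'all_exist' }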
 
@@ -109,10 +128,10 @@ async function snapshotRankings(dateStr, bucket, config, deps) {
 async function snapshotMetadata(dateStr, bucket, config, deps) {
   // Bundle small files into one "metadata.json" or keep separate. Separate is safer for loaders.
   const ops = [
-    { name: 'insights', fn: () => dataLoader.loadDailyInsights(config, deps, dateStr) },
-    { name: 'page_views', fn: () => dataLoader.loadPIPageViews(config, deps, dateStr) },
-    { name: 'watchlist', fn: () => dataLoader.loadWatchlistMembership(config, deps, dateStr) },
-    { name: 'alerts', fn: () => dataLoader.loadPIAlertHistory(config, deps, dateStr) },
+    { name: 'insights',    fn: () => dataLoader.loadDailyInsights(config, deps, dateStr) },
+    { name: 'page_views',  fn: () => dataLoader.loadPIPageViews(config, deps, dateStr) },
+    { name: 'watchlist',   fn: () => dataLoader.loadWatchlistMembership(config, deps, dateStr) },
+    { name: 'alerts',      fn: () => dataLoader.loadPIAlertHistory(config, deps, dateStr) },
     { name: 'master_list', fn: () => dataLoader.loadPopularInvestorMasterList(config, deps) } // Not date bound usually, but good to snapshot state
   ];
 
@@ -4,6 +4,35 @@
  * UPDATED: Added schema validation to prevent silent batch failures.
  */
 
+/**
+ * Recursively removes undefined values from an object.
+ * Firestore doesn't allow undefined values, so we filter them out entirely.
+ * @param {any} data - Data to sanitize
+ * @returns {any} Sanitized data with undefined values removed
+ */
+function removeUndefinedValues(data) {
+  if (data === undefined) return undefined; // Will be filtered out
+  if (data === null) return null;
+  if (data instanceof Date) return data;
+
+  if (Array.isArray(data)) {
+    return data.map(item => removeUndefinedValues(item)).filter(item => item !== undefined);
+  }
+
+  if (typeof data === 'object') {
+    const sanitized = {};
+    for (const [key, value] of Object.entries(data)) {
+      const sanitizedValue = removeUndefinedValues(value);
+      if (sanitizedValue !== undefined) {
+        sanitized[key] = sanitizedValue;
+      }
+    }
+    return sanitized;
+  }
+
+  return data;
+}
+
 /**
  * Validates a schema object before storage.
  * Checks for circular references and size limits.
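A quick worked example of what removeUndefinedValues produces:

removeUndefinedValues({ a: 1, b: undefined, c: [1, undefined, 2], d: { e: undefined }, t: null });
// => { a: 1, c: [1, 2], d: {}, t: null }
// undefined is stripped from objects and arrays; null and Date instances pass through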
@@ -58,13 +87,17 @@ async function batchStoreSchemas(dependencies, config, schemas) {
     const docRef = db.collection(schemaCollection).doc(item.name);
 
     // Critical: Always overwrite 'lastUpdated' to now
-    batch.set(docRef, {
+    // Sanitize metadata to remove undefined values (Firestore doesn't allow undefined)
+    const sanitizedMetadata = item.metadata ? removeUndefinedValues(item.metadata) : {};
+    const docData = removeUndefinedValues({
       computationName: item.name,
       category: item.category,
       schema: item.schema,
-      metadata: item.metadata || {},
+      metadata: sanitizedMetadata,
       lastUpdated: new Date()
-    }, { merge: true });
+    });
+
+    batch.set(docRef, docData, { merge: true });
 
     validCount++;
 
@@ -42,14 +42,21 @@ class PubSubUtils {
 
   /**
    * [NEW] Publishes a single JSON message to a topic.
+   * Includes timeout to prevent hanging on network issues.
    */
-  async publish(topicName, message) {
+  async publish(topicName, message, timeoutMs = 10000) {
     const { pubsub, logger } = this.dependencies;
     const topic = pubsub.topic(topicName);
     const dataBuffer = Buffer.from(JSON.stringify(message));
 
     try {
-      await topic.publishMessage({ data: dataBuffer });
+      // Wrap publish in a timeout promise
+      const publishPromise = topic.publishMessage({ data: dataBuffer });
+      const timeoutPromise = new Promise((_, reject) =>
+        setTimeout(() => reject(new Error(`Publish timeout after ${timeoutMs}ms`)), timeoutMs)
+      );
+
+      await Promise.race([publishPromise, timeoutPromise]);
     } catch (error) {
       logger.log('ERROR', `[Core Utils] Failed to publish message to ${topicName}`, { error: error.message });
       throw error;
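A usage sketch for the timeout-wrapped publish; the instance and topic names here are invented:

// Fail fast after 5 seconds instead of the 10-second default
await pubSubUtils.publish('alert-triggers', { cid: 123, type: 'drawdown' }, 5000);

One design note: the setTimeout is never cleared, so even a successful publish leaves a timer pending for up to timeoutMs; storing the timer handle and calling clearTimeout in a finally block would release it immediately.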
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "bulltrackers-module",
-  "version": "1.0.660",
+  "version": "1.0.662",
   "description": "Helper Functions for Bulltrackers.",
   "main": "index.js",
   "files": [