bulltrackers-module 1.0.658 → 1.0.660

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. package/functions/computation-system/data/AvailabilityChecker.js +163 -317
  2. package/functions/computation-system/data/CachedDataLoader.js +158 -222
  3. package/functions/computation-system/data/DependencyFetcher.js +201 -406
  4. package/functions/computation-system/executors/MetaExecutor.js +176 -280
  5. package/functions/computation-system/executors/StandardExecutor.js +325 -383
  6. package/functions/computation-system/helpers/computation_dispatcher.js +306 -701
  7. package/functions/computation-system/helpers/computation_worker.js +3 -2
  8. package/functions/computation-system/legacy/AvailabilityCheckerOld.js +382 -0
  9. package/functions/computation-system/legacy/CachedDataLoaderOld.js +357 -0
  10. package/functions/computation-system/legacy/DependencyFetcherOld.js +478 -0
  11. package/functions/computation-system/legacy/MetaExecutorold.js +364 -0
  12. package/functions/computation-system/legacy/StandardExecutorold.js +476 -0
  13. package/functions/computation-system/legacy/computation_dispatcherold.js +944 -0
  14. package/functions/computation-system/persistence/ResultCommitter.js +137 -188
  15. package/functions/computation-system/services/SnapshotService.js +129 -0
  16. package/functions/computation-system/tools/BuildReporter.js +12 -7
  17. package/functions/computation-system/utils/data_loader.js +213 -238
  18. package/package.json +3 -2
  19. package/functions/computation-system/workflows/bulltrackers_pipeline.yaml +0 -163
  20. package/functions/computation-system/workflows/data_feeder_pipeline.yaml +0 -115
  21. package/functions/computation-system/workflows/datafeederpipelineinstructions.md +0 -30
  22. package/functions/computation-system/workflows/morning_prep_pipeline.yaml +0 -55
@@ -1,14 +1,19 @@
1
1
  /**
2
2
  * @fileoverview Data loader sub-pipes for the Computation System.
3
3
  * REFACTORED: Now stateless and receive dependencies.
4
+ * UPDATED: Integrated GCS Snapshot "Fast Path" for massive read reduction across ALL data types.
4
5
  * FIXED: Added strict userType filtering to prevent fetching unnecessary data.
5
6
  * UPDATED: Verification now uses CollectionGroup query due to per-user storage.
6
- * UPDATED: Ratings now correctly handles flattened top-level schema (keys like "reviews.ID").
7
- * REMOVED: Redundant Price Shard Indexing logic.
7
+ * UPDATED: Ratings now correctly handles flattened top-level schema.
8
8
  */
9
9
  const zlib = require('zlib');
10
+ const { Storage } = require('@google-cloud/storage');
11
+ const readline = require('readline');
10
12
 
11
- // Helper for decompressing any doc if needed
13
+ // Singleton Storage Client
14
+ const storage = new Storage();
15
+
16
+ // Helper for decompressing any doc if needed (Firestore Legacy)
12
17
  function tryDecompress(data) {
13
18
  if (data && data._compressed === true && data.payload) {
14
19
  try {
@@ -21,6 +26,26 @@ function tryDecompress(data) {
21
26
  return data;
22
27
  }
23
28
 
29
+ // --- GCS FAST PATH HELPER ---
30
+ async function tryLoadFromGCS(config, dateString, snapshotName, logger) {
31
+ if (!dateString) return null;
32
+ const bucketName = config.gcsBucketName || 'bulltrackers';
33
+ try {
34
+ const bucket = storage.bucket(bucketName);
35
+ const file = bucket.file(`${dateString}/snapshots/${snapshotName}.json.gz`);
36
+ const [exists] = await file.exists();
37
+
38
+ if (exists) {
39
+ logger.log('INFO', `[DataLoader] ⚡️ GCS HIT: ${snapshotName} for ${dateString}`);
40
+ const [content] = await file.download();
41
+ return JSON.parse(zlib.gunzipSync(content).toString());
42
+ }
43
+ } catch (e) {
44
+ logger.log('WARN', `[DataLoader] GCS Check Failed (${snapshotName}): ${e.message}`);
45
+ }
46
+ return null;
47
+ }
48
+
24
49
  /** --- Data Loader Sub-Pipes (Stateless, Dependency-Injection) --- */
25
50
 
26
51
  /** Stage 1: Get portfolio part document references for a given date */
@@ -38,7 +63,6 @@ async function getPortfolioPartRefs(config, deps, dateString, requiredUserTypes
38
63
 
39
64
  // NEW STRUCTURE: Read from date-based collections (per-user documents)
40
65
  try {
41
- // Signed-In User Portfolios
42
66
  if (fetchAll || types.has('SIGNED_IN_USER')) {
43
67
  const signedInPortCollectionName = 'SignedInUserPortfolioData';
44
68
  const signedInPortDateDoc = db.collection(signedInPortCollectionName).doc(dateString);
@@ -49,16 +73,10 @@ async function getPortfolioPartRefs(config, deps, dateString, requiredUserTypes
49
73
 
50
74
  signedInPortSubcollections.forEach(subcol => {
51
75
  const cid = subcol.id;
52
- allPartRefs.push({
53
- ref: subcol.doc(cid),
54
- type: 'SIGNED_IN_USER',
55
- cid: cid,
56
- collectionType: 'NEW_STRUCTURE'
57
- });
76
+ allPartRefs.push({ ref: subcol.doc(cid), type: 'SIGNED_IN_USER', cid: cid, collectionType: 'NEW_STRUCTURE' });
58
77
  });
59
78
  }
60
79
 
61
- // Popular Investor Portfolios
62
80
  if (fetchAll || types.has('POPULAR_INVESTOR')) {
63
81
  const piPortCollectionName = 'PopularInvestorPortfolioData';
64
82
  const piPortDateDoc = db.collection(piPortCollectionName).doc(dateString);
@@ -69,12 +87,7 @@ async function getPortfolioPartRefs(config, deps, dateString, requiredUserTypes
69
87
 
70
88
  piPortSubcollections.forEach(subcol => {
71
89
  const cid = subcol.id;
72
- allPartRefs.push({
73
- ref: subcol.doc(cid),
74
- type: 'POPULAR_INVESTOR',
75
- cid: cid,
76
- collectionType: 'NEW_STRUCTURE'
77
- });
90
+ allPartRefs.push({ ref: subcol.doc(cid), type: 'POPULAR_INVESTOR', cid: cid, collectionType: 'NEW_STRUCTURE' });
78
91
  });
79
92
  }
80
93
  } catch (newStructError) {
@@ -83,42 +96,25 @@ async function getPortfolioPartRefs(config, deps, dateString, requiredUserTypes
83
96
 
84
97
  // LEGACY STRUCTURE: Read from block-based collections
85
98
  const collectionsToQuery = [];
86
-
87
- if ((fetchAll || types.has('NORMAL')) && config.normalUserPortfolioCollection)
88
- collectionsToQuery.push({ name: config.normalUserPortfolioCollection, type: 'NORMAL' });
89
-
90
- if ((fetchAll || types.has('SPECULATOR')) && config.speculatorPortfolioCollection)
91
- collectionsToQuery.push({ name: config.speculatorPortfolioCollection, type: 'SPECULATOR' });
92
-
93
- if ((fetchAll || types.has('POPULAR_INVESTOR')) && config.piPortfolioCollection)
94
- collectionsToQuery.push({ name: config.piPortfolioCollection, type: 'POPULAR_INVESTOR' });
95
-
96
- if ((fetchAll || types.has('SIGNED_IN_USER')) && config.signedInUsersCollection)
97
- collectionsToQuery.push({ name: config.signedInUsersCollection, type: 'SIGNED_IN_USER' });
99
+ if ((fetchAll || types.has('NORMAL')) && config.normalUserPortfolioCollection) collectionsToQuery.push({ name: config.normalUserPortfolioCollection, type: 'NORMAL' });
100
+ if ((fetchAll || types.has('SPECULATOR')) && config.speculatorPortfolioCollection) collectionsToQuery.push({ name: config.speculatorPortfolioCollection, type: 'SPECULATOR' });
101
+ if ((fetchAll || types.has('POPULAR_INVESTOR')) && config.piPortfolioCollection) collectionsToQuery.push({ name: config.piPortfolioCollection, type: 'POPULAR_INVESTOR' });
102
+ if ((fetchAll || types.has('SIGNED_IN_USER')) && config.signedInUsersCollection) collectionsToQuery.push({ name: config.signedInUsersCollection, type: 'SIGNED_IN_USER' });
98
103
 
99
104
  for (const { name: collectionName, type: collectionType } of collectionsToQuery) {
100
105
  try {
101
106
  const blockDocsQuery = db.collection(collectionName);
102
107
  const blockDocRefs = await withRetry(() => blockDocsQuery.listDocuments(), `listDocuments(${collectionName})`);
103
-
104
108
  if (!blockDocRefs.length) continue;
105
109
 
106
110
  const partsPromises = blockDocRefs.map(blockDocRef => {
107
- const partsCollectionRef = blockDocRef
108
- .collection(config.snapshotsSubcollection || 'snapshots')
109
- .doc(dateString)
110
- .collection(config.partsSubcollection || 'parts');
111
+ const partsCollectionRef = blockDocRef.collection(config.snapshotsSubcollection || 'snapshots').doc(dateString).collection(config.partsSubcollection || 'parts');
111
112
  return withRetry(() => partsCollectionRef.listDocuments(), `listParts(${partsCollectionRef.path})`);
112
113
  });
113
114
 
114
115
  const partDocArrays = await Promise.all(partsPromises);
115
-
116
116
  partDocArrays.forEach(partDocs => {
117
- allPartRefs.push(...partDocs.map(ref => ({
118
- ref,
119
- type: collectionType,
120
- collectionType: 'LEGACY'
121
- })));
117
+ allPartRefs.push(...partDocs.map(ref => ({ ref, type: collectionType, collectionType: 'LEGACY' })));
122
118
  });
123
119
  } catch (legacyError) {
124
120
  logger.log('WARN', `Failed to load legacy collection ${collectionName}: ${legacyError.message}`);
@@ -163,9 +159,7 @@ async function loadDataByRefs(config, deps, refObjects) {
163
159
 
164
160
  if (meta.type === 'POPULAR_INVESTOR') {
165
161
  chunkData[cid]._userType = 'POPULAR_INVESTOR';
166
- if (chunkData[cid].deepPositions) {
167
- chunkData[cid].DeepPositions = chunkData[cid].deepPositions;
168
- }
162
+ if (chunkData[cid].deepPositions) chunkData[cid].DeepPositions = chunkData[cid].deepPositions;
169
163
  } else if (meta.type === 'SIGNED_IN_USER') {
170
164
  chunkData[cid]._userType = 'SIGNED_IN_USER';
171
165
  }
@@ -184,21 +178,15 @@ async function loadDataByRefs(config, deps, refObjects) {
184
178
  if (deepSnap.exists) {
185
179
  const deepChunk = tryDecompress(deepSnap.data());
186
180
  for (const [uid, pData] of Object.entries(chunkData)) {
187
- if (deepChunk[uid] && deepChunk[uid].positions) {
188
- pData.DeepPositions = deepChunk[uid].positions;
189
- }
181
+ if (deepChunk[uid] && deepChunk[uid].positions) pData.DeepPositions = deepChunk[uid].positions;
190
182
  }
191
183
  }
192
- for (const pData of Object.values(chunkData)) {
193
- pData._userType = 'POPULAR_INVESTOR';
194
- }
184
+ for (const pData of Object.values(chunkData)) pData._userType = 'POPULAR_INVESTOR';
195
185
  return chunkData;
196
186
  }).catch(() => chunkData)
197
187
  );
198
188
  } else if (meta.type === 'SIGNED_IN_USER') {
199
- for (const pData of Object.values(chunkData)) {
200
- pData._userType = 'SIGNED_IN_USER';
201
- }
189
+ for (const pData of Object.values(chunkData)) pData._userType = 'SIGNED_IN_USER';
202
190
  deepFetchPromises.push(Promise.resolve(chunkData));
203
191
  } else {
204
192
  deepFetchPromises.push(Promise.resolve(chunkData));
@@ -208,16 +196,19 @@ async function loadDataByRefs(config, deps, refObjects) {
208
196
 
209
197
  const resolvedChunks = await Promise.all(deepFetchPromises);
210
198
  resolvedChunks.forEach(chunk => {
211
- if (chunk && typeof chunk === 'object') {
212
- Object.assign(mergedPortfolios, chunk);
213
- }
199
+ if (chunk && typeof chunk === 'object') Object.assign(mergedPortfolios, chunk);
214
200
  });
215
201
  }
216
202
  return mergedPortfolios;
217
203
  }
218
204
 
219
205
  /** Stage 3: Load a full day map by delegating to loadDataByRefs */
220
- async function loadFullDayMap(config, deps, partRefs) {
206
+ async function loadFullDayMap(config, deps, partRefs, dateString) {
207
+ // 1. GCS FAST PATH
208
+ const cached = await tryLoadFromGCS(config, dateString, 'portfolios', deps.logger);
209
+ if (cached) return cached;
210
+
211
+ // 2. FIRESTORE FALLBACK
221
212
  const { logger } = deps;
222
213
  if (!partRefs.length) return {};
223
214
  logger.log('TRACE', `Loading full day map from ${partRefs.length} references...`);
@@ -230,6 +221,12 @@ async function loadFullDayMap(config, deps, partRefs) {
230
221
  async function loadDailyInsights(config, deps, dateString) {
231
222
  const { db, logger, calculationUtils } = deps;
232
223
  const { withRetry } = calculationUtils;
224
+
225
+ // 1. GCS FAST PATH
226
+ const cached = await tryLoadFromGCS(config, dateString, 'insights', logger);
227
+ if (cached) return cached;
228
+
229
+ // 2. FIRESTORE FALLBACK
233
230
  const insightsCollectionName = config.insightsCollectionName || 'daily_instrument_insights';
234
231
  logger.log('INFO', `Loading daily insights for ${dateString} from ${insightsCollectionName}`);
235
232
  try {
@@ -246,27 +243,23 @@ async function loadDailyInsights(config, deps, dateString) {
246
243
 
247
244
  /** Stage 5: Load and Partition Social Data */
248
245
  async function loadDailySocialPostInsights(config, deps, dateString) {
249
- const { db, logger, calculationUtils, collectionRegistry } = deps;
246
+ const { db, logger, calculationUtils } = deps;
250
247
  const { withRetry } = calculationUtils;
251
248
 
249
+ // 1. GCS FAST PATH
250
+ const cached = await tryLoadFromGCS(config, dateString, 'social', logger);
251
+ if (cached) return cached;
252
+
253
+ // 2. FIRESTORE FALLBACK
252
254
  logger.log('INFO', `Loading and partitioning social data for ${dateString}`);
253
255
 
254
- // 1. Initialize Buckets
255
- const result = {
256
- generic: {}, // Map<PostId, Data> - For Normal/Speculator
257
- pi: {}, // Map<UserId, Map<PostId, Data>> - For Popular Investors
258
- signedIn: {} // Map<UserId, Map<PostId, Data>> - For Signed-In Users
259
- };
260
-
261
- // NEW STRUCTURE: Read from date-based collections
256
+ const result = { generic: {}, pi: {}, signedIn: {} };
257
+
258
+ // NEW STRUCTURE
262
259
  try {
263
- // Signed-In User Social: SignedInUserSocialPostData/{date}/{cid}/{cid}
264
260
  const signedInSocialCollectionName = 'SignedInUserSocialPostData';
265
261
  const signedInSocialDateDoc = db.collection(signedInSocialCollectionName).doc(dateString);
266
- const signedInSocialSubcollections = await withRetry(
267
- () => signedInSocialDateDoc.listCollections(),
268
- `listSignedInSocial(${dateString})`
269
- );
262
+ const signedInSocialSubcollections = await withRetry(() => signedInSocialDateDoc.listCollections(), `listSignedInSocial(${dateString})`);
270
263
 
271
264
  for (const subcol of signedInSocialSubcollections) {
272
265
  const cid = subcol.id;
@@ -280,13 +273,9 @@ async function loadDailySocialPostInsights(config, deps, dateString) {
280
273
  }
281
274
  }
282
275
 
283
- // Popular Investor Social: PopularInvestorSocialPostData/{date}/{cid}/{cid}
284
276
  const piSocialCollectionName = 'PopularInvestorSocialPostData';
285
277
  const piSocialDateDoc = db.collection(piSocialCollectionName).doc(dateString);
286
- const piSocialSubcollections = await withRetry(
287
- () => piSocialDateDoc.listCollections(),
288
- `listPISocial(${dateString})`
289
- );
278
+ const piSocialSubcollections = await withRetry(() => piSocialDateDoc.listCollections(), `listPISocial(${dateString})`);
290
279
 
291
280
  for (const subcol of piSocialSubcollections) {
292
281
  const cid = subcol.id;
@@ -300,44 +289,35 @@ async function loadDailySocialPostInsights(config, deps, dateString) {
300
289
  }
301
290
  }
302
291
 
303
- // Instrument Social: InstrumentFeedSocialPostData/{date}/posts/{postId}
304
292
  const instrumentSocialCollectionName = 'InstrumentFeedSocialPostData';
305
293
  const instrumentSocialDateDoc = db.collection(instrumentSocialCollectionName).doc(dateString);
306
294
  const instrumentSocialPostsCol = instrumentSocialDateDoc.collection('posts');
307
- const instrumentSocialSnapshot = await withRetry(
308
- () => instrumentSocialPostsCol.limit(1000).get(),
309
- `getInstrumentSocial(${dateString})`
310
- );
295
+ const instrumentSocialSnapshot = await withRetry(() => instrumentSocialPostsCol.limit(1000).get(), `getInstrumentSocial(${dateString})`);
311
296
 
312
297
  instrumentSocialSnapshot.forEach(doc => {
313
298
  const data = tryDecompress(doc.data());
314
299
  result.generic[doc.id] = data;
315
300
  });
316
301
 
317
- logger.log('INFO', `Loaded Social Data (NEW): ${Object.keys(result.generic).length} Generic, ${Object.keys(result.pi).length} PIs, ${Object.keys(result.signedIn).length} SignedIn.`);
302
+ logger.log('INFO', `Loaded Social Data (NEW): ${Object.keys(result.generic).length} Generic, ${Object.keys(result.pi).length} PIs.`);
318
303
  } catch (newStructError) {
319
- logger.log('WARN', `Failed to load from new structure, falling back to legacy: ${newStructError.message}`);
304
+ logger.log('WARN', `Failed to load from new structure: ${newStructError.message}`);
320
305
  }
321
306
 
322
- // LEGACY STRUCTURE: CollectionGroup query (for backward compatibility)
307
+ // LEGACY STRUCTURE
323
308
  const PI_COL_NAME = config.piSocialCollectionName || config.piSocialCollection || 'pi_social_posts';
324
309
  const SIGNED_IN_COL_NAME = config.signedInUserSocialCollection || 'signed_in_users_social';
325
-
326
310
  const startDate = new Date(dateString + 'T00:00:00Z');
327
311
  const endDate = new Date(dateString + 'T23:59:59Z');
328
312
 
329
313
  try {
330
- const postsQuery = db.collectionGroup('posts')
331
- .where('fetchedAt', '>=', startDate)
332
- .where('fetchedAt', '<=', endDate);
333
-
314
+ const postsQuery = db.collectionGroup('posts').where('fetchedAt', '>=', startDate).where('fetchedAt', '<=', endDate);
334
315
  const querySnapshot = await withRetry(() => postsQuery.get(), `getSocialPosts(${dateString})`);
335
316
 
336
317
  if (!querySnapshot.empty) {
337
318
  querySnapshot.forEach(doc => {
338
319
  const data = tryDecompress(doc.data());
339
320
  const path = doc.ref.path;
340
-
341
321
  if (path.includes(PI_COL_NAME)) {
342
322
  const parts = path.split('/');
343
323
  const colIndex = parts.indexOf(PI_COL_NAME);
@@ -346,8 +326,7 @@ async function loadDailySocialPostInsights(config, deps, dateString) {
346
326
  if (!result.pi[userId]) result.pi[userId] = {};
347
327
  result.pi[userId][doc.id] = data;
348
328
  }
349
- }
350
- else if (path.includes(SIGNED_IN_COL_NAME)) {
329
+ } else if (path.includes(SIGNED_IN_COL_NAME)) {
351
330
  const parts = path.split('/');
352
331
  const colIndex = parts.indexOf(SIGNED_IN_COL_NAME);
353
332
  if (colIndex !== -1 && parts[colIndex + 1]) {
@@ -355,18 +334,13 @@ async function loadDailySocialPostInsights(config, deps, dateString) {
355
334
  if (!result.signedIn[userId]) result.signedIn[userId] = {};
356
335
  result.signedIn[userId][doc.id] = data;
357
336
  }
358
- }
359
- else {
337
+ } else {
360
338
  result.generic[doc.id] = data;
361
339
  }
362
340
  });
363
- logger.log('INFO', `Loaded Social Data (LEGACY): ${Object.keys(result.generic).length} Generic, ${Object.keys(result.pi).length} PIs, ${Object.keys(result.signedIn).length} SignedIn.`);
364
- } else {
365
- logger.log('WARN', `No social posts found for ${dateString} via CollectionGroup.`);
366
341
  }
367
-
368
342
  } catch (error) {
369
- logger.log('ERROR', `Failed to load social posts: ${error.message}`);
343
+ logger.log('ERROR', `Failed to load social posts (legacy): ${error.message}`);
370
344
  }
371
345
 
372
346
  return result;
@@ -395,12 +369,7 @@ async function getHistoryPartRefs(config, deps, dateString, requiredUserTypes =
395
369
  `listSignedInHistory(${dateString})`
396
370
  );
397
371
  signedInHistSubcollections.forEach(subcol => {
398
- allPartRefs.push({
399
- ref: subcol.doc(subcol.id),
400
- type: 'SIGNED_IN_USER',
401
- cid: subcol.id,
402
- collectionType: 'NEW_STRUCTURE'
403
- });
372
+ allPartRefs.push({ ref: subcol.doc(subcol.id), type: 'SIGNED_IN_USER', cid: subcol.id, collectionType: 'NEW_STRUCTURE' });
404
373
  });
405
374
  }
406
375
 
@@ -412,12 +381,7 @@ async function getHistoryPartRefs(config, deps, dateString, requiredUserTypes =
412
381
  `listPIHistory(${dateString})`
413
382
  );
414
383
  piHistSubcollections.forEach(subcol => {
415
- allPartRefs.push({
416
- ref: subcol.doc(subcol.id),
417
- type: 'POPULAR_INVESTOR',
418
- cid: subcol.id,
419
- collectionType: 'NEW_STRUCTURE'
420
- });
384
+ allPartRefs.push({ ref: subcol.doc(subcol.id), type: 'POPULAR_INVESTOR', cid: subcol.id, collectionType: 'NEW_STRUCTURE' });
421
385
  });
422
386
  }
423
387
  } catch (newStructError) {
@@ -426,14 +390,10 @@ async function getHistoryPartRefs(config, deps, dateString, requiredUserTypes =
426
390
 
427
391
  // LEGACY STRUCTURE
428
392
  const collectionsToQuery = [];
429
- if ((fetchAll || types.has('NORMAL')) && config.normalUserHistoryCollection)
430
- collectionsToQuery.push(config.normalUserHistoryCollection);
431
- if ((fetchAll || types.has('SPECULATOR')) && config.speculatorHistoryCollection)
432
- collectionsToQuery.push(config.speculatorHistoryCollection);
433
- if ((fetchAll || types.has('POPULAR_INVESTOR')) && config.piHistoryCollection)
434
- collectionsToQuery.push(config.piHistoryCollection);
435
- if ((fetchAll || types.has('SIGNED_IN_USER')) && config.signedInHistoryCollection)
436
- collectionsToQuery.push(config.signedInHistoryCollection);
393
+ if ((fetchAll || types.has('NORMAL')) && config.normalUserHistoryCollection) collectionsToQuery.push(config.normalUserHistoryCollection);
394
+ if ((fetchAll || types.has('SPECULATOR')) && config.speculatorHistoryCollection) collectionsToQuery.push(config.speculatorHistoryCollection);
395
+ if ((fetchAll || types.has('POPULAR_INVESTOR')) && config.piHistoryCollection) collectionsToQuery.push(config.piHistoryCollection);
396
+ if ((fetchAll || types.has('SIGNED_IN_USER')) && config.signedInHistoryCollection) collectionsToQuery.push(config.signedInHistoryCollection);
437
397
 
438
398
  for (const collectionName of collectionsToQuery) {
439
399
  try {
@@ -442,18 +402,13 @@ async function getHistoryPartRefs(config, deps, dateString, requiredUserTypes =
442
402
  if (!blockDocRefs.length) continue;
443
403
 
444
404
  const partsPromises = blockDocRefs.map(blockDocRef => {
445
- const partsCollectionRef = blockDocRef.collection(config.snapshotsSubcollection || 'snapshots')
446
- .doc(dateString).collection(config.partsSubcollection || 'parts');
405
+ const partsCollectionRef = blockDocRef.collection(config.snapshotsSubcollection || 'snapshots').doc(dateString).collection(config.partsSubcollection || 'parts');
447
406
  return withRetry(() => partsCollectionRef.listDocuments(), `listParts(${partsCollectionRef.path})`);
448
407
  });
449
408
 
450
409
  const partDocArrays = await Promise.all(partsPromises);
451
410
  partDocArrays.forEach(partDocs => {
452
- allPartRefs.push(...partDocs.map(ref => ({
453
- ref,
454
- type: 'PART',
455
- collectionType: 'LEGACY'
456
- })));
411
+ allPartRefs.push(...partDocs.map(ref => ({ ref, type: 'PART', collectionType: 'LEGACY' })));
457
412
  });
458
413
  } catch (legacyError) {
459
414
  logger.log('WARN', `Failed to load legacy history collection ${collectionName}: ${legacyError.message}`);
@@ -467,6 +422,28 @@ async function getHistoryPartRefs(config, deps, dateString, requiredUserTypes =
467
422
  /** Stage 7: Stream portfolio data in chunks */
468
423
  async function* streamPortfolioData(config, deps, dateString, providedRefs = null, requiredUserTypes = null) {
469
424
  const { logger } = deps;
425
+
426
+ // 1. GCS FAST PATH (Snapshot) - Only if full run (no providedRefs)
427
+ if (!providedRefs) {
428
+ try {
429
+ const bucketName = config.gcsBucketName || 'bulltrackers';
430
+ const bucket = storage.bucket(bucketName);
431
+ const file = bucket.file(`${dateString}/snapshots/portfolios.json.gz`);
432
+ const [exists] = await file.exists();
433
+
434
+ if (exists) {
435
+ logger.log('INFO', `[DataLoader] ⚡️ STREAMING: Hydrating Portfolios from GCS Snapshot`);
436
+ const [content] = await file.download();
437
+ const fullData = JSON.parse(zlib.gunzipSync(content).toString());
438
+ yield fullData; // Yield all in one chunk as it fits in memory
439
+ return;
440
+ }
441
+ } catch (e) {
442
+ logger.log('WARN', `[DataLoader] GCS Portfolio Stream failed: ${e.message}. Falling back.`);
443
+ }
444
+ }
445
+
446
+ // 2. FIRESTORE FALLBACK
470
447
  const refs = providedRefs || (await getPortfolioPartRefs(config, deps, dateString, requiredUserTypes));
471
448
  if (refs.length === 0) { logger.log('WARN', `[streamPortfolioData] No portfolio refs found for ${dateString}. Stream is empty.`); return; }
472
449
 
@@ -484,6 +461,50 @@ async function* streamPortfolioData(config, deps, dateString, providedRefs = nul
484
461
  /** Stage 8: Stream history data in chunks */
485
462
  async function* streamHistoryData(config, deps, dateString, providedRefs = null, requiredUserTypes = null) {
486
463
  const { logger } = deps;
464
+
465
+ // 1. GCS FAST PATH (JSONL Streaming) - Only if full run
466
+ if (!providedRefs) {
467
+ try {
468
+ const bucketName = config.gcsBucketName || 'bulltrackers';
469
+ const bucket = storage.bucket(bucketName);
470
+ const file = bucket.file(`${dateString}/snapshots/history.jsonl.gz`);
471
+ const [exists] = await file.exists();
472
+
473
+ if (exists) {
474
+ logger.log('INFO', `[DataLoader] ⚡️ STREAMING: Hydrating History from GCS (JSONL)`);
475
+
476
+ const fileStream = file.createReadStream().pipe(zlib.createGunzip());
477
+ const rl = readline.createInterface({ input: fileStream, crlfDelay: Infinity });
478
+
479
+ let currentBatch = {};
480
+ let count = 0;
481
+ const BATCH_SIZE = 50;
482
+
483
+ for await (const line of rl) {
484
+ if (!line.trim()) continue;
485
+ try {
486
+ const userEntry = JSON.parse(line);
487
+ Object.assign(currentBatch, userEntry);
488
+ count++;
489
+ if (count >= BATCH_SIZE) {
490
+ yield currentBatch;
491
+ currentBatch = {};
492
+ count = 0;
493
+ }
494
+ } catch (parseErr) {
495
+ logger.log('ERROR', `[DataLoader] JSONL Parse Error: ${parseErr.message}`);
496
+ }
497
+ }
498
+ if (Object.keys(currentBatch).length > 0) yield currentBatch;
499
+ logger.log('INFO', `[DataLoader] Finished streaming History from GCS.`);
500
+ return;
501
+ }
502
+ } catch (e) {
503
+ logger.log('WARN', `[DataLoader] GCS History Stream failed: ${e.message}. Falling back.`);
504
+ }
505
+ }
506
+
507
+ // 2. FIRESTORE FALLBACK
487
508
  const refs = providedRefs || (await getHistoryPartRefs(config, deps, dateString, requiredUserTypes));
488
509
  if (refs.length === 0) { logger.log('WARN', `[streamHistoryData] No history refs found for ${dateString}. Stream is empty.`); return; }
489
510
 
@@ -515,19 +536,12 @@ async function getPriceShardRefs(config, deps) {
515
536
 
516
537
  /** Stage 10: Smart Shard Lookup System (DEPRECATED/SIMPLIFIED) */
517
538
  async function ensurePriceShardIndex(config, deps) {
518
- // [DEPRECATED] This function previously built an index in 'system_metadata/price_shard_index'.
519
- // It has been removed to avoid performing computation/indexing in the data loader.
520
- // Use 'Fetch All' strategy in Stage 9 instead.
521
- return {};
539
+ return {}; // Deprecated
522
540
  }
523
541
 
524
542
  async function getRelevantShardRefs(config, deps, targetInstrumentIds) {
525
543
  const { logger } = deps;
526
-
527
- // [UPDATED] Smart shard lookup is disabled due to missing index infrastructure
528
- // and to avoid computing indexes during load time.
529
- // Falling back to Stage 9 (Fetch All Shards).
530
- logger.log('INFO', `[ShardLookup] Smart indexing disabled. Fetching all price shards for ${targetInstrumentIds ? targetInstrumentIds.length : 'all'} instruments.`);
544
+ logger.log('INFO', `[ShardLookup] Smart indexing disabled. Fetching all price shards.`);
531
545
  return getPriceShardRefs(config, deps);
532
546
  }
533
547
 
@@ -535,19 +549,18 @@ async function getRelevantShardRefs(config, deps, targetInstrumentIds) {
535
549
  async function loadPopularInvestorRankings(config, deps, dateString) {
536
550
  const { db, logger, calculationUtils } = deps;
537
551
  const { withRetry } = calculationUtils;
552
+
553
+ // 1. GCS FAST PATH
554
+ const cached = await tryLoadFromGCS(config, dateString, 'rankings', logger);
555
+ if (cached) return cached;
556
+
557
+ // 2. FIRESTORE FALLBACK
538
558
  const collectionName = config.popularInvestorRankingsCollection || 'popular_investor_rankings';
539
-
540
559
  logger.log('INFO', `Loading Popular Investor Rankings for ${dateString}`);
541
-
542
560
  try {
543
561
  const docRef = db.collection(collectionName).doc(dateString);
544
562
  const docSnap = await withRetry(() => docRef.get(), `getRankings(${dateString})`);
545
-
546
- if (!docSnap.exists) {
547
- logger.log('WARN', `Rankings not found for ${dateString}`);
548
- return null;
549
- }
550
-
563
+ if (!docSnap.exists) { logger.log('WARN', `Rankings not found for ${dateString}`); return null; }
551
564
  const data = tryDecompress(docSnap.data());
552
565
  return data.Items || [];
553
566
  } catch (error) {
@@ -558,48 +571,41 @@ async function loadPopularInvestorRankings(config, deps, dateString) {
558
571
 
559
572
  /** Stage 12: Load User Verification Profiles
560
573
  * [UPDATED] Scans global verification data via CollectionGroup since it's now stored per-user.
574
+ * [UPDATED] Added optional dateString param to support GCS snapshot checks.
561
575
  */
562
- async function loadVerificationProfiles(config, deps) {
576
+ async function loadVerificationProfiles(config, deps, dateString = null) {
563
577
  const { db, logger, calculationUtils } = deps;
564
578
  const { withRetry } = calculationUtils;
579
+
580
+ // 1. GCS FAST PATH (If date provided)
581
+ if (dateString) {
582
+ const cached = await tryLoadFromGCS(config, dateString, 'verification', logger);
583
+ if (cached) return cached;
584
+ }
565
585
 
566
- // Verification is now stored at /SignedInUsers/{cid}/verification/data
567
- // To fetch globally, we must use a CollectionGroup query on 'verification'
568
- // and filter for the document ID 'data'.
569
-
586
+ // 2. FIRESTORE FALLBACK (Global Scan)
570
587
  logger.log('INFO', `Loading Verification Profiles (CollectionGroup: verification/data)`);
571
-
572
588
  try {
573
- // Warning: This requires a Firestore Index if used with complex filters, but basic get() usually works.
574
589
  const snapshot = await withRetry(() => db.collectionGroup('verification').get(), 'getVerificationsGroup');
575
-
576
590
  if (snapshot.empty) return {};
577
591
 
578
592
  const profiles = {};
579
593
  let count = 0;
580
-
581
594
  snapshot.forEach(doc => {
582
- if (doc.id !== 'data') return; // Enforce specific document ID from schema
583
-
595
+ if (doc.id !== 'data') return;
584
596
  const raw = tryDecompress(doc.data());
585
-
586
- // Map new schema fields to internal profile structure
587
- // New Schema: { etoroCID, etoroUsername, verifiedAt, setupCompletedAt ... }
588
597
  if (raw.etoroCID) {
589
598
  profiles[raw.etoroCID] = {
590
599
  cid: raw.etoroCID,
591
600
  username: raw.etoroUsername,
592
- // 'aboutMe' and 'restrictions' are NOT present in the new schema.
593
- // Defaulting to empty values to preserve downstream compatibility.
594
601
  aboutMe: "",
595
602
  aboutMeShort: "",
596
- isVerified: !!(raw.verifiedAt), // Using existence of verifiedAt as flag
603
+ isVerified: !!(raw.verifiedAt),
597
604
  restrictions: []
598
605
  };
599
606
  count++;
600
607
  }
601
608
  });
602
-
603
609
  logger.log('INFO', `Loaded ${count} verification profiles.`);
604
610
  return profiles;
605
611
  } catch (error) {
@@ -608,59 +614,42 @@ async function loadVerificationProfiles(config, deps) {
608
614
  }
609
615
  }
610
616
 
611
- /** Stage 13: Load PI Ratings Data
612
- * [UPDATED] Reads from /PiReviews/{date}/shards/daily_log.
613
- * [FIXED] Handles FLATTENED schema where keys like "reviews.ID" are at the top level.
614
- * Returns RAW logs grouped by PI. NO COMPUTATION.
615
- */
617
+ /** Stage 13: Load PI Ratings Data */
616
618
  async function loadPIRatings(config, deps, dateString) {
617
619
  const { db, logger, calculationUtils } = deps;
618
620
  const { withRetry } = calculationUtils;
619
-
620
- // New Path: /PiReviews/{date}/shards/daily_log
621
-
621
+
622
+ // 1. GCS FAST PATH
623
+ const cached = await tryLoadFromGCS(config, dateString, 'ratings', logger);
624
+ if (cached) return cached;
625
+
626
+ // 2. FIRESTORE FALLBACK
622
627
  logger.log('INFO', `Loading PI Ratings (Raw Logs) for ${dateString}`);
623
-
624
628
  try {
625
629
  const shardsColRef = db.collection('PiReviews').doc(dateString).collection('shards');
626
630
  const shardDocs = await withRetry(() => shardsColRef.listDocuments(), `listRatingShards(${dateString})`);
627
631
 
628
632
  if (!shardDocs || shardDocs.length === 0) {
629
- logger.log('WARN', `No rating shards found for ${dateString} at ${shardsColRef.path}`);
633
+ logger.log('WARN', `No rating shards found for ${dateString}`);
630
634
  return {};
631
635
  }
632
636
 
633
637
  const rawReviewsByPi = {};
634
-
635
638
  for (const docRef of shardDocs) {
636
639
  const docSnap = await docRef.get();
637
640
  if (!docSnap.exists) continue;
638
-
639
641
  const rawData = tryDecompress(docSnap.data());
640
-
641
- // SCHEMA HANDLING:
642
- // Keys at the root of the document are the review IDs (e.g. "reviews.29312236_31075566").
643
- // We iterate over all values and check if they look like review objects.
644
-
645
642
  Object.values(rawData).forEach(entry => {
646
- // Check for valid review object structure
647
643
  if (entry && typeof entry === 'object' && entry.piCid && entry.rating !== undefined) {
648
-
649
- if (!rawReviewsByPi[entry.piCid]) {
650
- rawReviewsByPi[entry.piCid] = [];
651
- }
652
-
653
- // Store the raw entry directly.
644
+ if (!rawReviewsByPi[entry.piCid]) rawReviewsByPi[entry.piCid] = [];
654
645
  rawReviewsByPi[entry.piCid].push(entry);
655
646
  }
656
647
  });
657
648
  }
658
-
659
649
  logger.log('INFO', `Loaded raw reviews for ${Object.keys(rawReviewsByPi).length} PIs.`);
660
650
  return rawReviewsByPi;
661
-
662
651
  } catch (error) {
663
- logger.log('ERROR', `Failed to load PI Ratings for ${dateString}: ${error.message}`);
652
+ logger.log('ERROR', `Failed to load PI Ratings: ${error.message}`);
664
653
  return {};
665
654
  }
666
655
  }
@@ -669,24 +658,23 @@ async function loadPIRatings(config, deps, dateString) {
669
658
  async function loadPIPageViews(config, deps, dateString) {
670
659
  const { db, logger, calculationUtils } = deps;
671
660
  const { withRetry } = calculationUtils;
661
+
662
+ // 1. GCS FAST PATH
663
+ const cached = await tryLoadFromGCS(config, dateString, 'page_views', logger);
664
+ if (cached) return cached;
665
+
666
+ // 2. FIRESTORE FALLBACK
672
667
  const collectionName = config.piPageViewsCollection || 'PIPageViewsData';
673
-
674
668
  logger.log('INFO', `Loading PI Page Views for ${dateString}`);
675
-
676
669
  try {
677
670
  const docRef = db.collection(collectionName).doc(dateString);
678
671
  const docSnap = await withRetry(() => docRef.get(), `getPIPageViews(${dateString})`);
679
-
680
- if (!docSnap.exists) {
681
- logger.log('WARN', `PI Page Views not found for ${dateString}`);
682
- return null;
683
- }
684
-
672
+ if (!docSnap.exists) { logger.log('WARN', `PI Page Views not found`); return null; }
685
673
  const data = tryDecompress(docSnap.data());
686
674
  const { date, lastUpdated, ...piPageViews } = data;
687
675
  return piPageViews;
688
676
  } catch (error) {
689
- logger.log('ERROR', `Failed to load PI Page Views for ${dateString}: ${error.message}`);
677
+ logger.log('ERROR', `Failed to load PI Page Views: ${error.message}`);
690
678
  return null;
691
679
  }
692
680
  }
@@ -695,24 +683,23 @@ async function loadPIPageViews(config, deps, dateString) {
695
683
  async function loadWatchlistMembership(config, deps, dateString) {
696
684
  const { db, logger, calculationUtils } = deps;
697
685
  const { withRetry } = calculationUtils;
698
- const collectionName = config.watchlistMembershipCollection || 'WatchlistMembershipData';
699
686
 
687
+ // 1. GCS FAST PATH
688
+ const cached = await tryLoadFromGCS(config, dateString, 'watchlist', logger);
689
+ if (cached) return cached;
690
+
691
+ // 2. FIRESTORE FALLBACK
692
+ const collectionName = config.watchlistMembershipCollection || 'WatchlistMembershipData';
700
693
  logger.log('INFO', `Loading Watchlist Membership for ${dateString}`);
701
-
702
694
  try {
703
695
  const docRef = db.collection(collectionName).doc(dateString);
704
696
  const docSnap = await withRetry(() => docRef.get(), `getWatchlistMembership(${dateString})`);
705
-
706
- if (!docSnap.exists) {
707
- logger.log('WARN', `Watchlist Membership not found for ${dateString}`);
708
- return null;
709
- }
710
-
697
+ if (!docSnap.exists) { logger.log('WARN', `Watchlist Membership not found`); return null; }
711
698
  const data = tryDecompress(docSnap.data());
712
699
  const { date, lastUpdated, ...watchlistMembership } = data;
713
700
  return watchlistMembership;
714
701
  } catch (error) {
715
- logger.log('ERROR', `Failed to load Watchlist Membership for ${dateString}: ${error.message}`);
702
+ logger.log('ERROR', `Failed to load Watchlist Membership: ${error.message}`);
716
703
  return null;
717
704
  }
718
705
  }
@@ -721,51 +708,39 @@ async function loadWatchlistMembership(config, deps, dateString) {
721
708
  async function loadPIAlertHistory(config, deps, dateString) {
722
709
  const { db, logger, calculationUtils } = deps;
723
710
  const { withRetry } = calculationUtils;
711
+
712
+ // 1. GCS FAST PATH
713
+ const cached = await tryLoadFromGCS(config, dateString, 'alerts', logger);
714
+ if (cached) return cached;
715
+
716
+ // 2. FIRESTORE FALLBACK
724
717
  const collectionName = config.piAlertHistoryCollection || 'PIAlertHistoryData';
725
-
726
718
  logger.log('INFO', `Loading PI Alert History for ${dateString}`);
727
-
728
719
  try {
729
720
  const docRef = db.collection(collectionName).doc(dateString);
730
721
  const docSnap = await withRetry(() => docRef.get(), `getPIAlertHistory(${dateString})`);
731
-
732
- if (!docSnap.exists) {
733
- logger.log('WARN', `PI Alert History not found for ${dateString}`);
734
- return null;
735
- }
736
-
722
+ if (!docSnap.exists) { logger.log('WARN', `PI Alert History not found`); return null; }
737
723
  const data = tryDecompress(docSnap.data());
738
724
  const { date, lastUpdated, ...piAlertHistory } = data;
739
725
  return piAlertHistory;
740
726
  } catch (error) {
741
- logger.log('ERROR', `Failed to load PI Alert History for ${dateString}: ${error.message}`);
727
+ logger.log('ERROR', `Failed to load PI Alert History: ${error.message}`);
742
728
  return null;
743
729
  }
744
730
  }
745
731
 
746
- /** Stage 17: Load PI-Centric Watchlist Data */
732
+ /** Stage 17: Load PI-Centric Watchlist Data (Targeted - Keep as Firestore) */
747
733
  async function loadPIWatchlistData(config, deps, piCid) {
748
734
  const { db, logger, calculationUtils } = deps;
749
735
  const { withRetry } = calculationUtils;
750
736
  const piCidStr = String(piCid);
751
737
 
752
738
  logger.log('INFO', `Loading PI Watchlist Data for PI ${piCid}`);
753
-
754
739
  try {
755
- const docRef = db.collection('PopularInvestors')
756
- .doc(piCidStr)
757
- .collection('watchlistData')
758
- .doc('current');
759
-
740
+ const docRef = db.collection('PopularInvestors').doc(piCidStr).collection('watchlistData').doc('current');
760
741
  const docSnap = await withRetry(() => docRef.get(), `getPIWatchlistData(${piCidStr})`);
761
-
762
- if (!docSnap.exists) {
763
- logger.log('WARN', `PI Watchlist Data not found for PI ${piCidStr}`);
764
- return null;
765
- }
766
-
767
- const data = tryDecompress(docSnap.data());
768
- return data;
742
+ if (!docSnap.exists) { logger.log('WARN', `PI Watchlist Data not found for PI ${piCidStr}`); return null; }
743
+ return tryDecompress(docSnap.data());
769
744
  } catch (error) {
770
745
  logger.log('ERROR', `Failed to load PI Watchlist Data for PI ${piCidStr}: ${error.message}`);
771
746
  return null;
@@ -773,24 +748,24 @@ async function loadPIWatchlistData(config, deps, piCid) {
773
748
  }
774
749
 
775
750
  // Load Popular Investor Master List
776
- async function loadPopularInvestorMasterList(config, deps) {
751
+ async function loadPopularInvestorMasterList(config, deps, dateString = null) {
777
752
  const { db, logger, calculationUtils } = deps;
778
753
  const { withRetry } = calculationUtils;
779
754
 
755
+ // 1. GCS FAST PATH (If date context exists)
756
+ if (dateString) {
757
+ const cached = await tryLoadFromGCS(config, dateString, 'master_list', logger);
758
+ if (cached) return cached;
759
+ }
760
+
761
+ // 2. FIRESTORE FALLBACK
780
762
  const collectionName = config.piMasterListCollection || 'system_state';
781
763
  const docId = config.piMasterListDocId || 'popular_investor_master_list';
782
-
783
764
  logger.log('INFO', `Loading Popular Investor Master List from ${collectionName}/${docId}`);
784
-
785
765
  try {
786
766
  const docRef = db.collection(collectionName).doc(docId);
787
767
  const docSnap = await withRetry(() => docRef.get(), 'getPIMasterList');
788
-
789
- if (!docSnap.exists) {
790
- logger.log('WARN', 'Popular Investor Master List not found.');
791
- return {};
792
- }
793
-
768
+ if (!docSnap.exists) { logger.log('WARN', 'Popular Investor Master List not found.'); return {}; }
794
769
  const data = tryDecompress(docSnap.data());
795
770
  return data.investors || data;
796
771
  } catch (error) {