bulltrackers-module 1.0.721 → 1.0.722

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1159 +1,324 @@
1
1
  /**
2
- * @fileoverview Data loader sub-pipes for the Computation System.
3
- * REFACTORED: Now stateless and receive dependencies.
4
- * UPDATED: Integrated GCS Snapshot "Fast Path" for massive read reduction across ALL data types.
5
- * FIXED: Added strict userType filtering to prevent fetching unnecessary data.
6
- * UPDATED: Verification now uses CollectionGroup query due to per-user storage.
7
- * UPDATED: Ratings now correctly handles flattened top-level schema.
2
+ * @fileoverview Data Loading Layer.
3
+ * REFACTORED:
4
+ * 1. Routes 90% of data fetching to BigQuery (Portfolios, History, Prices, Insights, Rankings, etc.).
5
+ * 2. Retains Firestore logic ONLY for:
6
+ * - Verifications (user_verifications)
7
+ * - Retail Users (Normal/Speculator Portfolios/History)
8
+ * - Generic Social Feed (Legacy compatibility)
8
9
  */
9
- const zlib = require('zlib');
10
- const { Storage } = require('@google-cloud/storage');
11
- const readline = require('readline');
12
-
13
- // Singleton Storage Client
14
- const storage = new Storage();
15
-
16
- // Helper for decompressing any doc if needed (Firestore Legacy)
17
- function tryDecompress(data) {
18
- if (data && data._compressed === true && data.payload) {
19
- try {
20
- return JSON.parse(zlib.gunzipSync(data.payload).toString());
21
- } catch (e) {
22
- console.error('[DataLoader] Decompression failed', e);
23
- return {};
24
- }
25
- }
26
- return data;
27
- }
28
-
29
- // --- GCS FAST PATH HELPER ---
30
- async function tryLoadFromGCS(config, dateString, snapshotName, logger) {
31
- if (!dateString) return null;
32
- const bucketName = config.gcsBucketName || 'bulltrackers';
33
- try {
34
- const bucket = storage.bucket(bucketName);
35
- const file = bucket.file(`${dateString}/snapshots/${snapshotName}.json.gz`);
36
- const [exists] = await file.exists();
37
-
38
- if (exists) {
39
- logger.log('INFO', `[DataLoader] ⚡️ GCS HIT: ${snapshotName} for ${dateString}`);
40
- const [content] = await file.download();
41
-
42
- // FIX: GCS client auto-decompresses if Content-Encoding is gzip.
43
- // We try gunzip first; if it fails with header check, it's likely already JSON.
44
- try {
45
- return JSON.parse(zlib.gunzipSync(content).toString());
46
- } catch (zipError) {
47
- if (zipError.message && zipError.message.includes('incorrect header check')) {
48
- // Content was already decompressed by the client
49
- return JSON.parse(content.toString());
50
- }
51
- throw zipError;
52
- }
53
- }
54
- } catch (e) {
55
- logger.log('WARN', `[DataLoader] GCS Check Failed (${snapshotName}): ${e.message}`);
56
- }
57
- return null;
58
- }
59
-
60
- /** --- Data Loader Sub-Pipes (Stateless, Dependency-Injection) --- */
61
-
62
- /** Stage 1: Get portfolio part document references for a given date */
63
- async function getPortfolioPartRefs(config, deps, dateString, requiredUserTypes = null) {
64
- const { db, logger, calculationUtils } = deps;
65
- const { withRetry } = calculationUtils;
10
+ const {
11
+ queryPortfolioData,
12
+ queryHistoryData,
13
+ querySocialData,
14
+ queryAssetPrices,
15
+ queryAllPricesForDate,
16
+ queryPricesForTickers,
17
+ queryInstrumentInsights,
18
+ queryPIRankings,
19
+ queryTickerMappings,
20
+ queryPIMasterList,
21
+ queryPIRatings,
22
+ queryPIPageViews,
23
+ queryWatchlistMembership,
24
+ queryPIAlertHistory
25
+ } = require('../../core/utils/bigquery_utils');
26
+
27
+ const { normalizeName } = require('./utils');
28
+
29
+ // =============================================================================
30
+ // 1. PORTFOLIOS
31
+ // =============================================================================
32
+ exports.loadDailyPortfolios = async (config, deps, dateStr, userTypes = []) => {
33
+ const { db, logger } = deps;
66
34
 
67
- // Normalize required types. If null/empty or contains 'ALL', fetch everything.
68
- const types = requiredUserTypes ? new Set(requiredUserTypes.map(t => t.toUpperCase())) : null;
69
- const fetchAll = !types || types.size === 0 || types.has('ALL');
35
+ // Normalize user types
36
+ const types = Array.isArray(userTypes) ? userTypes : [userTypes];
37
+ const isRetail = types.some(t => ['NORMAL', 'SPECULATOR'].includes(t.toUpperCase()));
38
+ const isMigrated = types.some(t => ['POPULAR_INVESTOR', 'SIGNED_IN_USER'].includes(t.toUpperCase()));
70
39
 
71
- // =========================================================================
72
- // BIGQUERY FIRST: Try BigQuery before Firestore
73
- // =========================================================================
74
- if (process.env.BIGQUERY_ENABLED !== 'false') {
75
- try {
76
- const { queryPortfolioData } = require('../../core/utils/bigquery_utils');
77
- const bigqueryData = await queryPortfolioData(dateString, null, requiredUserTypes, logger);
78
-
79
- if (bigqueryData && Object.keys(bigqueryData).length > 0) {
80
- logger.log('INFO', `[DataLoader] ✅ Using BigQuery for portfolio data (${dateString}): ${Object.keys(bigqueryData).length} users`);
81
-
82
- // Transform BigQuery data into "ref-like" objects for compatibility
83
- const bigqueryRefs = Object.keys(bigqueryData).map(userId => {
84
- const dataObj = bigqueryData[userId];
85
- return {
86
- ref: null, // No Firestore ref needed
87
- type: dataObj.user_type || 'UNKNOWN',
88
- cid: userId,
89
- collectionType: 'BIGQUERY',
90
- bigqueryData: dataObj.portfolio_data || {} // Extract the actual portfolio data
91
- };
92
- });
93
-
94
- return bigqueryRefs;
95
- } else {
96
- logger.log('INFO', `[DataLoader] ⚠️ BigQuery returned no portfolio data for ${dateString}, falling back to Firestore`);
97
- }
98
- } catch (bqError) {
99
- logger.log('WARN', `[DataLoader] BigQuery portfolio query failed for ${dateString}, falling back to Firestore: ${bqError.message}`);
100
- }
101
- }
102
-
103
- // =========================================================================
104
- // FIRESTORE FALLBACK: Original logic (backwards compatibility)
105
- // =========================================================================
106
- logger.log('INFO', `[DataLoader] 📂 Using Firestore for portfolio data (${dateString}). Filter: ${fetchAll ? 'ALL' : Array.from(types || []).join(',')}`);
107
-
108
- const allPartRefs = [];
109
-
110
- // NEW STRUCTURE: Read from date-based collections (per-user documents)
111
- try {
112
- if (fetchAll || types.has('SIGNED_IN_USER')) {
113
- const signedInPortCollectionName = 'SignedInUserPortfolioData';
114
- const signedInPortDateDoc = db.collection(signedInPortCollectionName).doc(dateString);
115
- const signedInPortSubcollections = await withRetry(
116
- () => signedInPortDateDoc.listCollections(),
117
- `listSignedInPortfolios(${dateString})`
118
- );
119
-
120
- signedInPortSubcollections.forEach(subcol => {
121
- const cid = subcol.id;
122
- allPartRefs.push({ ref: subcol.doc(cid), type: 'SIGNED_IN_USER', cid: cid, collectionType: 'NEW_STRUCTURE' });
123
- });
124
- }
125
-
126
- if (fetchAll || types.has('POPULAR_INVESTOR')) {
127
- const piPortCollectionName = 'PopularInvestorPortfolioData';
128
- const piPortDateDoc = db.collection(piPortCollectionName).doc(dateString);
129
- const piPortSubcollections = await withRetry(
130
- () => piPortDateDoc.listCollections(),
131
- `listPIPortfolios(${dateString})`
132
- );
133
-
134
- piPortSubcollections.forEach(subcol => {
135
- const cid = subcol.id;
136
- allPartRefs.push({ ref: subcol.doc(cid), type: 'POPULAR_INVESTOR', cid: cid, collectionType: 'NEW_STRUCTURE' });
137
- });
138
- }
139
- } catch (newStructError) {
140
- logger.log('WARN', `Failed to load from new structure, falling back to legacy: ${newStructError.message}`);
141
- }
142
-
143
- // LEGACY STRUCTURE: Read from block-based collections
144
- const collectionsToQuery = [];
145
- if ((fetchAll || types.has('NORMAL')) && config.normalUserPortfolioCollection) collectionsToQuery.push({ name: config.normalUserPortfolioCollection, type: 'NORMAL' });
146
- if ((fetchAll || types.has('SPECULATOR')) && config.speculatorPortfolioCollection) collectionsToQuery.push({ name: config.speculatorPortfolioCollection, type: 'SPECULATOR' });
147
- if ((fetchAll || types.has('POPULAR_INVESTOR')) && config.piPortfolioCollection) collectionsToQuery.push({ name: config.piPortfolioCollection, type: 'POPULAR_INVESTOR' });
148
- if ((fetchAll || types.has('SIGNED_IN_USER')) && config.signedInUsersCollection) collectionsToQuery.push({ name: config.signedInUsersCollection, type: 'SIGNED_IN_USER' });
149
-
150
- for (const { name: collectionName, type: collectionType } of collectionsToQuery) {
151
- try {
152
- const blockDocsQuery = db.collection(collectionName);
153
- const blockDocRefs = await withRetry(() => blockDocsQuery.listDocuments(), `listDocuments(${collectionName})`);
154
- if (!blockDocRefs.length) continue;
40
+ let results = {};
155
41
 
156
- const partsPromises = blockDocRefs.map(blockDocRef => {
157
- const partsCollectionRef = blockDocRef.collection(config.snapshotsSubcollection || 'snapshots').doc(dateString).collection(config.partsSubcollection || 'parts');
158
- return withRetry(() => partsCollectionRef.listDocuments(), `listParts(${partsCollectionRef.path})`);
159
- });
160
-
161
- const partDocArrays = await Promise.all(partsPromises);
162
- partDocArrays.forEach(partDocs => {
163
- allPartRefs.push(...partDocs.map(ref => ({ ref, type: collectionType, collectionType: 'LEGACY' })));
164
- });
165
- } catch (legacyError) {
166
- logger.log('WARN', `Failed to load legacy collection ${collectionName}: ${legacyError.message}`);
167
- }
42
+ // A. BigQuery (PIs & SignedIn)
43
+ if (isMigrated && process.env.BIGQUERY_ENABLED !== 'false') {
44
+ const bqData = await queryPortfolioData(dateStr, null, types, logger);
45
+ if (bqData) Object.assign(results, bqData);
168
46
  }
169
-
170
- logger.log('INFO', `Found ${allPartRefs.length} total portfolio refs for ${dateString} for types: ${fetchAll ? 'ALL' : Array.from(types).join(',')}`);
171
- return allPartRefs;
172
- }
173
47
 
174
- /** Stage 2: Load data from an array of document references */
175
- async function loadDataByRefs(config, deps, refObjects) {
176
- const { db, logger, calculationUtils } = deps;
177
- const { withRetry } = calculationUtils;
178
-
179
- if (!refObjects || !refObjects.length) return {};
180
-
181
- // =========================================================================
182
- // CHECK FOR PRICE DATA (BigQuery marker)
183
- // =========================================================================
184
- const priceBigQueryMarker = refObjects.find(r => r._bigquery === true);
185
- if (priceBigQueryMarker) {
186
- // This is a price data load request - use BigQuery
187
- try {
188
- const { queryAssetPrices } = require('../../core/utils/bigquery_utils');
189
- const priceData = await queryAssetPrices(null, null, null, logger);
190
-
191
- if (priceData && Object.keys(priceData).length > 0) {
192
- logger.log('INFO', `[DataLoader] ✅ Loaded ${Object.keys(priceData).length} instruments from BigQuery for price data`);
193
- return priceData;
194
- }
195
-
196
- // If BigQuery returns empty, fallback to Firestore
197
- logger.log('WARN', `[DataLoader] BigQuery returned no price data, falling back to Firestore`);
198
- } catch (bqError) {
199
- logger.log('WARN', `[DataLoader] BigQuery price load failed, falling back to Firestore: ${bqError.message}`);
200
- // Fall through to Firestore
48
+ // B. Firestore (Retail / Fallback)
49
+ // Note: If we need Retail data, we MUST check Firestore as it wasn't migrated.
50
+ if (isRetail) {
51
+ if (types.includes('NORMAL')) {
52
+ const normalData = await loadRetailFirestore(db, 'NormalUserPortfolios', dateStr);
53
+ Object.assign(results, normalData);
201
54
  }
202
- }
203
-
204
- // =========================================================================
205
- // SEPARATE BIGQUERY AND FIRESTORE REFS (for portfolio/history data)
206
- // =========================================================================
207
- const bigqueryRefs = refObjects.filter(r => r.collectionType === 'BIGQUERY');
208
- const firestoreRefs = refObjects.filter(r => r.collectionType !== 'BIGQUERY' && !r._bigquery);
209
-
210
- const mergedPortfolios = {};
211
-
212
- // Load from BigQuery cache (data already fetched in getPortfolioPartRefs/getHistoryPartRefs)
213
- if (bigqueryRefs.length > 0) {
214
- logger.log('INFO', `[DataLoader] 📊 Loading ${bigqueryRefs.length} records from BigQuery cache`);
215
- bigqueryRefs.forEach(ref => {
216
- if (ref.bigqueryData) {
217
- // Extract the actual data (portfolio_data or history_data)
218
- // The bigqueryData is the JSON object from BigQuery
219
- mergedPortfolios[ref.cid] = ref.bigqueryData;
220
- }
221
- });
222
- }
223
-
224
- // Load from Firestore (existing logic)
225
- if (firestoreRefs.length === 0) {
226
- return mergedPortfolios; // All data came from BigQuery
227
- }
228
-
229
- logger.log('INFO', `[DataLoader] 📂 Loading ${firestoreRefs.length} records from Firestore`);
230
- const batchSize = config.partRefBatchSize || 10;
231
-
232
- for (let i = 0; i < firestoreRefs.length; i += batchSize) {
233
- const batch = firestoreRefs.slice(i, i + batchSize);
234
- const refs = batch.map(b => b.ref).filter(ref => ref !== null); // Filter out null refs (BigQuery refs)
235
-
236
- if (refs.length === 0) continue; // Skip if all refs are BigQuery refs
237
-
238
- const snapshots = await withRetry(() => db.getAll(...refs), `getAll(batch ${Math.floor(i/batchSize)})`);
239
-
240
- const deepFetchPromises = [];
241
-
242
- for (let j = 0; j < snapshots.length; j++) {
243
- const doc = snapshots[j];
244
- const meta = batch[j];
245
-
246
- if (!doc.exists) continue;
247
-
248
- const rawData = doc.data();
249
- let chunkData;
250
-
251
- if (meta.collectionType === 'NEW_STRUCTURE') {
252
- const cid = meta.cid || doc.id;
253
- const userData = tryDecompress(rawData);
254
- chunkData = { [cid]: userData };
255
-
256
- if (meta.type === 'POPULAR_INVESTOR') {
257
- chunkData[cid]._userType = 'POPULAR_INVESTOR';
258
- if (chunkData[cid].deepPositions) chunkData[cid].DeepPositions = chunkData[cid].deepPositions;
259
- } else if (meta.type === 'SIGNED_IN_USER') {
260
- chunkData[cid]._userType = 'SIGNED_IN_USER';
261
- }
262
-
263
- deepFetchPromises.push(Promise.resolve(chunkData));
264
- } else {
265
- chunkData = tryDecompress(rawData);
266
-
267
- if (meta.type === 'POPULAR_INVESTOR' && config.piDeepPortfolioCollection) {
268
- const pathSegments = doc.ref.path.split('/');
269
- const deepCollection = config.piDeepPortfolioCollection;
270
- const deepPath = `${deepCollection}/${pathSegments[1]}/${pathSegments[2]}/${pathSegments[3]}/${pathSegments[4]}/${pathSegments[5]}`;
271
-
272
- deepFetchPromises.push(
273
- db.doc(deepPath).get().then(deepSnap => {
274
- if (deepSnap.exists) {
275
- const deepChunk = tryDecompress(deepSnap.data());
276
- for (const [uid, pData] of Object.entries(chunkData)) {
277
- if (deepChunk[uid] && deepChunk[uid].positions) pData.DeepPositions = deepChunk[uid].positions;
278
- }
279
- }
280
- for (const pData of Object.values(chunkData)) pData._userType = 'POPULAR_INVESTOR';
281
- return chunkData;
282
- }).catch(() => chunkData)
283
- );
284
- } else if (meta.type === 'SIGNED_IN_USER') {
285
- for (const pData of Object.values(chunkData)) pData._userType = 'SIGNED_IN_USER';
286
- deepFetchPromises.push(Promise.resolve(chunkData));
287
- } else {
288
- deepFetchPromises.push(Promise.resolve(chunkData));
289
- }
290
- }
55
+ if (types.includes('SPECULATOR')) {
56
+ const specData = await loadRetailFirestore(db, 'SpeculatorPortfolios', dateStr);
57
+ Object.assign(results, specData);
291
58
  }
292
-
293
- const resolvedChunks = await Promise.all(deepFetchPromises);
294
- resolvedChunks.forEach(chunk => {
295
- if (chunk && typeof chunk === 'object') Object.assign(mergedPortfolios, chunk);
296
- });
297
59
  }
298
- return mergedPortfolios;
299
- }
300
-
301
- /** Stage 3: Load a full day map by delegating to loadDataByRefs */
302
- async function loadFullDayMap(config, deps, partRefs, dateString) {
303
- // 1. GCS FAST PATH
304
- const cached = await tryLoadFromGCS(config, dateString, 'portfolios', deps.logger);
305
- if (cached) return cached;
306
-
307
- // 2. FIRESTORE FALLBACK
308
- const { logger } = deps;
309
- if (!partRefs.length) return {};
310
- logger.log('TRACE', `Loading full day map from ${partRefs.length} references...`);
311
- const fullMap = await loadDataByRefs(config, deps, partRefs);
312
- logger.log('TRACE', `Full day map loaded with ${Object.keys(fullMap).length} users`);
313
- return fullMap;
314
- }
315
60
 
316
- /** Stage 3.5: Load Daily Portfolios (Wrapper for Series Loading) */
317
- async function loadDailyPortfolios(config, deps, dateString, requiredUserTypes = null) {
318
- // 1. GCS FAST PATH
319
- const cached = await tryLoadFromGCS(config, dateString, 'portfolios', deps.logger);
320
- if (cached) return cached;
61
+ return results;
62
+ };
321
63
 
322
- // 2. FIRESTORE FALLBACK
323
- // [FIX] Now passing requiredUserTypes to prevent fetching all users (e.g. NormalUserPortfolios)
324
- const partRefs = await getPortfolioPartRefs(config, deps, dateString, requiredUserTypes);
325
- if (partRefs.length === 0) return {};
326
- return loadDataByRefs(config, deps, partRefs);
327
- }
64
+ // =============================================================================
65
+ // 2. TRADE HISTORY
66
+ // =============================================================================
67
+ exports.loadDailyHistory = async (config, deps, dateStr, userTypes = []) => {
68
+ const { db, logger } = deps;
69
+ const types = Array.isArray(userTypes) ? userTypes : [userTypes];
70
+ const isRetail = types.some(t => ['NORMAL', 'SPECULATOR'].includes(t.toUpperCase()));
71
+ const isMigrated = types.some(t => ['POPULAR_INVESTOR', 'SIGNED_IN_USER'].includes(t.toUpperCase()));
328
72
 
329
- /** Stage 4: Load daily instrument insights */
330
- async function loadDailyInsights(config, deps, dateString) {
331
- const { db, logger, calculationUtils } = deps;
332
- const { withRetry } = calculationUtils;
333
-
334
- // 1. GCS FAST PATH
335
- const cached = await tryLoadFromGCS(config, dateString, 'insights', logger);
336
- if (cached) return cached;
337
-
338
- // 2. BIGQUERY FIRST (if enabled)
339
- if (process.env.BIGQUERY_ENABLED !== 'false') {
340
- try {
341
- const { queryInstrumentInsights } = require('../../core/utils/bigquery_utils');
342
- const bigqueryData = await queryInstrumentInsights(dateString, logger);
343
-
344
- if (bigqueryData && Array.isArray(bigqueryData) && bigqueryData.length > 0) {
345
- logger.log('INFO', `[DataLoader] ✅ Using BigQuery for instrument insights (${dateString}): ${bigqueryData.length} instruments`);
346
- // Return in same format as Firestore: { insights: [...] }
347
- return { insights: bigqueryData };
348
- }
349
- } catch (bqError) {
350
- logger.log('WARN', `[DataLoader] BigQuery insights query failed, falling back to Firestore: ${bqError.message}`);
351
- // Fall through to Firestore
352
- }
353
- }
73
+ let results = {};
354
74
 
355
- // 3. FIRESTORE FALLBACK
356
- const insightsCollectionName = config.insightsCollectionName || 'daily_instrument_insights';
357
- logger.log('INFO', `Loading daily insights for ${dateString} from ${insightsCollectionName} (Firestore)`);
358
- try {
359
- const docRef = db.collection(insightsCollectionName).doc(dateString);
360
- const docSnap = await withRetry(() => docRef.get(), `getInsights(${dateString})`);
361
- if (!docSnap.exists) { logger.log('WARN', `Insights not found for ${dateString}`); return null; }
362
- logger.log('TRACE', `Successfully loaded insights for ${dateString}`);
363
- return tryDecompress(docSnap.data());
364
- } catch (error) {
365
- logger.log('ERROR', `Failed to load daily insights for ${dateString}`, { errorMessage: error.message });
366
- return null;
75
+ if (isMigrated && process.env.BIGQUERY_ENABLED !== 'false') {
76
+ const bqData = await queryHistoryData(dateStr, null, types, logger);
77
+ if (bqData) Object.assign(results, bqData);
367
78
  }
368
- }
369
79
 
370
- /** Stage 5: Load and Partition Social Data */
371
- async function loadDailySocialPostInsights(config, deps, dateString) {
372
- const { db, logger, calculationUtils } = deps;
373
- const { withRetry } = calculationUtils;
374
-
375
- // 1. GCS FAST PATH
376
- const cached = await tryLoadFromGCS(config, dateString, 'social', logger);
377
- if (cached) return cached;
378
-
379
- // 2. BIGQUERY FIRST (if enabled)
380
- if (process.env.BIGQUERY_ENABLED !== 'false') {
381
- try {
382
- const { querySocialData } = require('../../core/utils/bigquery_utils');
383
- const bigqueryData = await querySocialData(dateString, null, null, logger);
384
-
385
- if (bigqueryData && Object.keys(bigqueryData).length > 0) {
386
- logger.log('INFO', `[DataLoader] ✅ Using BigQuery for social data (${dateString}): ${Object.keys(bigqueryData).length} users`);
387
-
388
- // Transform BigQuery data to expected format: { generic: {}, pi: {}, signedIn: {} }
389
- // BigQuery returns: { userId: { posts_data: { posts: {...}, postCount: N }, user_type: '...' } }
390
- const result = { generic: {}, pi: {}, signedIn: {} };
391
-
392
- for (const [userId, userData] of Object.entries(bigqueryData)) {
393
- const userType = userData.user_type || 'UNKNOWN';
394
-
395
- // Handle posts_data - may be object (parsed JSON) or string (needs parsing)
396
- let postsData = userData.posts_data || {};
397
- if (typeof postsData === 'string') {
398
- try {
399
- postsData = JSON.parse(postsData);
400
- } catch (e) {
401
- logger.log('WARN', `[DataLoader] Failed to parse posts_data for user ${userId}: ${e.message}`);
402
- continue;
403
- }
404
- }
405
-
406
- // Extract posts map from posts_data structure: { posts: {...}, postCount: N }
407
- const posts = postsData.posts || {};
408
-
409
- // Partition by user type
410
- if (userType === 'POPULAR_INVESTOR') {
411
- result.pi[userId] = posts;
412
- } else if (userType === 'SIGNED_IN_USER') {
413
- result.signedIn[userId] = posts;
414
- } else {
415
- // Generic/unknown user types go to generic
416
- result.generic[userId] = posts;
417
- }
418
- }
419
-
420
- logger.log('INFO', `[DataLoader] ✅ Loaded Social Data from BigQuery: ${Object.keys(result.generic).length} Generic, ${Object.keys(result.pi).length} PIs, ${Object.keys(result.signedIn).length} Signed-In`);
421
- return result;
422
- }
423
- } catch (bqError) {
424
- logger.log('WARN', `[DataLoader] BigQuery social query failed for ${dateString}, falling back to Firestore: ${bqError.message}`);
425
- // Fall through to Firestore
80
+ if (isRetail) {
81
+ if (types.includes('NORMAL')) {
82
+ const normalData = await loadRetailFirestore(db, 'NormalUserTradeHistory', dateStr);
83
+ Object.assign(results, normalData);
84
+ }
85
+ if (types.includes('SPECULATOR')) {
86
+ const specData = await loadRetailFirestore(db, 'SpeculatorTradeHistory', dateStr);
87
+ Object.assign(results, specData);
426
88
  }
427
89
  }
90
+ return results;
91
+ };
428
92
 
429
- // 3. FIRESTORE FALLBACK
430
- logger.log('INFO', `Loading and partitioning social data for ${dateString} (Firestore)`);
431
-
432
- const result = { generic: {}, pi: {}, signedIn: {} };
93
+ // =============================================================================
94
+ // 3. SOCIAL
95
+ // =============================================================================
96
+ exports.loadDailySocialPostInsights = async (config, deps, dateStr, userTypes = []) => {
97
+ const { db, logger } = deps;
98
+ const types = Array.isArray(userTypes) ? userTypes : (userTypes ? [userTypes] : []);
433
99
 
434
- // NEW STRUCTURE
435
- try {
436
- const signedInSocialCollectionName = 'SignedInUserSocialPostData';
437
- const signedInSocialDateDoc = db.collection(signedInSocialCollectionName).doc(dateString);
438
- const signedInSocialSubcollections = await withRetry(() => signedInSocialDateDoc.listCollections(), `listSignedInSocial(${dateString})`);
439
-
440
- for (const subcol of signedInSocialSubcollections) {
441
- const cid = subcol.id;
442
- const cidDoc = await subcol.doc(cid).get();
443
- if (cidDoc.exists) {
444
- const cidData = tryDecompress(cidDoc.data());
445
- if (cidData.posts && typeof cidData.posts === 'object') {
446
- if (!result.signedIn[cid]) result.signedIn[cid] = {};
447
- Object.assign(result.signedIn[cid], cidData.posts);
448
- }
449
- }
450
- }
451
-
452
- const piSocialCollectionName = 'PopularInvestorSocialPostData';
453
- const piSocialDateDoc = db.collection(piSocialCollectionName).doc(dateString);
454
- const piSocialSubcollections = await withRetry(() => piSocialDateDoc.listCollections(), `listPISocial(${dateString})`);
455
-
456
- for (const subcol of piSocialSubcollections) {
457
- const cid = subcol.id;
458
- const cidDoc = await subcol.doc(cid).get();
459
- if (cidDoc.exists) {
460
- const cidData = tryDecompress(cidDoc.data());
461
- if (cidData.posts && typeof cidData.posts === 'object') {
462
- if (!result.pi[cid]) result.pi[cid] = {};
463
- Object.assign(result.pi[cid], cidData.posts);
464
- }
465
- }
466
- }
467
-
468
- const instrumentSocialCollectionName = 'InstrumentFeedSocialPostData';
469
- const instrumentSocialDateDoc = db.collection(instrumentSocialCollectionName).doc(dateString);
470
- const instrumentSocialPostsCol = instrumentSocialDateDoc.collection('posts');
471
- const instrumentSocialSnapshot = await withRetry(() => instrumentSocialPostsCol.limit(1000).get(), `getInstrumentSocial(${dateString})`);
472
-
473
- instrumentSocialSnapshot.forEach(doc => {
474
- const data = tryDecompress(doc.data());
475
- result.generic[doc.id] = data;
476
- });
477
-
478
- logger.log('INFO', `Loaded Social Data (NEW): ${Object.keys(result.generic).length} Generic, ${Object.keys(result.pi).length} PIs.`);
479
- } catch (newStructError) {
480
- logger.log('WARN', `Failed to load from new structure: ${newStructError.message}`);
100
+ // A. BigQuery (User-Specific Social)
101
+ if (types.length > 0 && process.env.BIGQUERY_ENABLED !== 'false') {
102
+ return querySocialData(dateStr, null, types, logger);
481
103
  }
482
104
 
483
- // LEGACY STRUCTURE
484
- const PI_COL_NAME = config.piSocialCollectionName || config.piSocialCollection || 'pi_social_posts';
485
- const SIGNED_IN_COL_NAME = config.signedInUserSocialCollection || 'signed_in_users_social';
486
- const startDate = new Date(dateString + 'T00:00:00Z');
487
- const endDate = new Date(dateString + 'T23:59:59Z');
488
-
105
+ // B. Firestore (Generic Feed - Legacy)
106
+ // If no user types specified, assume generic feed fetch
107
+ const collection = config.socialInsightsCollection || 'daily_social_insights';
489
108
  try {
490
- const postsQuery = db.collectionGroup('posts').where('fetchedAt', '>=', startDate).where('fetchedAt', '<=', endDate);
491
- const querySnapshot = await withRetry(() => postsQuery.get(), `getSocialPosts(${dateString})`);
492
-
493
- if (!querySnapshot.empty) {
494
- querySnapshot.forEach(doc => {
495
- const data = tryDecompress(doc.data());
496
- const path = doc.ref.path;
497
- if (path.includes(PI_COL_NAME)) {
498
- const parts = path.split('/');
499
- const colIndex = parts.indexOf(PI_COL_NAME);
500
- if (colIndex !== -1 && parts[colIndex + 1]) {
501
- const userId = parts[colIndex + 1];
502
- if (!result.pi[userId]) result.pi[userId] = {};
503
- result.pi[userId][doc.id] = data;
504
- }
505
- } else if (path.includes(SIGNED_IN_COL_NAME)) {
506
- const parts = path.split('/');
507
- const colIndex = parts.indexOf(SIGNED_IN_COL_NAME);
508
- if (colIndex !== -1 && parts[colIndex + 1]) {
509
- const userId = parts[colIndex + 1];
510
- if (!result.signedIn[userId]) result.signedIn[userId] = {};
511
- result.signedIn[userId][doc.id] = data;
512
- }
513
- } else {
514
- result.generic[doc.id] = data;
515
- }
516
- });
517
- }
518
- } catch (error) {
519
- logger.log('ERROR', `Failed to load social posts (legacy): ${error.message}`);
109
+ const snap = await db.collection(collection).doc(dateStr).collection('posts').get();
110
+ if (snap.empty) return {};
111
+ const data = {};
112
+ snap.forEach(doc => data[doc.id] = doc.data());
113
+ return data;
114
+ } catch (e) {
115
+ logger.log('WARN', `[DataLoader] Failed to load generic social for ${dateStr}: ${e.message}`);
116
+ return {};
520
117
  }
118
+ };
521
119
 
522
- return result;
523
- }
524
-
525
- /** Stage 6: Get history part references for a given date */
526
- async function getHistoryPartRefs(config, deps, dateString, requiredUserTypes = null) {
527
- const { db, logger, calculationUtils } = deps;
528
- const { withRetry } = calculationUtils;
529
-
530
- // Normalize required types
531
- const types = requiredUserTypes ? new Set(requiredUserTypes.map(t => t.toUpperCase())) : null;
532
- const fetchAll = !types || types.size === 0 || types.has('ALL');
533
-
534
- // =========================================================================
535
- // BIGQUERY FIRST: Try BigQuery before Firestore
536
- // =========================================================================
120
+ // =============================================================================
121
+ // 4. MARKET DATA (Prices)
122
+ // =============================================================================
123
+ exports.getPriceShardRefs = async (config, deps) => {
124
+ // Legacy Shard Helper - In BQ world, we don't use shards but CachedDataLoader expects this structure.
125
+ // We return a "virtual" shard array that signals CachedDataLoader to load from BQ.
537
126
  if (process.env.BIGQUERY_ENABLED !== 'false') {
538
- try {
539
- const { queryHistoryData } = require('../../core/utils/bigquery_utils');
540
- const bigqueryData = await queryHistoryData(dateString, null, requiredUserTypes, logger);
541
-
542
- if (bigqueryData && Object.keys(bigqueryData).length > 0) {
543
- logger.log('INFO', `[DataLoader] Using BigQuery for trade history data (${dateString}): ${Object.keys(bigqueryData).length} users`);
544
-
545
- // Transform BigQuery data into "ref-like" objects for compatibility
546
- const bigqueryRefs = Object.keys(bigqueryData).map(userId => {
547
- const dataObj = bigqueryData[userId];
548
- return {
549
- ref: null, // No Firestore ref needed
550
- type: dataObj.user_type || 'UNKNOWN',
551
- cid: userId,
552
- collectionType: 'BIGQUERY',
553
- bigqueryData: dataObj.history_data || {} // Extract the actual history data
554
- };
555
- });
556
-
557
- return bigqueryRefs;
558
- } else {
559
- logger.log('INFO', `[DataLoader] ⚠️ BigQuery returned no history data for ${dateString}, falling back to Firestore`);
560
- }
561
- } catch (bqError) {
562
- logger.log('WARN', `[DataLoader] BigQuery history query failed for ${dateString}, falling back to Firestore: ${bqError.message}`);
563
- }
564
- }
565
-
566
- // =========================================================================
567
- // FIRESTORE FALLBACK: Original logic (backwards compatibility)
568
- // =========================================================================
569
- logger.log('INFO', `[DataLoader] 📂 Using Firestore for trade history data (${dateString}). Filter: ${fetchAll ? 'ALL' : Array.from(types || []).join(',')}`);
570
-
571
- const allPartRefs = [];
572
-
573
- // NEW STRUCTURE
574
- try {
575
- if (fetchAll || types.has('SIGNED_IN_USER')) {
576
- const signedInHistCollectionName = 'SignedInUserTradeHistoryData';
577
- const signedInHistDateDoc = db.collection(signedInHistCollectionName).doc(dateString);
578
- const signedInHistSubcollections = await withRetry(
579
- () => signedInHistDateDoc.listCollections(),
580
- `listSignedInHistory(${dateString})`
581
- );
582
- signedInHistSubcollections.forEach(subcol => {
583
- allPartRefs.push({ ref: subcol.doc(subcol.id), type: 'SIGNED_IN_USER', cid: subcol.id, collectionType: 'NEW_STRUCTURE' });
584
- });
585
- }
586
-
587
- if (fetchAll || types.has('POPULAR_INVESTOR')) {
588
- const piHistCollectionName = 'PopularInvestorTradeHistoryData';
589
- const piHistDateDoc = db.collection(piHistCollectionName).doc(dateString);
590
- const piHistSubcollections = await withRetry(
591
- () => piHistDateDoc.listCollections(),
592
- `listPIHistory(${dateString})`
593
- );
594
- piHistSubcollections.forEach(subcol => {
595
- allPartRefs.push({ ref: subcol.doc(subcol.id), type: 'POPULAR_INVESTOR', cid: subcol.id, collectionType: 'NEW_STRUCTURE' });
596
- });
597
- }
598
- } catch (newStructError) {
599
- logger.log('WARN', `Failed to load from new structure: ${newStructError.message}`);
127
+ return [ { _bigquery: true } ];
128
+ }
129
+ // Fallback to Firestore Logic - return array of doc refs
130
+ const { db } = deps;
131
+ const collection = config.assetPricesCollection || 'asset_prices';
132
+ const snapshot = await db.collection(collection).listDocuments();
133
+ const refs = [];
134
+ snapshot.forEach(doc => refs.push(doc));
135
+ return refs;
136
+ };
137
+
138
+ exports.getRelevantShardRefs = async (config, deps, targetIds) => {
139
+ // In BQ mode, we don't shard by instrument; return single virtual shard
140
+ if (process.env.BIGQUERY_ENABLED !== 'false') {
141
+ return [ { _bigquery: true, targetIds: targetIds || [] } ];
600
142
  }
601
-
602
- // LEGACY STRUCTURE
603
- const collectionsToQuery = [];
604
- if ((fetchAll || types.has('NORMAL')) && config.normalUserHistoryCollection) collectionsToQuery.push(config.normalUserHistoryCollection);
605
- if ((fetchAll || types.has('SPECULATOR')) && config.speculatorHistoryCollection) collectionsToQuery.push(config.speculatorHistoryCollection);
606
- if ((fetchAll || types.has('POPULAR_INVESTOR')) && config.piHistoryCollection) collectionsToQuery.push(config.piHistoryCollection);
607
- if ((fetchAll || types.has('SIGNED_IN_USER')) && config.signedInHistoryCollection) collectionsToQuery.push(config.signedInHistoryCollection);
143
+ // Firestore behavior - return array of doc refs (same as getPriceShardRefs for now)
144
+ return exports.getPriceShardRefs(config, deps);
145
+ };
608
146
 
609
- for (const collectionName of collectionsToQuery) {
610
- try {
611
- const blockDocsQuery = db.collection(collectionName);
612
- const blockDocRefs = await withRetry(() => blockDocsQuery.listDocuments(), `listDocuments(${collectionName})`);
613
- if (!blockDocRefs.length) continue;
614
-
615
- const partsPromises = blockDocRefs.map(blockDocRef => {
616
- const partsCollectionRef = blockDocRef.collection(config.snapshotsSubcollection || 'snapshots').doc(dateString).collection(config.partsSubcollection || 'parts');
617
- return withRetry(() => partsCollectionRef.listDocuments(), `listParts(${partsCollectionRef.path})`);
618
- });
619
-
620
- const partDocArrays = await Promise.all(partsPromises);
621
- partDocArrays.forEach(partDocs => {
622
- allPartRefs.push(...partDocs.map(ref => ({ ref, type: 'PART', collectionType: 'LEGACY' })));
623
- });
624
- } catch (legacyError) {
625
- logger.log('WARN', `Failed to load legacy history collection ${collectionName}: ${legacyError.message}`);
626
- }
627
- }
628
-
629
- logger.log('INFO', `Found ${allPartRefs.length} total history refs for ${dateString}`);
630
- return allPartRefs;
631
- }
147
+ // =============================================================================
148
+ // 5. ROOT DATA TYPES (Simple Mappings)
149
+ // =============================================================================
632
150
 
633
- /** Stage 7: Stream portfolio data in chunks */
634
- async function* streamPortfolioData(config, deps, dateString, providedRefs = null, requiredUserTypes = null) {
151
+ exports.loadDailyInsights = async (config, deps, dateStr) => {
635
152
  const { logger } = deps;
636
153
 
637
- // =========================================================================
638
- // BIGQUERY FIRST: Try BigQuery before GCS/Firestore
639
- // =========================================================================
640
- if (!providedRefs && process.env.BIGQUERY_ENABLED !== 'false') {
154
+ if (process.env.BIGQUERY_ENABLED !== 'false') {
641
155
  try {
642
- const { queryPortfolioData } = require('../../core/utils/bigquery_utils');
643
- const bigqueryData = await queryPortfolioData(dateString, null, requiredUserTypes, logger);
644
-
645
- if (bigqueryData && Object.keys(bigqueryData).length > 0) {
646
- logger.log('INFO', `[DataLoader] Streaming portfolio data from BigQuery (${dateString}): ${Object.keys(bigqueryData).length} users`);
647
-
648
- // Transform to expected format and yield
649
- const transformedData = {};
650
- Object.keys(bigqueryData).forEach(userId => {
651
- transformedData[userId] = bigqueryData[userId].portfolio_data || {};
652
- });
653
-
654
- yield transformedData;
655
- return; // Exit early, data loaded from BigQuery
656
- }
657
- } catch (bqError) {
658
- logger.log('WARN', `[DataLoader] BigQuery portfolio stream failed for ${dateString}, falling back: ${bqError.message}`);
659
- }
660
- }
661
-
662
- // 1. GCS FAST PATH (Snapshot) - Only if full run (no providedRefs)
663
- if (!providedRefs) {
664
- try {
665
- const bucketName = config.gcsBucketName || 'bulltrackers';
666
- const bucket = storage.bucket(bucketName);
667
- const file = bucket.file(`${dateString}/snapshots/portfolios.json.gz`);
668
- const [exists] = await file.exists();
669
-
670
- if (exists) {
671
- logger.log('INFO', `[DataLoader] ⚡️ STREAMING: Hydrating Portfolios from GCS Snapshot`);
672
- const [content] = await file.download();
673
-
674
- // FIX: Handle Double Decompression
675
- let fullData;
676
- try {
677
- fullData = JSON.parse(zlib.gunzipSync(content).toString());
678
- } catch (zipError) {
679
- if (zipError.message && zipError.message.includes('incorrect header check')) {
680
- fullData = JSON.parse(content.toString());
681
- } else {
682
- throw zipError;
683
- }
684
- }
685
-
686
- yield fullData; // Yield all in one chunk as it fits in memory
687
- return;
156
+ const rows = await queryInstrumentInsights(dateStr, logger);
157
+ if (Array.isArray(rows) && rows.length > 0) {
158
+ logger.log('INFO', `[DataLoader] ✅ Using BigQuery for instrument insights (${dateStr}): ${rows.length} instruments`);
159
+ // Wrap in Firestore-shaped document format for InsightsExtractor compatibility
160
+ return { insights: rows };
688
161
  }
689
162
  } catch (e) {
690
- logger.log('WARN', `[DataLoader] GCS Portfolio Stream failed: ${e.message}. Falling back.`);
163
+ logger.log('WARN', `[DataLoader] BigQuery insights query failed for ${dateStr}: ${e.message}`);
691
164
  }
692
165
  }
693
166
 
694
- // 2. FIRESTORE FALLBACK
695
- const refs = providedRefs || (await getPortfolioPartRefs(config, deps, dateString, requiredUserTypes));
696
- if (refs.length === 0) { logger.log('WARN', `[streamPortfolioData] No portfolio refs found for ${dateString}. Stream is empty.`); return; }
697
-
698
- const batchSize = config.partRefBatchSize || 10;
699
- logger.log('INFO', `[streamPortfolioData] Streaming ${refs.length} portfolio parts in chunks of ${batchSize}...`);
700
-
701
- for (let i = 0; i < refs.length; i += batchSize) {
702
- const batchRefs = refs.slice(i, i + batchSize);
703
- const data = await loadDataByRefs(config, deps, batchRefs);
704
- yield data;
705
- }
706
- logger.log('INFO', `[streamPortfolioData] Finished streaming for ${dateString}.`);
707
- }
167
+ // No Firestore fallback by design – return empty but correctly shaped
168
+ return { insights: [] };
169
+ };
708
170
 
709
- /** Stage 8: Stream history data in chunks */
710
- async function* streamHistoryData(config, deps, dateString, providedRefs = null, requiredUserTypes = null) {
711
- const { logger } = deps;
171
+ exports.loadPopularInvestorRankings = async (config, deps, dateStr) => {
172
+ const data = await queryPIRankings(dateStr, deps.logger);
173
+ return data ? data.Items : [];
174
+ };
712
175
 
713
- // =========================================================================
714
- // BIGQUERY FIRST: Try BigQuery before Firestore
715
- // =========================================================================
716
- if (!providedRefs && process.env.BIGQUERY_ENABLED !== 'false') {
717
- try {
718
- const { queryHistoryData } = require('../../core/utils/bigquery_utils');
719
- const bigqueryData = await queryHistoryData(dateString, null, requiredUserTypes, logger);
720
-
721
- if (bigqueryData && Object.keys(bigqueryData).length > 0) {
722
- logger.log('INFO', `[DataLoader] ✅ Streaming history data from BigQuery (${dateString}): ${Object.keys(bigqueryData).length} users`);
723
-
724
- // Transform to expected format and yield
725
- const transformedData = {};
726
- Object.keys(bigqueryData).forEach(userId => {
727
- transformedData[userId] = bigqueryData[userId].history_data || {};
728
- });
729
-
730
- yield transformedData;
731
- return; // Exit early, data loaded from BigQuery
732
- }
733
- } catch (bqError) {
734
- logger.log('WARN', `[DataLoader] BigQuery history stream failed for ${dateString}, falling back: ${bqError.message}`);
735
- }
736
- }
176
+ exports.loadPIRatings = async (config, deps, dateStr) => {
177
+ return queryPIRatings(dateStr, deps.logger);
178
+ };
737
179
 
738
- // 1. GCS FAST PATH (JSONL Streaming) - Only if full run
739
- if (!providedRefs) {
740
- try {
741
- const bucketName = config.gcsBucketName || 'bulltrackers';
742
- const bucket = storage.bucket(bucketName);
743
- const file = bucket.file(`${dateString}/snapshots/history.jsonl.gz`);
744
- const [exists] = await file.exists();
180
+ exports.loadPIPageViews = async (config, deps, dateStr) => {
181
+ return queryPIPageViews(dateStr, deps.logger);
182
+ };
745
183
 
746
- if (exists) {
747
- logger.log('INFO', `[DataLoader] ⚡️ STREAMING: Hydrating History from GCS (JSONL)`);
748
-
749
- const fileStream = file.createReadStream();
750
- const rl = readline.createInterface({ input: fileStream, crlfDelay: Infinity });
184
+ exports.loadWatchlistMembership = async (config, deps, dateStr) => {
185
+ return queryWatchlistMembership(dateStr, deps.logger);
186
+ };
751
187
 
752
- let currentBatch = {};
753
- let count = 0;
754
- const BATCH_SIZE = 50;
188
+ exports.loadPIAlertHistory = async (config, deps, dateStr) => {
189
+ return queryPIAlertHistory(dateStr, deps.logger);
190
+ };
755
191
 
756
- for await (const line of rl) {
757
- if (!line.trim()) continue;
758
- try {
759
- const userEntry = JSON.parse(line);
760
- Object.assign(currentBatch, userEntry);
761
- count++;
762
- if (count >= BATCH_SIZE) {
763
- yield currentBatch;
764
- currentBatch = {};
765
- count = 0;
766
- }
767
- } catch (parseErr) {
768
- logger.log('ERROR', `[DataLoader] JSONL Parse Error: ${parseErr.message}`);
769
- }
770
- }
771
- if (Object.keys(currentBatch).length > 0) yield currentBatch;
772
- logger.log('INFO', `[DataLoader] Finished streaming History from GCS.`);
773
- return;
774
- }
775
- } catch (e) {
776
- logger.log('WARN', `[DataLoader] GCS History Stream failed: ${e.message}. Falling back.`);
777
- }
778
- }
192
+ exports.loadPopularInvestorMasterList = async (config, deps) => {
193
+ return queryPIMasterList(deps.logger);
194
+ };
779
195
 
780
- // 2. FIRESTORE FALLBACK
781
- const refs = providedRefs || (await getHistoryPartRefs(config, deps, dateString, requiredUserTypes));
782
- if (refs.length === 0) { logger.log('WARN', `[streamHistoryData] No history refs found for ${dateString}. Stream is empty.`); return; }
783
-
784
- const batchSize = config.partRefBatchSize || 10;
785
- logger.log('INFO', `[streamHistoryData] Streaming ${refs.length} history parts in chunks of ${batchSize}...`);
786
-
787
- for (let i = 0; i < refs.length; i += batchSize) {
788
- const batchRefs = refs.slice(i, i + batchSize);
789
- const data = await loadDataByRefs(config, deps, batchRefs);
790
- yield data;
791
- }
792
- logger.log('INFO', `[streamHistoryData] Finished streaming for ${dateString}.`);
793
- }
196
+ exports.loadPIWatchlistData = async (config, deps, piCid) => {
197
+ // Watchlist data is time-series in BQ. For "Current State" (ID based),
198
+ // we query the most recent date available for this PI.
199
+ // This is a specialized query not in standard utils, so we implement it here or assume caller passes date.
200
+ // However, CachedDataLoader expects (cid) -> Data.
201
+ // We'll return null here as WatchlistMembership (by date) is the preferred method now.
202
+ deps.logger.log('WARN', '[DataLoader] loadPIWatchlistData (by CID) is deprecated in favor of loadWatchlistMembership (by Date).');
203
+ return null;
204
+ };
794
205
 
795
- /** Stage 9: Get all price shard references (Basic)
796
- * [UPDATED] Returns special marker for BigQuery mode, or Firestore refs for fallback
797
- */
798
- async function getPriceShardRefs(config, deps) {
799
- const { logger } = deps;
800
-
801
- // Try BigQuery first if enabled
802
- if (process.env.BIGQUERY_ENABLED !== 'false') {
803
- // Return a special marker object to indicate BigQuery mode
804
- // The loader will detect this and load from BigQuery instead
805
- return [{ _bigquery: true }];
806
- }
807
-
808
- // Fallback to Firestore
809
- const { db, calculationUtils } = deps;
810
- const { withRetry } = calculationUtils;
811
- const collection = config.priceCollection || 'asset_prices';
206
+ // =============================================================================
207
+ // 6. EXCEPTIONS (Firestore Only)
208
+ // =============================================================================
209
+
210
+ exports.loadVerificationProfiles = async (config, deps, dateStr) => {
211
+ const { db, logger } = deps;
812
212
  try {
813
- const collectionRef = db.collection(collection);
814
- const refs = await withRetry(() => collectionRef.listDocuments(), `listDocuments(${collection})`);
815
- return refs;
213
+ // Verifications are a single collection, not date-partitioned snapshots
214
+ const snap = await db.collection('user_verifications').get();
215
+ const verifications = {};
216
+ snap.forEach(doc => verifications[doc.id] = doc.data());
217
+ return verifications;
816
218
  } catch (e) {
817
- logger.log('ERROR', `Failed to list price shards: ${e.message}`);
818
- return [];
219
+ logger.log('ERROR', `[DataLoader] Failed to load verifications: ${e.message}`);
220
+ return {};
819
221
  }
820
- }
222
+ };
821
223
 
822
- /** Stage 10: Smart Shard Lookup System (DEPRECATED/SIMPLIFIED) */
823
- async function ensurePriceShardIndex(config, deps) {
824
- return {}; // Deprecated
825
- }
224
+ // =============================================================================
225
+ // HELPERS
226
+ // =============================================================================
826
227
 
827
- async function getRelevantShardRefs(config, deps, targetInstrumentIds) {
828
- const { logger } = deps;
829
- logger.log('INFO', `[ShardLookup] Smart indexing disabled. Fetching all price shards.`);
830
- return getPriceShardRefs(config, deps);
831
- }
832
-
833
- /** Stage 11: Load Popular Investor Rankings */
834
- async function loadPopularInvestorRankings(config, deps, dateString) {
835
- const { db, logger, calculationUtils } = deps;
836
- const { withRetry } = calculationUtils;
228
+ // =============================================================================
229
+ // 7. PRICE DATA BY REFS (For PriceBatchExecutor)
230
+ // =============================================================================
837
231
 
838
- // 1. GCS FAST PATH
839
- const cached = await tryLoadFromGCS(config, dateString, 'rankings', logger);
840
- if (cached) return cached;
841
-
842
- // 2. BIGQUERY FIRST (if enabled)
843
- if (process.env.BIGQUERY_ENABLED !== 'false') {
844
- try {
845
- const { queryPIRankings } = require('../../core/utils/bigquery_utils');
846
- const bigqueryData = await queryPIRankings(dateString, logger);
847
-
848
- if (bigqueryData && bigqueryData.Items && bigqueryData.Items.length > 0) {
849
- logger.log('INFO', `[DataLoader] ✅ Using BigQuery for PI rankings (${dateString}): ${bigqueryData.Items.length} items`);
850
- return bigqueryData.Items;
851
- }
852
- } catch (bqError) {
853
- logger.log('WARN', `[DataLoader] BigQuery rankings query failed, falling back to Firestore: ${bqError.message}`);
854
- // Fall through to Firestore
855
- }
856
- }
857
-
858
- // 3. FIRESTORE FALLBACK
859
- const collectionName = config.popularInvestorRankingsCollection || 'popular_investor_rankings';
860
- logger.log('INFO', `Loading Popular Investor Rankings for ${dateString} from Firestore`);
861
- try {
862
- const docRef = db.collection(collectionName).doc(dateString);
863
- const docSnap = await withRetry(() => docRef.get(), `getRankings(${dateString})`);
864
- if (!docSnap.exists) { logger.log('WARN', `Rankings not found for ${dateString}`); return null; }
865
- const data = tryDecompress(docSnap.data());
866
- return data.Items || [];
867
- } catch (error) {
868
- logger.log('ERROR', `Failed to load Rankings for ${dateString}: ${error.message}`);
869
- return null;
870
- }
871
- }
872
-
873
- /** Stage 12: Load User Verification Profiles
874
- * [UPDATED] Scans global verification data via CollectionGroup since it's now stored per-user.
875
- * [UPDATED] Added optional dateString param to support GCS snapshot checks.
232
+ /**
233
+ * Load price data from an array of shard references (virtual or Firestore doc refs).
234
+ * Used by PriceBatchExecutor for batch price computations.
235
+ * @param {object} config - Configuration object
236
+ * @param {object} deps - Dependencies (db, logger, etc.)
237
+ * @param {Array} shardRefs - Array of shard references (virtual BigQuery objects or Firestore doc refs)
238
+ * @returns {Promise<object>} Combined price data object keyed by instrument ID
876
239
  */
877
- async function loadVerificationProfiles(config, deps, dateString = null) {
878
- const { db, logger, calculationUtils } = deps;
879
- const { withRetry } = calculationUtils;
880
-
881
- // 1. GCS FAST PATH (If date provided)
882
- if (dateString) {
883
- const cached = await tryLoadFromGCS(config, dateString, 'verification', logger);
884
- if (cached) return cached;
885
- }
240
+ exports.loadDataByRefs = async (config, deps, shardRefs) => {
241
+ const { logger } = deps;
886
242
 
887
- // 2. FIRESTORE FALLBACK (Global Scan)
888
- logger.log('INFO', `Loading Verification Profiles (CollectionGroup: verification/data)`);
889
- try {
890
- const snapshot = await withRetry(() => db.collectionGroup('verification').get(), 'getVerificationsGroup');
891
- if (snapshot.empty) return {};
892
-
893
- const profiles = {};
894
- let count = 0;
895
- snapshot.forEach(doc => {
896
- if (doc.id !== 'data') return;
897
- const raw = tryDecompress(doc.data());
898
- if (raw.etoroCID) {
899
- profiles[raw.etoroCID] = {
900
- cid: raw.etoroCID,
901
- username: raw.etoroUsername,
902
- aboutMe: "",
903
- aboutMeShort: "",
904
- isVerified: !!(raw.verifiedAt),
905
- restrictions: []
906
- };
907
- count++;
908
- }
909
- });
910
- logger.log('INFO', `Loaded ${count} verification profiles.`);
911
- return profiles;
912
- } catch (error) {
913
- logger.log('ERROR', `Failed to load Verification Profiles: ${error.message}`);
243
+ if (!Array.isArray(shardRefs) || shardRefs.length === 0) {
914
244
  return {};
915
245
  }
916
- }
917
-
918
- /** Stage 13: Load PI Ratings Data */
919
- async function loadPIRatings(config, deps, dateString) {
920
- const { db, logger, calculationUtils } = deps;
921
- const { withRetry } = calculationUtils;
922
-
923
- // 1. GCS FAST PATH
924
- const cached = await tryLoadFromGCS(config, dateString, 'ratings', logger);
925
- if (cached) return cached;
926
-
927
- // 2. BIGQUERY FIRST (if enabled)
928
- if (process.env.BIGQUERY_ENABLED !== 'false') {
929
- try {
930
- const { queryPIRatings } = require('../../core/utils/bigquery_utils');
931
- const bigqueryData = await queryPIRatings(dateString, logger);
932
- if (bigqueryData) {
933
- logger.log('INFO', `[DataLoader] ✅ Loaded PI Ratings from BigQuery for ${dateString}`);
934
- return bigqueryData;
935
- }
936
- } catch (error) {
937
- logger.log('WARN', `[DataLoader] BigQuery PI Ratings query failed, falling back to Firestore: ${error.message}`);
938
- }
939
- }
940
246
 
941
- // 3. FIRESTORE FALLBACK
942
- const collectionName = config.piRatingsCollection || 'PIRatingsData';
943
- logger.log('INFO', `Loading PI Ratings from Firestore for ${dateString}`);
944
- try {
945
- const docRef = db.collection(collectionName).doc(dateString);
946
- const docSnap = await withRetry(() => docRef.get(), `getPIRatings(${dateString})`);
947
- if (!docSnap.exists) {
948
- logger.log('WARN', `PI Ratings not found for ${dateString}`);
949
- return {};
950
- }
951
- const data = tryDecompress(docSnap.data());
952
- const { date, lastUpdated, ...piRatings } = data;
953
- return piRatings;
954
- } catch (error) {
955
- logger.log('ERROR', `Failed to load PI Ratings: ${error.message}`);
956
- return {};
957
- }
958
- }
959
-
960
- /** Stage 14: Load PI Page Views Data */
961
- async function loadPIPageViews(config, deps, dateString) {
962
- const { db, logger, calculationUtils } = deps;
963
- const { withRetry } = calculationUtils;
964
-
965
- // 1. GCS FAST PATH
966
- const cached = await tryLoadFromGCS(config, dateString, 'page_views', logger);
967
- if (cached) return cached;
968
-
969
- // 2. BIGQUERY FIRST (if enabled)
970
- if (process.env.BIGQUERY_ENABLED !== 'false') {
971
- try {
972
- const { queryPIPageViews } = require('../../core/utils/bigquery_utils');
973
- const bigqueryData = await queryPIPageViews(dateString, logger);
974
- if (bigqueryData) {
975
- logger.log('INFO', `[DataLoader] ✅ Loaded PI Page Views from BigQuery for ${dateString}`);
976
- return bigqueryData;
977
- }
978
- } catch (error) {
979
- logger.log('WARN', `[DataLoader] BigQuery PI Page Views query failed, falling back to Firestore: ${error.message}`);
980
- }
981
- }
982
-
983
- // 3. FIRESTORE FALLBACK
984
- const collectionName = config.piPageViewsCollection || 'PIPageViewsData';
985
- logger.log('INFO', `Loading PI Page Views from Firestore for ${dateString}`);
986
- try {
987
- const docRef = db.collection(collectionName).doc(dateString);
988
- const docSnap = await withRetry(() => docRef.get(), `getPIPageViews(${dateString})`);
989
- if (!docSnap.exists) { logger.log('WARN', `PI Page Views not found`); return null; }
990
- const data = tryDecompress(docSnap.data());
991
- const { date, lastUpdated, ...piPageViews } = data;
992
- return piPageViews;
993
- } catch (error) {
994
- logger.log('ERROR', `Failed to load PI Page Views: ${error.message}`);
995
- return null;
996
- }
997
- }
998
-
999
- /** Stage 15: Load Watchlist Membership Data */
1000
- async function loadWatchlistMembership(config, deps, dateString) {
1001
- const { db, logger, calculationUtils } = deps;
1002
- const { withRetry } = calculationUtils;
247
+ // Check if we're in BigQuery mode (virtual shards)
248
+ const isBigQuery = shardRefs.some(ref => ref && ref._bigquery === true);
1003
249
 
1004
- // 1. GCS FAST PATH
1005
- const cached = await tryLoadFromGCS(config, dateString, 'watchlist', logger);
1006
- if (cached) return cached;
1007
-
1008
- // 2. BIGQUERY FIRST (if enabled)
1009
- if (process.env.BIGQUERY_ENABLED !== 'false') {
250
+ if (isBigQuery && process.env.BIGQUERY_ENABLED !== 'false') {
1010
251
  try {
1011
- const { queryWatchlistMembership } = require('../../core/utils/bigquery_utils');
1012
- const bigqueryData = await queryWatchlistMembership(dateString, logger);
1013
- if (bigqueryData) {
1014
- logger.log('INFO', `[DataLoader] ✅ Loaded Watchlist Membership from BigQuery for ${dateString}`);
1015
- return bigqueryData;
252
+ // Extract targetIds from virtual shards if present
253
+ const targetIds = shardRefs
254
+ .filter(ref => ref._bigquery && ref.targetIds && ref.targetIds.length > 0)
255
+ .flatMap(ref => ref.targetIds);
256
+
257
+ // Query BigQuery for prices
258
+ // queryAssetPrices signature: (startDateStr, endDateStr, instrumentIds, logger)
259
+ const { queryAssetPrices } = require('../../core/utils/bigquery_utils');
260
+ const pricesData = await queryAssetPrices(null, null, targetIds.length > 0 ? targetIds : null, logger);
261
+
262
+ // Filter by targetIds if specified
263
+ if (targetIds.length > 0 && pricesData) {
264
+ const targetSet = new Set(targetIds.map(id => String(id)));
265
+ const filtered = {};
266
+ for (const [instrumentId, priceData] of Object.entries(pricesData)) {
267
+ if (targetSet.has(String(instrumentId))) {
268
+ filtered[instrumentId] = priceData;
269
+ }
270
+ }
271
+ return filtered;
1016
272
  }
1017
- } catch (error) {
1018
- logger.log('WARN', `[DataLoader] BigQuery Watchlist Membership query failed, falling back to Firestore: ${error.message}`);
273
+
274
+ return pricesData || {};
275
+ } catch (e) {
276
+ logger.log('ERROR', `[DataLoader] BigQuery price load failed: ${e.message}`);
277
+ return {};
1019
278
  }
1020
279
  }
1021
280
 
1022
- // 3. FIRESTORE FALLBACK
1023
- const collectionName = config.watchlistMembershipCollection || 'WatchlistMembershipData';
1024
- logger.log('INFO', `Loading Watchlist Membership from Firestore for ${dateString}`);
281
+ // Firestore fallback - load from doc refs
282
+ const combined = {};
1025
283
  try {
1026
- const docRef = db.collection(collectionName).doc(dateString);
1027
- const docSnap = await withRetry(() => docRef.get(), `getWatchlistMembership(${dateString})`);
1028
- if (!docSnap.exists) { logger.log('WARN', `Watchlist Membership not found`); return null; }
1029
- const data = tryDecompress(docSnap.data());
1030
- const { date, lastUpdated, ...watchlistMembership } = data;
1031
- return watchlistMembership;
1032
- } catch (error) {
1033
- logger.log('ERROR', `Failed to load Watchlist Membership: ${error.message}`);
1034
- return null;
1035
- }
1036
- }
1037
-
1038
- /** Stage 16: Load PI Alert History Data */
1039
- async function loadPIAlertHistory(config, deps, dateString) {
1040
- const { db, logger, calculationUtils } = deps;
1041
- const { withRetry } = calculationUtils;
1042
-
1043
- // 1. GCS FAST PATH
1044
- const cached = await tryLoadFromGCS(config, dateString, 'alerts', logger);
1045
- if (cached) return cached;
1046
-
1047
- // 2. BIGQUERY FIRST (if enabled)
1048
- if (process.env.BIGQUERY_ENABLED !== 'false') {
1049
- try {
1050
- const { queryPIAlertHistory } = require('../../core/utils/bigquery_utils');
1051
- const bigqueryData = await queryPIAlertHistory(dateString, logger);
1052
- if (bigqueryData) {
1053
- logger.log('INFO', `[DataLoader] ✅ Loaded PI Alert History from BigQuery for ${dateString}`);
1054
- return bigqueryData;
284
+ const loadPromises = shardRefs.map(async (docRef) => {
285
+ try {
286
+ const snap = await docRef.get();
287
+ if (snap.exists) {
288
+ const data = snap.data();
289
+ // Firestore price shards are nested: { instrumentId: { prices: {...} } }
290
+ Object.assign(combined, data);
291
+ }
292
+ } catch (e) {
293
+ logger.log('WARN', `[DataLoader] Failed to load price shard: ${e.message}`);
1055
294
  }
1056
- } catch (error) {
1057
- logger.log('WARN', `[DataLoader] BigQuery PI Alert History query failed, falling back to Firestore: ${error.message}`);
1058
- }
1059
- }
1060
-
1061
- // 3. FIRESTORE FALLBACK
1062
- const collectionName = config.piAlertHistoryCollection || 'PIAlertHistoryData';
1063
- logger.log('INFO', `Loading PI Alert History from Firestore for ${dateString}`);
1064
- try {
1065
- const docRef = db.collection(collectionName).doc(dateString);
1066
- const docSnap = await withRetry(() => docRef.get(), `getPIAlertHistory(${dateString})`);
1067
- if (!docSnap.exists) { logger.log('WARN', `PI Alert History not found`); return null; }
1068
- const data = tryDecompress(docSnap.data());
1069
- const { date, lastUpdated, ...piAlertHistory } = data;
1070
- return piAlertHistory;
1071
- } catch (error) {
1072
- logger.log('ERROR', `Failed to load PI Alert History: ${error.message}`);
1073
- return null;
1074
- }
1075
- }
1076
-
1077
- /** Stage 17: Load PI-Centric Watchlist Data (Targeted - Keep as Firestore) */
1078
- async function loadPIWatchlistData(config, deps, piCid) {
1079
- const { db, logger, calculationUtils } = deps;
1080
- const { withRetry } = calculationUtils;
1081
- const piCidStr = String(piCid);
1082
-
1083
- logger.log('INFO', `Loading PI Watchlist Data for PI ${piCid}`);
1084
- try {
1085
- const docRef = db.collection('PopularInvestors').doc(piCidStr).collection('watchlistData').doc('current');
1086
- const docSnap = await withRetry(() => docRef.get(), `getPIWatchlistData(${piCidStr})`);
1087
- if (!docSnap.exists) { logger.log('WARN', `PI Watchlist Data not found for PI ${piCidStr}`); return null; }
1088
- return tryDecompress(docSnap.data());
1089
- } catch (error) {
1090
- logger.log('ERROR', `Failed to load PI Watchlist Data for PI ${piCidStr}: ${error.message}`);
1091
- return null;
295
+ });
296
+
297
+ await Promise.all(loadPromises);
298
+ } catch (e) {
299
+ logger.log('ERROR', `[DataLoader] Failed to load price data from refs: ${e.message}`);
1092
300
  }
1093
- }
1094
-
1095
- // Load Popular Investor Master List
1096
- async function loadPopularInvestorMasterList(config, deps, dateString = null) {
1097
- const { db, logger, calculationUtils } = deps;
1098
- const { withRetry } = calculationUtils;
1099
301
 
1100
- // 1. GCS FAST PATH (If date context exists)
1101
- if (dateString) {
1102
- const cached = await tryLoadFromGCS(config, dateString, 'master_list', logger);
1103
- if (cached) return cached;
1104
- }
302
+ return combined;
303
+ };
1105
304
 
1106
- // 2. BIGQUERY FIRST (if enabled)
1107
- if (process.env.BIGQUERY_ENABLED !== 'false') {
1108
- try {
1109
- const { queryPIMasterList } = require('../../core/utils/bigquery_utils');
1110
- const bigqueryData = await queryPIMasterList(logger);
1111
-
1112
- if (bigqueryData && Object.keys(bigqueryData).length > 0) {
1113
- logger.log('INFO', `[DataLoader] ✅ Using BigQuery for PI master list: ${Object.keys(bigqueryData).length} PIs`);
1114
- return bigqueryData;
1115
- }
1116
- } catch (bqError) {
1117
- logger.log('WARN', `[DataLoader] BigQuery master list query failed, falling back to Firestore: ${bqError.message}`);
1118
- // Fall through to Firestore
1119
- }
1120
- }
305
+ // =============================================================================
306
+ // HELPERS
307
+ // =============================================================================
1121
308
 
1122
- // 3. FIRESTORE FALLBACK
1123
- const collectionName = config.piMasterListCollection || 'system_state';
1124
- const docId = config.piMasterListDocId || 'popular_investor_master_list';
1125
- logger.log('INFO', `Loading Popular Investor Master List from ${collectionName}/${docId} (Firestore)`);
309
+ async function loadRetailFirestore(db, collectionName, dateStr) {
310
+ const CANARY_ID = '19M'; // Legacy Block
1126
311
  try {
1127
- const docRef = db.collection(collectionName).doc(docId);
1128
- const docSnap = await withRetry(() => docRef.get(), 'getPIMasterList');
1129
- if (!docSnap.exists) { logger.log('WARN', 'Popular Investor Master List not found.'); return {}; }
1130
- const data = tryDecompress(docSnap.data());
1131
- return data.investors || data;
1132
- } catch (error) {
1133
- logger.log('ERROR', `Failed to load PI Master List: ${error.message}`);
312
+ const partsRef = db.collection(collectionName).doc(CANARY_ID)
313
+ .collection('snapshots').doc(dateStr).collection('parts');
314
+
315
+ const snap = await partsRef.get();
316
+ if (snap.empty) return {};
317
+
318
+ const combined = {};
319
+ snap.forEach(doc => Object.assign(combined, doc.data()));
320
+ return combined;
321
+ } catch (e) {
1134
322
  return {};
1135
323
  }
1136
- }
1137
-
1138
- module.exports = {
1139
- getPortfolioPartRefs,
1140
- loadDataByRefs,
1141
- loadFullDayMap,
1142
- loadDailyPortfolios,
1143
- loadDailyInsights,
1144
- loadDailySocialPostInsights,
1145
- getHistoryPartRefs,
1146
- streamPortfolioData,
1147
- streamHistoryData,
1148
- getPriceShardRefs,
1149
- ensurePriceShardIndex,
1150
- getRelevantShardRefs,
1151
- loadPopularInvestorRankings,
1152
- loadVerificationProfiles,
1153
- loadPIRatings,
1154
- loadPIPageViews,
1155
- loadWatchlistMembership,
1156
- loadPIAlertHistory,
1157
- loadPopularInvestorMasterList,
1158
- loadPIWatchlistData,
1159
- };
324
+ }