bulltrackers-module 1.0.721 → 1.0.722
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/data/CachedDataLoader.js +101 -102
- package/functions/computation-system/data/DependencyFetcher.js +48 -8
- package/functions/computation-system/persistence/ResultCommitter.js +158 -573
- package/functions/computation-system/utils/data_loader.js +253 -1088
- package/functions/core/utils/bigquery_utils.js +248 -112
- package/functions/etoro-price-fetcher/helpers/handler_helpers.js +4 -1
- package/functions/fetch-insights/helpers/handler_helpers.js +63 -65
- package/functions/fetch-popular-investors/helpers/fetch_helpers.js +143 -458
- package/functions/orchestrator/index.js +108 -141
- package/functions/root-data-indexer/index.js +130 -437
- package/package.json +3 -2
- package/functions/invalid-speculator-handler/helpers/handler_helpers.js +0 -38
- package/functions/speculator-cleanup-orchestrator/helpers/cleanup_helpers.js +0 -101
|
@@ -1,1159 +1,324 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @fileoverview Data
|
|
3
|
-
* REFACTORED:
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
2
|
+
* @fileoverview Data Loading Layer.
|
|
3
|
+
* REFACTORED:
|
|
4
|
+
* 1. Routes 90% of data fetching to BigQuery (Portfolios, History, Prices, Insights, Rankings, etc.).
|
|
5
|
+
* 2. Retains Firestore logic ONLY for:
|
|
6
|
+
* - Verifications (user_verifications)
|
|
7
|
+
* - Retail Users (Normal/Speculator Portfolios/History)
|
|
8
|
+
* - Generic Social Feed (Legacy compatibility)
|
|
8
9
|
*/
|
|
9
|
-
const
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
//
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
const
|
|
33
|
-
try {
|
|
34
|
-
const bucket = storage.bucket(bucketName);
|
|
35
|
-
const file = bucket.file(`${dateString}/snapshots/${snapshotName}.json.gz`);
|
|
36
|
-
const [exists] = await file.exists();
|
|
37
|
-
|
|
38
|
-
if (exists) {
|
|
39
|
-
logger.log('INFO', `[DataLoader] ⚡️ GCS HIT: ${snapshotName} for ${dateString}`);
|
|
40
|
-
const [content] = await file.download();
|
|
41
|
-
|
|
42
|
-
// FIX: GCS client auto-decompresses if Content-Encoding is gzip.
|
|
43
|
-
// We try gunzip first; if it fails with header check, it's likely already JSON.
|
|
44
|
-
try {
|
|
45
|
-
return JSON.parse(zlib.gunzipSync(content).toString());
|
|
46
|
-
} catch (zipError) {
|
|
47
|
-
if (zipError.message && zipError.message.includes('incorrect header check')) {
|
|
48
|
-
// Content was already decompressed by the client
|
|
49
|
-
return JSON.parse(content.toString());
|
|
50
|
-
}
|
|
51
|
-
throw zipError;
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
} catch (e) {
|
|
55
|
-
logger.log('WARN', `[DataLoader] GCS Check Failed (${snapshotName}): ${e.message}`);
|
|
56
|
-
}
|
|
57
|
-
return null;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
/** --- Data Loader Sub-Pipes (Stateless, Dependency-Injection) --- */
|
|
61
|
-
|
|
62
|
-
/** Stage 1: Get portfolio part document references for a given date */
|
|
63
|
-
async function getPortfolioPartRefs(config, deps, dateString, requiredUserTypes = null) {
|
|
64
|
-
const { db, logger, calculationUtils } = deps;
|
|
65
|
-
const { withRetry } = calculationUtils;
|
|
10
|
+
const {
|
|
11
|
+
queryPortfolioData,
|
|
12
|
+
queryHistoryData,
|
|
13
|
+
querySocialData,
|
|
14
|
+
queryAssetPrices,
|
|
15
|
+
queryAllPricesForDate,
|
|
16
|
+
queryPricesForTickers,
|
|
17
|
+
queryInstrumentInsights,
|
|
18
|
+
queryPIRankings,
|
|
19
|
+
queryTickerMappings,
|
|
20
|
+
queryPIMasterList,
|
|
21
|
+
queryPIRatings,
|
|
22
|
+
queryPIPageViews,
|
|
23
|
+
queryWatchlistMembership,
|
|
24
|
+
queryPIAlertHistory
|
|
25
|
+
} = require('../../core/utils/bigquery_utils');
|
|
26
|
+
|
|
27
|
+
const { normalizeName } = require('./utils');
|
|
28
|
+
|
|
29
|
+
// =============================================================================
|
|
30
|
+
// 1. PORTFOLIOS
|
|
31
|
+
// =============================================================================
|
|
32
|
+
exports.loadDailyPortfolios = async (config, deps, dateStr, userTypes = []) => {
|
|
33
|
+
const { db, logger } = deps;
|
|
66
34
|
|
|
67
|
-
// Normalize
|
|
68
|
-
const types =
|
|
69
|
-
const
|
|
35
|
+
// Normalize user types
|
|
36
|
+
// Wrap a single type in an array; a null/empty value yields [] instead of [null],
// which would throw at the .toUpperCase() checks that follow.
const types = Array.isArray(userTypes) ? userTypes : (userTypes ? [userTypes] : []);
|
|
37
|
+
const isRetail = types.some(t => ['NORMAL', 'SPECULATOR'].includes(t.toUpperCase()));
|
|
38
|
+
const isMigrated = types.some(t => ['POPULAR_INVESTOR', 'SIGNED_IN_USER'].includes(t.toUpperCase()));
|
|
70
39
|
|
|
71
|
-
|
|
72
|
-
// BIGQUERY FIRST: Try BigQuery before Firestore
|
|
73
|
-
// =========================================================================
|
|
74
|
-
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
75
|
-
try {
|
|
76
|
-
const { queryPortfolioData } = require('../../core/utils/bigquery_utils');
|
|
77
|
-
const bigqueryData = await queryPortfolioData(dateString, null, requiredUserTypes, logger);
|
|
78
|
-
|
|
79
|
-
if (bigqueryData && Object.keys(bigqueryData).length > 0) {
|
|
80
|
-
logger.log('INFO', `[DataLoader] ✅ Using BigQuery for portfolio data (${dateString}): ${Object.keys(bigqueryData).length} users`);
|
|
81
|
-
|
|
82
|
-
// Transform BigQuery data into "ref-like" objects for compatibility
|
|
83
|
-
const bigqueryRefs = Object.keys(bigqueryData).map(userId => {
|
|
84
|
-
const dataObj = bigqueryData[userId];
|
|
85
|
-
return {
|
|
86
|
-
ref: null, // No Firestore ref needed
|
|
87
|
-
type: dataObj.user_type || 'UNKNOWN',
|
|
88
|
-
cid: userId,
|
|
89
|
-
collectionType: 'BIGQUERY',
|
|
90
|
-
bigqueryData: dataObj.portfolio_data || {} // Extract the actual portfolio data
|
|
91
|
-
};
|
|
92
|
-
});
|
|
93
|
-
|
|
94
|
-
return bigqueryRefs;
|
|
95
|
-
} else {
|
|
96
|
-
logger.log('INFO', `[DataLoader] ⚠️ BigQuery returned no portfolio data for ${dateString}, falling back to Firestore`);
|
|
97
|
-
}
|
|
98
|
-
} catch (bqError) {
|
|
99
|
-
logger.log('WARN', `[DataLoader] BigQuery portfolio query failed for ${dateString}, falling back to Firestore: ${bqError.message}`);
|
|
100
|
-
}
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
// =========================================================================
|
|
104
|
-
// FIRESTORE FALLBACK: Original logic (backwards compatibility)
|
|
105
|
-
// =========================================================================
|
|
106
|
-
logger.log('INFO', `[DataLoader] 📂 Using Firestore for portfolio data (${dateString}). Filter: ${fetchAll ? 'ALL' : Array.from(types || []).join(',')}`);
|
|
107
|
-
|
|
108
|
-
const allPartRefs = [];
|
|
109
|
-
|
|
110
|
-
// NEW STRUCTURE: Read from date-based collections (per-user documents)
|
|
111
|
-
try {
|
|
112
|
-
if (fetchAll || types.has('SIGNED_IN_USER')) {
|
|
113
|
-
const signedInPortCollectionName = 'SignedInUserPortfolioData';
|
|
114
|
-
const signedInPortDateDoc = db.collection(signedInPortCollectionName).doc(dateString);
|
|
115
|
-
const signedInPortSubcollections = await withRetry(
|
|
116
|
-
() => signedInPortDateDoc.listCollections(),
|
|
117
|
-
`listSignedInPortfolios(${dateString})`
|
|
118
|
-
);
|
|
119
|
-
|
|
120
|
-
signedInPortSubcollections.forEach(subcol => {
|
|
121
|
-
const cid = subcol.id;
|
|
122
|
-
allPartRefs.push({ ref: subcol.doc(cid), type: 'SIGNED_IN_USER', cid: cid, collectionType: 'NEW_STRUCTURE' });
|
|
123
|
-
});
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
if (fetchAll || types.has('POPULAR_INVESTOR')) {
|
|
127
|
-
const piPortCollectionName = 'PopularInvestorPortfolioData';
|
|
128
|
-
const piPortDateDoc = db.collection(piPortCollectionName).doc(dateString);
|
|
129
|
-
const piPortSubcollections = await withRetry(
|
|
130
|
-
() => piPortDateDoc.listCollections(),
|
|
131
|
-
`listPIPortfolios(${dateString})`
|
|
132
|
-
);
|
|
133
|
-
|
|
134
|
-
piPortSubcollections.forEach(subcol => {
|
|
135
|
-
const cid = subcol.id;
|
|
136
|
-
allPartRefs.push({ ref: subcol.doc(cid), type: 'POPULAR_INVESTOR', cid: cid, collectionType: 'NEW_STRUCTURE' });
|
|
137
|
-
});
|
|
138
|
-
}
|
|
139
|
-
} catch (newStructError) {
|
|
140
|
-
logger.log('WARN', `Failed to load from new structure, falling back to legacy: ${newStructError.message}`);
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
// LEGACY STRUCTURE: Read from block-based collections
|
|
144
|
-
const collectionsToQuery = [];
|
|
145
|
-
if ((fetchAll || types.has('NORMAL')) && config.normalUserPortfolioCollection) collectionsToQuery.push({ name: config.normalUserPortfolioCollection, type: 'NORMAL' });
|
|
146
|
-
if ((fetchAll || types.has('SPECULATOR')) && config.speculatorPortfolioCollection) collectionsToQuery.push({ name: config.speculatorPortfolioCollection, type: 'SPECULATOR' });
|
|
147
|
-
if ((fetchAll || types.has('POPULAR_INVESTOR')) && config.piPortfolioCollection) collectionsToQuery.push({ name: config.piPortfolioCollection, type: 'POPULAR_INVESTOR' });
|
|
148
|
-
if ((fetchAll || types.has('SIGNED_IN_USER')) && config.signedInUsersCollection) collectionsToQuery.push({ name: config.signedInUsersCollection, type: 'SIGNED_IN_USER' });
|
|
149
|
-
|
|
150
|
-
for (const { name: collectionName, type: collectionType } of collectionsToQuery) {
|
|
151
|
-
try {
|
|
152
|
-
const blockDocsQuery = db.collection(collectionName);
|
|
153
|
-
const blockDocRefs = await withRetry(() => blockDocsQuery.listDocuments(), `listDocuments(${collectionName})`);
|
|
154
|
-
if (!blockDocRefs.length) continue;
|
|
40
|
+
let results = {};
|
|
155
41
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
const partDocArrays = await Promise.all(partsPromises);
|
|
162
|
-
partDocArrays.forEach(partDocs => {
|
|
163
|
-
allPartRefs.push(...partDocs.map(ref => ({ ref, type: collectionType, collectionType: 'LEGACY' })));
|
|
164
|
-
});
|
|
165
|
-
} catch (legacyError) {
|
|
166
|
-
logger.log('WARN', `Failed to load legacy collection ${collectionName}: ${legacyError.message}`);
|
|
167
|
-
}
|
|
42
|
+
// A. BigQuery (PIs & SignedIn)
|
|
43
|
+
if (isMigrated && process.env.BIGQUERY_ENABLED !== 'false') {
|
|
44
|
+
const bqData = await queryPortfolioData(dateStr, null, types, logger);
|
|
45
|
+
if (bqData) Object.assign(results, bqData);
|
|
168
46
|
}
|
|
169
|
-
|
|
170
|
-
logger.log('INFO', `Found ${allPartRefs.length} total portfolio refs for ${dateString} for types: ${fetchAll ? 'ALL' : Array.from(types).join(',')}`);
|
|
171
|
-
return allPartRefs;
|
|
172
|
-
}
|
|
173
47
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
// =========================================================================
|
|
182
|
-
// CHECK FOR PRICE DATA (BigQuery marker)
|
|
183
|
-
// =========================================================================
|
|
184
|
-
const priceBigQueryMarker = refObjects.find(r => r._bigquery === true);
|
|
185
|
-
if (priceBigQueryMarker) {
|
|
186
|
-
// This is a price data load request - use BigQuery
|
|
187
|
-
try {
|
|
188
|
-
const { queryAssetPrices } = require('../../core/utils/bigquery_utils');
|
|
189
|
-
const priceData = await queryAssetPrices(null, null, null, logger);
|
|
190
|
-
|
|
191
|
-
if (priceData && Object.keys(priceData).length > 0) {
|
|
192
|
-
logger.log('INFO', `[DataLoader] ✅ Loaded ${Object.keys(priceData).length} instruments from BigQuery for price data`);
|
|
193
|
-
return priceData;
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
// If BigQuery returns empty, fallback to Firestore
|
|
197
|
-
logger.log('WARN', `[DataLoader] BigQuery returned no price data, falling back to Firestore`);
|
|
198
|
-
} catch (bqError) {
|
|
199
|
-
logger.log('WARN', `[DataLoader] BigQuery price load failed, falling back to Firestore: ${bqError.message}`);
|
|
200
|
-
// Fall through to Firestore
|
|
48
|
+
// B. Firestore (Retail / Fallback)
|
|
49
|
+
// Note: Retail data was not migrated to BigQuery, so it MUST be read from Firestore.
|
|
50
|
+
if (isRetail) {
|
|
51
|
+
if (types.includes('NORMAL')) {
|
|
52
|
+
const normalData = await loadRetailFirestore(db, 'NormalUserPortfolios', dateStr);
|
|
53
|
+
Object.assign(results, normalData);
|
|
201
54
|
}
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
// SEPARATE BIGQUERY AND FIRESTORE REFS (for portfolio/history data)
|
|
206
|
-
// =========================================================================
|
|
207
|
-
const bigqueryRefs = refObjects.filter(r => r.collectionType === 'BIGQUERY');
|
|
208
|
-
const firestoreRefs = refObjects.filter(r => r.collectionType !== 'BIGQUERY' && !r._bigquery);
|
|
209
|
-
|
|
210
|
-
const mergedPortfolios = {};
|
|
211
|
-
|
|
212
|
-
// Load from BigQuery cache (data already fetched in getPortfolioPartRefs/getHistoryPartRefs)
|
|
213
|
-
if (bigqueryRefs.length > 0) {
|
|
214
|
-
logger.log('INFO', `[DataLoader] 📊 Loading ${bigqueryRefs.length} records from BigQuery cache`);
|
|
215
|
-
bigqueryRefs.forEach(ref => {
|
|
216
|
-
if (ref.bigqueryData) {
|
|
217
|
-
// Extract the actual data (portfolio_data or history_data)
|
|
218
|
-
// The bigqueryData is the JSON object from BigQuery
|
|
219
|
-
mergedPortfolios[ref.cid] = ref.bigqueryData;
|
|
220
|
-
}
|
|
221
|
-
});
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
// Load from Firestore (existing logic)
|
|
225
|
-
if (firestoreRefs.length === 0) {
|
|
226
|
-
return mergedPortfolios; // All data came from BigQuery
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
logger.log('INFO', `[DataLoader] 📂 Loading ${firestoreRefs.length} records from Firestore`);
|
|
230
|
-
const batchSize = config.partRefBatchSize || 10;
|
|
231
|
-
|
|
232
|
-
for (let i = 0; i < firestoreRefs.length; i += batchSize) {
|
|
233
|
-
const batch = firestoreRefs.slice(i, i + batchSize);
|
|
234
|
-
const refs = batch.map(b => b.ref).filter(ref => ref !== null); // Filter out null refs (BigQuery refs)
|
|
235
|
-
|
|
236
|
-
if (refs.length === 0) continue; // Skip if all refs are BigQuery refs
|
|
237
|
-
|
|
238
|
-
const snapshots = await withRetry(() => db.getAll(...refs), `getAll(batch ${Math.floor(i/batchSize)})`);
|
|
239
|
-
|
|
240
|
-
const deepFetchPromises = [];
|
|
241
|
-
|
|
242
|
-
for (let j = 0; j < snapshots.length; j++) {
|
|
243
|
-
const doc = snapshots[j];
|
|
244
|
-
const meta = batch[j];
|
|
245
|
-
|
|
246
|
-
if (!doc.exists) continue;
|
|
247
|
-
|
|
248
|
-
const rawData = doc.data();
|
|
249
|
-
let chunkData;
|
|
250
|
-
|
|
251
|
-
if (meta.collectionType === 'NEW_STRUCTURE') {
|
|
252
|
-
const cid = meta.cid || doc.id;
|
|
253
|
-
const userData = tryDecompress(rawData);
|
|
254
|
-
chunkData = { [cid]: userData };
|
|
255
|
-
|
|
256
|
-
if (meta.type === 'POPULAR_INVESTOR') {
|
|
257
|
-
chunkData[cid]._userType = 'POPULAR_INVESTOR';
|
|
258
|
-
if (chunkData[cid].deepPositions) chunkData[cid].DeepPositions = chunkData[cid].deepPositions;
|
|
259
|
-
} else if (meta.type === 'SIGNED_IN_USER') {
|
|
260
|
-
chunkData[cid]._userType = 'SIGNED_IN_USER';
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
deepFetchPromises.push(Promise.resolve(chunkData));
|
|
264
|
-
} else {
|
|
265
|
-
chunkData = tryDecompress(rawData);
|
|
266
|
-
|
|
267
|
-
if (meta.type === 'POPULAR_INVESTOR' && config.piDeepPortfolioCollection) {
|
|
268
|
-
const pathSegments = doc.ref.path.split('/');
|
|
269
|
-
const deepCollection = config.piDeepPortfolioCollection;
|
|
270
|
-
const deepPath = `${deepCollection}/${pathSegments[1]}/${pathSegments[2]}/${pathSegments[3]}/${pathSegments[4]}/${pathSegments[5]}`;
|
|
271
|
-
|
|
272
|
-
deepFetchPromises.push(
|
|
273
|
-
db.doc(deepPath).get().then(deepSnap => {
|
|
274
|
-
if (deepSnap.exists) {
|
|
275
|
-
const deepChunk = tryDecompress(deepSnap.data());
|
|
276
|
-
for (const [uid, pData] of Object.entries(chunkData)) {
|
|
277
|
-
if (deepChunk[uid] && deepChunk[uid].positions) pData.DeepPositions = deepChunk[uid].positions;
|
|
278
|
-
}
|
|
279
|
-
}
|
|
280
|
-
for (const pData of Object.values(chunkData)) pData._userType = 'POPULAR_INVESTOR';
|
|
281
|
-
return chunkData;
|
|
282
|
-
}).catch(() => chunkData)
|
|
283
|
-
);
|
|
284
|
-
} else if (meta.type === 'SIGNED_IN_USER') {
|
|
285
|
-
for (const pData of Object.values(chunkData)) pData._userType = 'SIGNED_IN_USER';
|
|
286
|
-
deepFetchPromises.push(Promise.resolve(chunkData));
|
|
287
|
-
} else {
|
|
288
|
-
deepFetchPromises.push(Promise.resolve(chunkData));
|
|
289
|
-
}
|
|
290
|
-
}
|
|
55
|
+
if (types.includes('SPECULATOR')) {
|
|
56
|
+
const specData = await loadRetailFirestore(db, 'SpeculatorPortfolios', dateStr);
|
|
57
|
+
Object.assign(results, specData);
|
|
291
58
|
}
|
|
292
|
-
|
|
293
|
-
const resolvedChunks = await Promise.all(deepFetchPromises);
|
|
294
|
-
resolvedChunks.forEach(chunk => {
|
|
295
|
-
if (chunk && typeof chunk === 'object') Object.assign(mergedPortfolios, chunk);
|
|
296
|
-
});
|
|
297
59
|
}
|
|
298
|
-
return mergedPortfolios;
|
|
299
|
-
}
|
|
300
|
-
|
|
301
|
-
/** Stage 3: Load a full day map by delegating to loadDataByRefs */
|
|
302
|
-
async function loadFullDayMap(config, deps, partRefs, dateString) {
|
|
303
|
-
// 1. GCS FAST PATH
|
|
304
|
-
const cached = await tryLoadFromGCS(config, dateString, 'portfolios', deps.logger);
|
|
305
|
-
if (cached) return cached;
|
|
306
|
-
|
|
307
|
-
// 2. FIRESTORE FALLBACK
|
|
308
|
-
const { logger } = deps;
|
|
309
|
-
if (!partRefs.length) return {};
|
|
310
|
-
logger.log('TRACE', `Loading full day map from ${partRefs.length} references...`);
|
|
311
|
-
const fullMap = await loadDataByRefs(config, deps, partRefs);
|
|
312
|
-
logger.log('TRACE', `Full day map loaded with ${Object.keys(fullMap).length} users`);
|
|
313
|
-
return fullMap;
|
|
314
|
-
}
|
|
315
60
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
// 1. GCS FAST PATH
|
|
319
|
-
const cached = await tryLoadFromGCS(config, dateString, 'portfolios', deps.logger);
|
|
320
|
-
if (cached) return cached;
|
|
61
|
+
return results;
|
|
62
|
+
};
|
|
321
63
|
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
64
|
+
// =============================================================================
|
|
65
|
+
// 2. TRADE HISTORY
|
|
66
|
+
// =============================================================================
|
|
67
|
+
exports.loadDailyHistory = async (config, deps, dateStr, userTypes = []) => {
|
|
68
|
+
const { db, logger } = deps;
|
|
69
|
+
// Wrap a single type in an array; a null/empty value yields [] instead of [null],
// which would throw at the .toUpperCase() checks that follow.
const types = Array.isArray(userTypes) ? userTypes : (userTypes ? [userTypes] : []);
|
|
70
|
+
const isRetail = types.some(t => ['NORMAL', 'SPECULATOR'].includes(t.toUpperCase()));
|
|
71
|
+
const isMigrated = types.some(t => ['POPULAR_INVESTOR', 'SIGNED_IN_USER'].includes(t.toUpperCase()));
|
|
328
72
|
|
|
329
|
-
|
|
330
|
-
async function loadDailyInsights(config, deps, dateString) {
|
|
331
|
-
const { db, logger, calculationUtils } = deps;
|
|
332
|
-
const { withRetry } = calculationUtils;
|
|
333
|
-
|
|
334
|
-
// 1. GCS FAST PATH
|
|
335
|
-
const cached = await tryLoadFromGCS(config, dateString, 'insights', logger);
|
|
336
|
-
if (cached) return cached;
|
|
337
|
-
|
|
338
|
-
// 2. BIGQUERY FIRST (if enabled)
|
|
339
|
-
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
340
|
-
try {
|
|
341
|
-
const { queryInstrumentInsights } = require('../../core/utils/bigquery_utils');
|
|
342
|
-
const bigqueryData = await queryInstrumentInsights(dateString, logger);
|
|
343
|
-
|
|
344
|
-
if (bigqueryData && Array.isArray(bigqueryData) && bigqueryData.length > 0) {
|
|
345
|
-
logger.log('INFO', `[DataLoader] ✅ Using BigQuery for instrument insights (${dateString}): ${bigqueryData.length} instruments`);
|
|
346
|
-
// Return in same format as Firestore: { insights: [...] }
|
|
347
|
-
return { insights: bigqueryData };
|
|
348
|
-
}
|
|
349
|
-
} catch (bqError) {
|
|
350
|
-
logger.log('WARN', `[DataLoader] BigQuery insights query failed, falling back to Firestore: ${bqError.message}`);
|
|
351
|
-
// Fall through to Firestore
|
|
352
|
-
}
|
|
353
|
-
}
|
|
73
|
+
let results = {};
|
|
354
74
|
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
try {
|
|
359
|
-
const docRef = db.collection(insightsCollectionName).doc(dateString);
|
|
360
|
-
const docSnap = await withRetry(() => docRef.get(), `getInsights(${dateString})`);
|
|
361
|
-
if (!docSnap.exists) { logger.log('WARN', `Insights not found for ${dateString}`); return null; }
|
|
362
|
-
logger.log('TRACE', `Successfully loaded insights for ${dateString}`);
|
|
363
|
-
return tryDecompress(docSnap.data());
|
|
364
|
-
} catch (error) {
|
|
365
|
-
logger.log('ERROR', `Failed to load daily insights for ${dateString}`, { errorMessage: error.message });
|
|
366
|
-
return null;
|
|
75
|
+
if (isMigrated && process.env.BIGQUERY_ENABLED !== 'false') {
|
|
76
|
+
const bqData = await queryHistoryData(dateStr, null, types, logger);
|
|
77
|
+
if (bqData) Object.assign(results, bqData);
|
|
367
78
|
}
|
|
368
|
-
}
|
|
369
79
|
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
// 2. BIGQUERY FIRST (if enabled)
|
|
380
|
-
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
381
|
-
try {
|
|
382
|
-
const { querySocialData } = require('../../core/utils/bigquery_utils');
|
|
383
|
-
const bigqueryData = await querySocialData(dateString, null, null, logger);
|
|
384
|
-
|
|
385
|
-
if (bigqueryData && Object.keys(bigqueryData).length > 0) {
|
|
386
|
-
logger.log('INFO', `[DataLoader] ✅ Using BigQuery for social data (${dateString}): ${Object.keys(bigqueryData).length} users`);
|
|
387
|
-
|
|
388
|
-
// Transform BigQuery data to expected format: { generic: {}, pi: {}, signedIn: {} }
|
|
389
|
-
// BigQuery returns: { userId: { posts_data: { posts: {...}, postCount: N }, user_type: '...' } }
|
|
390
|
-
const result = { generic: {}, pi: {}, signedIn: {} };
|
|
391
|
-
|
|
392
|
-
for (const [userId, userData] of Object.entries(bigqueryData)) {
|
|
393
|
-
const userType = userData.user_type || 'UNKNOWN';
|
|
394
|
-
|
|
395
|
-
// Handle posts_data - may be object (parsed JSON) or string (needs parsing)
|
|
396
|
-
let postsData = userData.posts_data || {};
|
|
397
|
-
if (typeof postsData === 'string') {
|
|
398
|
-
try {
|
|
399
|
-
postsData = JSON.parse(postsData);
|
|
400
|
-
} catch (e) {
|
|
401
|
-
logger.log('WARN', `[DataLoader] Failed to parse posts_data for user ${userId}: ${e.message}`);
|
|
402
|
-
continue;
|
|
403
|
-
}
|
|
404
|
-
}
|
|
405
|
-
|
|
406
|
-
// Extract posts map from posts_data structure: { posts: {...}, postCount: N }
|
|
407
|
-
const posts = postsData.posts || {};
|
|
408
|
-
|
|
409
|
-
// Partition by user type
|
|
410
|
-
if (userType === 'POPULAR_INVESTOR') {
|
|
411
|
-
result.pi[userId] = posts;
|
|
412
|
-
} else if (userType === 'SIGNED_IN_USER') {
|
|
413
|
-
result.signedIn[userId] = posts;
|
|
414
|
-
} else {
|
|
415
|
-
// Generic/unknown user types go to generic
|
|
416
|
-
result.generic[userId] = posts;
|
|
417
|
-
}
|
|
418
|
-
}
|
|
419
|
-
|
|
420
|
-
logger.log('INFO', `[DataLoader] ✅ Loaded Social Data from BigQuery: ${Object.keys(result.generic).length} Generic, ${Object.keys(result.pi).length} PIs, ${Object.keys(result.signedIn).length} Signed-In`);
|
|
421
|
-
return result;
|
|
422
|
-
}
|
|
423
|
-
} catch (bqError) {
|
|
424
|
-
logger.log('WARN', `[DataLoader] BigQuery social query failed for ${dateString}, falling back to Firestore: ${bqError.message}`);
|
|
425
|
-
// Fall through to Firestore
|
|
80
|
+
if (isRetail) {
|
|
81
|
+
if (types.includes('NORMAL')) {
|
|
82
|
+
const normalData = await loadRetailFirestore(db, 'NormalUserTradeHistory', dateStr);
|
|
83
|
+
Object.assign(results, normalData);
|
|
84
|
+
}
|
|
85
|
+
if (types.includes('SPECULATOR')) {
|
|
86
|
+
const specData = await loadRetailFirestore(db, 'SpeculatorTradeHistory', dateStr);
|
|
87
|
+
Object.assign(results, specData);
|
|
426
88
|
}
|
|
427
89
|
}
|
|
90
|
+
return results;
|
|
91
|
+
};
|
|
428
92
|
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
93
|
+
// =============================================================================
|
|
94
|
+
// 3. SOCIAL
|
|
95
|
+
// =============================================================================
|
|
96
|
+
exports.loadDailySocialPostInsights = async (config, deps, dateStr, userTypes = []) => {
|
|
97
|
+
const { db, logger } = deps;
|
|
98
|
+
const types = Array.isArray(userTypes) ? userTypes : (userTypes ? [userTypes] : []);
|
|
433
99
|
|
|
434
|
-
//
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
const signedInSocialDateDoc = db.collection(signedInSocialCollectionName).doc(dateString);
|
|
438
|
-
const signedInSocialSubcollections = await withRetry(() => signedInSocialDateDoc.listCollections(), `listSignedInSocial(${dateString})`);
|
|
439
|
-
|
|
440
|
-
for (const subcol of signedInSocialSubcollections) {
|
|
441
|
-
const cid = subcol.id;
|
|
442
|
-
const cidDoc = await subcol.doc(cid).get();
|
|
443
|
-
if (cidDoc.exists) {
|
|
444
|
-
const cidData = tryDecompress(cidDoc.data());
|
|
445
|
-
if (cidData.posts && typeof cidData.posts === 'object') {
|
|
446
|
-
if (!result.signedIn[cid]) result.signedIn[cid] = {};
|
|
447
|
-
Object.assign(result.signedIn[cid], cidData.posts);
|
|
448
|
-
}
|
|
449
|
-
}
|
|
450
|
-
}
|
|
451
|
-
|
|
452
|
-
const piSocialCollectionName = 'PopularInvestorSocialPostData';
|
|
453
|
-
const piSocialDateDoc = db.collection(piSocialCollectionName).doc(dateString);
|
|
454
|
-
const piSocialSubcollections = await withRetry(() => piSocialDateDoc.listCollections(), `listPISocial(${dateString})`);
|
|
455
|
-
|
|
456
|
-
for (const subcol of piSocialSubcollections) {
|
|
457
|
-
const cid = subcol.id;
|
|
458
|
-
const cidDoc = await subcol.doc(cid).get();
|
|
459
|
-
if (cidDoc.exists) {
|
|
460
|
-
const cidData = tryDecompress(cidDoc.data());
|
|
461
|
-
if (cidData.posts && typeof cidData.posts === 'object') {
|
|
462
|
-
if (!result.pi[cid]) result.pi[cid] = {};
|
|
463
|
-
Object.assign(result.pi[cid], cidData.posts);
|
|
464
|
-
}
|
|
465
|
-
}
|
|
466
|
-
}
|
|
467
|
-
|
|
468
|
-
const instrumentSocialCollectionName = 'InstrumentFeedSocialPostData';
|
|
469
|
-
const instrumentSocialDateDoc = db.collection(instrumentSocialCollectionName).doc(dateString);
|
|
470
|
-
const instrumentSocialPostsCol = instrumentSocialDateDoc.collection('posts');
|
|
471
|
-
const instrumentSocialSnapshot = await withRetry(() => instrumentSocialPostsCol.limit(1000).get(), `getInstrumentSocial(${dateString})`);
|
|
472
|
-
|
|
473
|
-
instrumentSocialSnapshot.forEach(doc => {
|
|
474
|
-
const data = tryDecompress(doc.data());
|
|
475
|
-
result.generic[doc.id] = data;
|
|
476
|
-
});
|
|
477
|
-
|
|
478
|
-
logger.log('INFO', `Loaded Social Data (NEW): ${Object.keys(result.generic).length} Generic, ${Object.keys(result.pi).length} PIs.`);
|
|
479
|
-
} catch (newStructError) {
|
|
480
|
-
logger.log('WARN', `Failed to load from new structure: ${newStructError.message}`);
|
|
100
|
+
// A. BigQuery (User-Specific Social)
|
|
101
|
+
if (types.length > 0 && process.env.BIGQUERY_ENABLED !== 'false') {
|
|
102
|
+
return querySocialData(dateStr, null, types, logger);
|
|
481
103
|
}
|
|
482
104
|
|
|
483
|
-
//
|
|
484
|
-
|
|
485
|
-
const
|
|
486
|
-
const startDate = new Date(dateString + 'T00:00:00Z');
|
|
487
|
-
const endDate = new Date(dateString + 'T23:59:59Z');
|
|
488
|
-
|
|
105
|
+
// B. Firestore (Generic Feed - Legacy)
|
|
106
|
+
// If no user types specified, assume generic feed fetch
|
|
107
|
+
const collection = config.socialInsightsCollection || 'daily_social_insights';
|
|
489
108
|
try {
|
|
490
|
-
const
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
const parts = path.split('/');
|
|
499
|
-
const colIndex = parts.indexOf(PI_COL_NAME);
|
|
500
|
-
if (colIndex !== -1 && parts[colIndex + 1]) {
|
|
501
|
-
const userId = parts[colIndex + 1];
|
|
502
|
-
if (!result.pi[userId]) result.pi[userId] = {};
|
|
503
|
-
result.pi[userId][doc.id] = data;
|
|
504
|
-
}
|
|
505
|
-
} else if (path.includes(SIGNED_IN_COL_NAME)) {
|
|
506
|
-
const parts = path.split('/');
|
|
507
|
-
const colIndex = parts.indexOf(SIGNED_IN_COL_NAME);
|
|
508
|
-
if (colIndex !== -1 && parts[colIndex + 1]) {
|
|
509
|
-
const userId = parts[colIndex + 1];
|
|
510
|
-
if (!result.signedIn[userId]) result.signedIn[userId] = {};
|
|
511
|
-
result.signedIn[userId][doc.id] = data;
|
|
512
|
-
}
|
|
513
|
-
} else {
|
|
514
|
-
result.generic[doc.id] = data;
|
|
515
|
-
}
|
|
516
|
-
});
|
|
517
|
-
}
|
|
518
|
-
} catch (error) {
|
|
519
|
-
logger.log('ERROR', `Failed to load social posts (legacy): ${error.message}`);
|
|
109
|
+
const snap = await db.collection(collection).doc(dateStr).collection('posts').get();
|
|
110
|
+
if (snap.empty) return {};
|
|
111
|
+
const data = {};
|
|
112
|
+
snap.forEach(doc => data[doc.id] = doc.data());
|
|
113
|
+
return data;
|
|
114
|
+
} catch (e) {
|
|
115
|
+
logger.log('WARN', `[DataLoader] Failed to load generic social for ${dateStr}: ${e.message}`);
|
|
116
|
+
return {};
|
|
520
117
|
}
|
|
118
|
+
};
|
|
521
119
|
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
const { withRetry } = calculationUtils;
|
|
529
|
-
|
|
530
|
-
// Normalize required types
|
|
531
|
-
const types = requiredUserTypes ? new Set(requiredUserTypes.map(t => t.toUpperCase())) : null;
|
|
532
|
-
const fetchAll = !types || types.size === 0 || types.has('ALL');
|
|
533
|
-
|
|
534
|
-
// =========================================================================
|
|
535
|
-
// BIGQUERY FIRST: Try BigQuery before Firestore
|
|
536
|
-
// =========================================================================
|
|
120
|
+
// =============================================================================
|
|
121
|
+
// 4. MARKET DATA (Prices)
|
|
122
|
+
// =============================================================================
|
|
123
|
+
exports.getPriceShardRefs = async (config, deps) => {
|
|
124
|
+
// Legacy shard helper: BigQuery mode does not use shards, but CachedDataLoader still expects this structure.
|
|
125
|
+
// We return a "virtual" shard array that signals CachedDataLoader to load from BQ.
|
|
537
126
|
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
bigqueryData: dataObj.history_data || {} // Extract the actual history data
|
|
554
|
-
};
|
|
555
|
-
});
|
|
556
|
-
|
|
557
|
-
return bigqueryRefs;
|
|
558
|
-
} else {
|
|
559
|
-
logger.log('INFO', `[DataLoader] ⚠️ BigQuery returned no history data for ${dateString}, falling back to Firestore`);
|
|
560
|
-
}
|
|
561
|
-
} catch (bqError) {
|
|
562
|
-
logger.log('WARN', `[DataLoader] BigQuery history query failed for ${dateString}, falling back to Firestore: ${bqError.message}`);
|
|
563
|
-
}
|
|
564
|
-
}
|
|
565
|
-
|
|
566
|
-
// =========================================================================
|
|
567
|
-
// FIRESTORE FALLBACK: Original logic (backwards compatibility)
|
|
568
|
-
// =========================================================================
|
|
569
|
-
logger.log('INFO', `[DataLoader] 📂 Using Firestore for trade history data (${dateString}). Filter: ${fetchAll ? 'ALL' : Array.from(types || []).join(',')}`);
|
|
570
|
-
|
|
571
|
-
const allPartRefs = [];
|
|
572
|
-
|
|
573
|
-
// NEW STRUCTURE
|
|
574
|
-
try {
|
|
575
|
-
if (fetchAll || types.has('SIGNED_IN_USER')) {
|
|
576
|
-
const signedInHistCollectionName = 'SignedInUserTradeHistoryData';
|
|
577
|
-
const signedInHistDateDoc = db.collection(signedInHistCollectionName).doc(dateString);
|
|
578
|
-
const signedInHistSubcollections = await withRetry(
|
|
579
|
-
() => signedInHistDateDoc.listCollections(),
|
|
580
|
-
`listSignedInHistory(${dateString})`
|
|
581
|
-
);
|
|
582
|
-
signedInHistSubcollections.forEach(subcol => {
|
|
583
|
-
allPartRefs.push({ ref: subcol.doc(subcol.id), type: 'SIGNED_IN_USER', cid: subcol.id, collectionType: 'NEW_STRUCTURE' });
|
|
584
|
-
});
|
|
585
|
-
}
|
|
586
|
-
|
|
587
|
-
if (fetchAll || types.has('POPULAR_INVESTOR')) {
|
|
588
|
-
const piHistCollectionName = 'PopularInvestorTradeHistoryData';
|
|
589
|
-
const piHistDateDoc = db.collection(piHistCollectionName).doc(dateString);
|
|
590
|
-
const piHistSubcollections = await withRetry(
|
|
591
|
-
() => piHistDateDoc.listCollections(),
|
|
592
|
-
`listPIHistory(${dateString})`
|
|
593
|
-
);
|
|
594
|
-
piHistSubcollections.forEach(subcol => {
|
|
595
|
-
allPartRefs.push({ ref: subcol.doc(subcol.id), type: 'POPULAR_INVESTOR', cid: subcol.id, collectionType: 'NEW_STRUCTURE' });
|
|
596
|
-
});
|
|
597
|
-
}
|
|
598
|
-
} catch (newStructError) {
|
|
599
|
-
logger.log('WARN', `Failed to load from new structure: ${newStructError.message}`);
|
|
127
|
+
return [ { _bigquery: true } ];
|
|
128
|
+
}
|
|
129
|
+
// Fallback to Firestore Logic - return array of doc refs
|
|
130
|
+
const { db } = deps;
|
|
131
|
+
const collection = config.assetPricesCollection || 'asset_prices';
|
|
132
|
+
const snapshot = await db.collection(collection).listDocuments();
|
|
133
|
+
const refs = [];
|
|
134
|
+
snapshot.forEach(doc => refs.push(doc));
|
|
135
|
+
return refs;
|
|
136
|
+
};
|
|
137
|
+
|
|
138
|
+
exports.getRelevantShardRefs = async (config, deps, targetIds) => {
|
|
139
|
+
// In BQ mode, we don't shard by instrument; return single virtual shard
|
|
140
|
+
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
141
|
+
return [ { _bigquery: true, targetIds: targetIds || [] } ];
|
|
600
142
|
}
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
if ((fetchAll || types.has('NORMAL')) && config.normalUserHistoryCollection) collectionsToQuery.push(config.normalUserHistoryCollection);
|
|
605
|
-
if ((fetchAll || types.has('SPECULATOR')) && config.speculatorHistoryCollection) collectionsToQuery.push(config.speculatorHistoryCollection);
|
|
606
|
-
if ((fetchAll || types.has('POPULAR_INVESTOR')) && config.piHistoryCollection) collectionsToQuery.push(config.piHistoryCollection);
|
|
607
|
-
if ((fetchAll || types.has('SIGNED_IN_USER')) && config.signedInHistoryCollection) collectionsToQuery.push(config.signedInHistoryCollection);
|
|
143
|
+
// Firestore behavior - return array of doc refs (same as getPriceShardRefs for now)
|
|
144
|
+
return exports.getPriceShardRefs(config, deps);
|
|
145
|
+
};
|
|
608
146
|
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
const blockDocRefs = await withRetry(() => blockDocsQuery.listDocuments(), `listDocuments(${collectionName})`);
|
|
613
|
-
if (!blockDocRefs.length) continue;
|
|
614
|
-
|
|
615
|
-
const partsPromises = blockDocRefs.map(blockDocRef => {
|
|
616
|
-
const partsCollectionRef = blockDocRef.collection(config.snapshotsSubcollection || 'snapshots').doc(dateString).collection(config.partsSubcollection || 'parts');
|
|
617
|
-
return withRetry(() => partsCollectionRef.listDocuments(), `listParts(${partsCollectionRef.path})`);
|
|
618
|
-
});
|
|
619
|
-
|
|
620
|
-
const partDocArrays = await Promise.all(partsPromises);
|
|
621
|
-
partDocArrays.forEach(partDocs => {
|
|
622
|
-
allPartRefs.push(...partDocs.map(ref => ({ ref, type: 'PART', collectionType: 'LEGACY' })));
|
|
623
|
-
});
|
|
624
|
-
} catch (legacyError) {
|
|
625
|
-
logger.log('WARN', `Failed to load legacy history collection ${collectionName}: ${legacyError.message}`);
|
|
626
|
-
}
|
|
627
|
-
}
|
|
628
|
-
|
|
629
|
-
logger.log('INFO', `Found ${allPartRefs.length} total history refs for ${dateString}`);
|
|
630
|
-
return allPartRefs;
|
|
631
|
-
}
|
|
147
|
+
// =============================================================================
|
|
148
|
+
// 5. ROOT DATA TYPES (Simple Mappings)
|
|
149
|
+
// =============================================================================
|
|
632
150
|
|
|
633
|
-
|
|
634
|
-
async function* streamPortfolioData(config, deps, dateString, providedRefs = null, requiredUserTypes = null) {
|
|
151
|
+
exports.loadDailyInsights = async (config, deps, dateStr) => {
|
|
635
152
|
const { logger } = deps;
|
|
636
153
|
|
|
637
|
-
|
|
638
|
-
// BIGQUERY FIRST: Try BigQuery before GCS/Firestore
|
|
639
|
-
// =========================================================================
|
|
640
|
-
if (!providedRefs && process.env.BIGQUERY_ENABLED !== 'false') {
|
|
154
|
+
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
641
155
|
try {
|
|
642
|
-
const
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
// Transform to expected format and yield
|
|
649
|
-
const transformedData = {};
|
|
650
|
-
Object.keys(bigqueryData).forEach(userId => {
|
|
651
|
-
transformedData[userId] = bigqueryData[userId].portfolio_data || {};
|
|
652
|
-
});
|
|
653
|
-
|
|
654
|
-
yield transformedData;
|
|
655
|
-
return; // Exit early, data loaded from BigQuery
|
|
656
|
-
}
|
|
657
|
-
} catch (bqError) {
|
|
658
|
-
logger.log('WARN', `[DataLoader] BigQuery portfolio stream failed for ${dateString}, falling back: ${bqError.message}`);
|
|
659
|
-
}
|
|
660
|
-
}
|
|
661
|
-
|
|
662
|
-
// 1. GCS FAST PATH (Snapshot) - Only if full run (no providedRefs)
|
|
663
|
-
if (!providedRefs) {
|
|
664
|
-
try {
|
|
665
|
-
const bucketName = config.gcsBucketName || 'bulltrackers';
|
|
666
|
-
const bucket = storage.bucket(bucketName);
|
|
667
|
-
const file = bucket.file(`${dateString}/snapshots/portfolios.json.gz`);
|
|
668
|
-
const [exists] = await file.exists();
|
|
669
|
-
|
|
670
|
-
if (exists) {
|
|
671
|
-
logger.log('INFO', `[DataLoader] ⚡️ STREAMING: Hydrating Portfolios from GCS Snapshot`);
|
|
672
|
-
const [content] = await file.download();
|
|
673
|
-
|
|
674
|
-
// FIX: Handle Double Decompression
|
|
675
|
-
let fullData;
|
|
676
|
-
try {
|
|
677
|
-
fullData = JSON.parse(zlib.gunzipSync(content).toString());
|
|
678
|
-
} catch (zipError) {
|
|
679
|
-
if (zipError.message && zipError.message.includes('incorrect header check')) {
|
|
680
|
-
fullData = JSON.parse(content.toString());
|
|
681
|
-
} else {
|
|
682
|
-
throw zipError;
|
|
683
|
-
}
|
|
684
|
-
}
|
|
685
|
-
|
|
686
|
-
yield fullData; // Yield all in one chunk as it fits in memory
|
|
687
|
-
return;
|
|
156
|
+
const rows = await queryInstrumentInsights(dateStr, logger);
|
|
157
|
+
if (Array.isArray(rows) && rows.length > 0) {
|
|
158
|
+
logger.log('INFO', `[DataLoader] ✅ Using BigQuery for instrument insights (${dateStr}): ${rows.length} instruments`);
|
|
159
|
+
// Wrap in Firestore-shaped document format for InsightsExtractor compatibility
|
|
160
|
+
return { insights: rows };
|
|
688
161
|
}
|
|
689
162
|
} catch (e) {
|
|
690
|
-
logger.log('WARN', `[DataLoader]
|
|
163
|
+
logger.log('WARN', `[DataLoader] BigQuery insights query failed for ${dateStr}: ${e.message}`);
|
|
691
164
|
}
|
|
692
165
|
}
|
|
693
166
|
|
|
694
|
-
//
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
const batchSize = config.partRefBatchSize || 10;
|
|
699
|
-
logger.log('INFO', `[streamPortfolioData] Streaming ${refs.length} portfolio parts in chunks of ${batchSize}...`);
|
|
700
|
-
|
|
701
|
-
for (let i = 0; i < refs.length; i += batchSize) {
|
|
702
|
-
const batchRefs = refs.slice(i, i + batchSize);
|
|
703
|
-
const data = await loadDataByRefs(config, deps, batchRefs);
|
|
704
|
-
yield data;
|
|
705
|
-
}
|
|
706
|
-
logger.log('INFO', `[streamPortfolioData] Finished streaming for ${dateString}.`);
|
|
707
|
-
}
|
|
167
|
+
// No Firestore fallback by design – return empty but correctly shaped
|
|
168
|
+
return { insights: [] };
|
|
169
|
+
};
|
|
708
170
|
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
171
|
+
exports.loadPopularInvestorRankings = async (config, deps, dateStr) => {
|
|
172
|
+
const data = await queryPIRankings(dateStr, deps.logger);
|
|
173
|
+
return data ? data.Items : [];
|
|
174
|
+
};
|
|
712
175
|
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
if (!providedRefs && process.env.BIGQUERY_ENABLED !== 'false') {
|
|
717
|
-
try {
|
|
718
|
-
const { queryHistoryData } = require('../../core/utils/bigquery_utils');
|
|
719
|
-
const bigqueryData = await queryHistoryData(dateString, null, requiredUserTypes, logger);
|
|
720
|
-
|
|
721
|
-
if (bigqueryData && Object.keys(bigqueryData).length > 0) {
|
|
722
|
-
logger.log('INFO', `[DataLoader] ✅ Streaming history data from BigQuery (${dateString}): ${Object.keys(bigqueryData).length} users`);
|
|
723
|
-
|
|
724
|
-
// Transform to expected format and yield
|
|
725
|
-
const transformedData = {};
|
|
726
|
-
Object.keys(bigqueryData).forEach(userId => {
|
|
727
|
-
transformedData[userId] = bigqueryData[userId].history_data || {};
|
|
728
|
-
});
|
|
729
|
-
|
|
730
|
-
yield transformedData;
|
|
731
|
-
return; // Exit early, data loaded from BigQuery
|
|
732
|
-
}
|
|
733
|
-
} catch (bqError) {
|
|
734
|
-
logger.log('WARN', `[DataLoader] BigQuery history stream failed for ${dateString}, falling back: ${bqError.message}`);
|
|
735
|
-
}
|
|
736
|
-
}
|
|
176
|
+
exports.loadPIRatings = async (config, deps, dateStr) => {
|
|
177
|
+
return queryPIRatings(dateStr, deps.logger);
|
|
178
|
+
};
|
|
737
179
|
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
const bucketName = config.gcsBucketName || 'bulltrackers';
|
|
742
|
-
const bucket = storage.bucket(bucketName);
|
|
743
|
-
const file = bucket.file(`${dateString}/snapshots/history.jsonl.gz`);
|
|
744
|
-
const [exists] = await file.exists();
|
|
180
|
+
exports.loadPIPageViews = async (config, deps, dateStr) => {
|
|
181
|
+
return queryPIPageViews(dateStr, deps.logger);
|
|
182
|
+
};
|
|
745
183
|
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
const fileStream = file.createReadStream();
|
|
750
|
-
const rl = readline.createInterface({ input: fileStream, crlfDelay: Infinity });
|
|
184
|
+
exports.loadWatchlistMembership = async (config, deps, dateStr) => {
|
|
185
|
+
return queryWatchlistMembership(dateStr, deps.logger);
|
|
186
|
+
};
|
|
751
187
|
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
188
|
+
exports.loadPIAlertHistory = async (config, deps, dateStr) => {
|
|
189
|
+
return queryPIAlertHistory(dateStr, deps.logger);
|
|
190
|
+
};
|
|
755
191
|
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
const userEntry = JSON.parse(line);
|
|
760
|
-
Object.assign(currentBatch, userEntry);
|
|
761
|
-
count++;
|
|
762
|
-
if (count >= BATCH_SIZE) {
|
|
763
|
-
yield currentBatch;
|
|
764
|
-
currentBatch = {};
|
|
765
|
-
count = 0;
|
|
766
|
-
}
|
|
767
|
-
} catch (parseErr) {
|
|
768
|
-
logger.log('ERROR', `[DataLoader] JSONL Parse Error: ${parseErr.message}`);
|
|
769
|
-
}
|
|
770
|
-
}
|
|
771
|
-
if (Object.keys(currentBatch).length > 0) yield currentBatch;
|
|
772
|
-
logger.log('INFO', `[DataLoader] Finished streaming History from GCS.`);
|
|
773
|
-
return;
|
|
774
|
-
}
|
|
775
|
-
} catch (e) {
|
|
776
|
-
logger.log('WARN', `[DataLoader] GCS History Stream failed: ${e.message}. Falling back.`);
|
|
777
|
-
}
|
|
778
|
-
}
|
|
192
|
+
exports.loadPopularInvestorMasterList = async (config, deps) => {
|
|
193
|
+
return queryPIMasterList(deps.logger);
|
|
194
|
+
};
|
|
779
195
|
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
const data = await loadDataByRefs(config, deps, batchRefs);
|
|
790
|
-
yield data;
|
|
791
|
-
}
|
|
792
|
-
logger.log('INFO', `[streamHistoryData] Finished streaming for ${dateString}.`);
|
|
793
|
-
}
|
|
196
|
+
exports.loadPIWatchlistData = async (config, deps, piCid) => {
|
|
197
|
+
// Watchlist data is time-series in BQ. For "Current State" (ID based),
|
|
198
|
+
// we query the most recent date available for this PI.
|
|
199
|
+
// This is a specialized query not in standard utils, so we implement it here or assume caller passes date.
|
|
200
|
+
// However, CachedDataLoader expects (cid) -> Data.
|
|
201
|
+
// We'll return null here as WatchlistMembership (by date) is the preferred method now.
|
|
202
|
+
deps.logger.log('WARN', '[DataLoader] loadPIWatchlistData (by CID) is deprecated in favor of loadWatchlistMembership (by Date).');
|
|
203
|
+
return null;
|
|
204
|
+
};
|
|
794
205
|
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
// Try BigQuery first if enabled
|
|
802
|
-
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
803
|
-
// Return a special marker object to indicate BigQuery mode
|
|
804
|
-
// The loader will detect this and load from BigQuery instead
|
|
805
|
-
return [{ _bigquery: true }];
|
|
806
|
-
}
|
|
807
|
-
|
|
808
|
-
// Fallback to Firestore
|
|
809
|
-
const { db, calculationUtils } = deps;
|
|
810
|
-
const { withRetry } = calculationUtils;
|
|
811
|
-
const collection = config.priceCollection || 'asset_prices';
|
|
206
|
+
// =============================================================================
|
|
207
|
+
// 6. EXCEPTIONS (Firestore Only)
|
|
208
|
+
// =============================================================================
|
|
209
|
+
|
|
210
|
+
exports.loadVerificationProfiles = async (config, deps, dateStr) => {
|
|
211
|
+
const { db, logger } = deps;
|
|
812
212
|
try {
|
|
813
|
-
|
|
814
|
-
const
|
|
815
|
-
|
|
213
|
+
// Verifications are a single collection, not date-partitioned snapshots
|
|
214
|
+
const snap = await db.collection('user_verifications').get();
|
|
215
|
+
const verifications = {};
|
|
216
|
+
snap.forEach(doc => verifications[doc.id] = doc.data());
|
|
217
|
+
return verifications;
|
|
816
218
|
} catch (e) {
|
|
817
|
-
logger.log('ERROR', `Failed to
|
|
818
|
-
return
|
|
219
|
+
logger.log('ERROR', `[DataLoader] Failed to load verifications: ${e.message}`);
|
|
220
|
+
return {};
|
|
819
221
|
}
|
|
820
|
-
}
|
|
222
|
+
};
|
|
821
223
|
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
}
|
|
224
|
+
// =============================================================================
|
|
225
|
+
// HELPERS
|
|
226
|
+
// =============================================================================
|
|
826
227
|
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
return getPriceShardRefs(config, deps);
|
|
831
|
-
}
|
|
832
|
-
|
|
833
|
-
/** Stage 11: Load Popular Investor Rankings */
|
|
834
|
-
async function loadPopularInvestorRankings(config, deps, dateString) {
|
|
835
|
-
const { db, logger, calculationUtils } = deps;
|
|
836
|
-
const { withRetry } = calculationUtils;
|
|
228
|
+
// =============================================================================
|
|
229
|
+
// 7. PRICE DATA BY REFS (For PriceBatchExecutor)
|
|
230
|
+
// =============================================================================
|
|
837
231
|
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
const { queryPIRankings } = require('../../core/utils/bigquery_utils');
|
|
846
|
-
const bigqueryData = await queryPIRankings(dateString, logger);
|
|
847
|
-
|
|
848
|
-
if (bigqueryData && bigqueryData.Items && bigqueryData.Items.length > 0) {
|
|
849
|
-
logger.log('INFO', `[DataLoader] ✅ Using BigQuery for PI rankings (${dateString}): ${bigqueryData.Items.length} items`);
|
|
850
|
-
return bigqueryData.Items;
|
|
851
|
-
}
|
|
852
|
-
} catch (bqError) {
|
|
853
|
-
logger.log('WARN', `[DataLoader] BigQuery rankings query failed, falling back to Firestore: ${bqError.message}`);
|
|
854
|
-
// Fall through to Firestore
|
|
855
|
-
}
|
|
856
|
-
}
|
|
857
|
-
|
|
858
|
-
// 3. FIRESTORE FALLBACK
|
|
859
|
-
const collectionName = config.popularInvestorRankingsCollection || 'popular_investor_rankings';
|
|
860
|
-
logger.log('INFO', `Loading Popular Investor Rankings for ${dateString} from Firestore`);
|
|
861
|
-
try {
|
|
862
|
-
const docRef = db.collection(collectionName).doc(dateString);
|
|
863
|
-
const docSnap = await withRetry(() => docRef.get(), `getRankings(${dateString})`);
|
|
864
|
-
if (!docSnap.exists) { logger.log('WARN', `Rankings not found for ${dateString}`); return null; }
|
|
865
|
-
const data = tryDecompress(docSnap.data());
|
|
866
|
-
return data.Items || [];
|
|
867
|
-
} catch (error) {
|
|
868
|
-
logger.log('ERROR', `Failed to load Rankings for ${dateString}: ${error.message}`);
|
|
869
|
-
return null;
|
|
870
|
-
}
|
|
871
|
-
}
|
|
872
|
-
|
|
873
|
-
/** Stage 12: Load User Verification Profiles
|
|
874
|
-
* [UPDATED] Scans global verification data via CollectionGroup since it's now stored per-user.
|
|
875
|
-
* [UPDATED] Added optional dateString param to support GCS snapshot checks.
|
|
232
|
+
/**
|
|
233
|
+
* Load price data from an array of shard references (virtual or Firestore doc refs).
|
|
234
|
+
* Used by PriceBatchExecutor for batch price computations.
|
|
235
|
+
* @param {object} config - Configuration object
|
|
236
|
+
* @param {object} deps - Dependencies (db, logger, etc.)
|
|
237
|
+
* @param {Array} shardRefs - Array of shard references (virtual BigQuery objects or Firestore doc refs)
|
|
238
|
+
* @returns {Promise<object>} Combined price data object keyed by instrument ID
|
|
876
239
|
*/
|
|
877
|
-
async
|
|
878
|
-
const {
|
|
879
|
-
const { withRetry } = calculationUtils;
|
|
880
|
-
|
|
881
|
-
// 1. GCS FAST PATH (If date provided)
|
|
882
|
-
if (dateString) {
|
|
883
|
-
const cached = await tryLoadFromGCS(config, dateString, 'verification', logger);
|
|
884
|
-
if (cached) return cached;
|
|
885
|
-
}
|
|
240
|
+
exports.loadDataByRefs = async (config, deps, shardRefs) => {
|
|
241
|
+
const { logger } = deps;
|
|
886
242
|
|
|
887
|
-
|
|
888
|
-
logger.log('INFO', `Loading Verification Profiles (CollectionGroup: verification/data)`);
|
|
889
|
-
try {
|
|
890
|
-
const snapshot = await withRetry(() => db.collectionGroup('verification').get(), 'getVerificationsGroup');
|
|
891
|
-
if (snapshot.empty) return {};
|
|
892
|
-
|
|
893
|
-
const profiles = {};
|
|
894
|
-
let count = 0;
|
|
895
|
-
snapshot.forEach(doc => {
|
|
896
|
-
if (doc.id !== 'data') return;
|
|
897
|
-
const raw = tryDecompress(doc.data());
|
|
898
|
-
if (raw.etoroCID) {
|
|
899
|
-
profiles[raw.etoroCID] = {
|
|
900
|
-
cid: raw.etoroCID,
|
|
901
|
-
username: raw.etoroUsername,
|
|
902
|
-
aboutMe: "",
|
|
903
|
-
aboutMeShort: "",
|
|
904
|
-
isVerified: !!(raw.verifiedAt),
|
|
905
|
-
restrictions: []
|
|
906
|
-
};
|
|
907
|
-
count++;
|
|
908
|
-
}
|
|
909
|
-
});
|
|
910
|
-
logger.log('INFO', `Loaded ${count} verification profiles.`);
|
|
911
|
-
return profiles;
|
|
912
|
-
} catch (error) {
|
|
913
|
-
logger.log('ERROR', `Failed to load Verification Profiles: ${error.message}`);
|
|
243
|
+
if (!Array.isArray(shardRefs) || shardRefs.length === 0) {
|
|
914
244
|
return {};
|
|
915
245
|
}
|
|
916
|
-
}
|
|
917
|
-
|
|
918
|
-
/** Stage 13: Load PI Ratings Data */
|
|
919
|
-
async function loadPIRatings(config, deps, dateString) {
|
|
920
|
-
const { db, logger, calculationUtils } = deps;
|
|
921
|
-
const { withRetry } = calculationUtils;
|
|
922
|
-
|
|
923
|
-
// 1. GCS FAST PATH
|
|
924
|
-
const cached = await tryLoadFromGCS(config, dateString, 'ratings', logger);
|
|
925
|
-
if (cached) return cached;
|
|
926
|
-
|
|
927
|
-
// 2. BIGQUERY FIRST (if enabled)
|
|
928
|
-
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
929
|
-
try {
|
|
930
|
-
const { queryPIRatings } = require('../../core/utils/bigquery_utils');
|
|
931
|
-
const bigqueryData = await queryPIRatings(dateString, logger);
|
|
932
|
-
if (bigqueryData) {
|
|
933
|
-
logger.log('INFO', `[DataLoader] ✅ Loaded PI Ratings from BigQuery for ${dateString}`);
|
|
934
|
-
return bigqueryData;
|
|
935
|
-
}
|
|
936
|
-
} catch (error) {
|
|
937
|
-
logger.log('WARN', `[DataLoader] BigQuery PI Ratings query failed, falling back to Firestore: ${error.message}`);
|
|
938
|
-
}
|
|
939
|
-
}
|
|
940
246
|
|
|
941
|
-
//
|
|
942
|
-
const
|
|
943
|
-
logger.log('INFO', `Loading PI Ratings from Firestore for ${dateString}`);
|
|
944
|
-
try {
|
|
945
|
-
const docRef = db.collection(collectionName).doc(dateString);
|
|
946
|
-
const docSnap = await withRetry(() => docRef.get(), `getPIRatings(${dateString})`);
|
|
947
|
-
if (!docSnap.exists) {
|
|
948
|
-
logger.log('WARN', `PI Ratings not found for ${dateString}`);
|
|
949
|
-
return {};
|
|
950
|
-
}
|
|
951
|
-
const data = tryDecompress(docSnap.data());
|
|
952
|
-
const { date, lastUpdated, ...piRatings } = data;
|
|
953
|
-
return piRatings;
|
|
954
|
-
} catch (error) {
|
|
955
|
-
logger.log('ERROR', `Failed to load PI Ratings: ${error.message}`);
|
|
956
|
-
return {};
|
|
957
|
-
}
|
|
958
|
-
}
|
|
959
|
-
|
|
960
|
-
/** Stage 14: Load PI Page Views Data */
|
|
961
|
-
async function loadPIPageViews(config, deps, dateString) {
|
|
962
|
-
const { db, logger, calculationUtils } = deps;
|
|
963
|
-
const { withRetry } = calculationUtils;
|
|
964
|
-
|
|
965
|
-
// 1. GCS FAST PATH
|
|
966
|
-
const cached = await tryLoadFromGCS(config, dateString, 'page_views', logger);
|
|
967
|
-
if (cached) return cached;
|
|
968
|
-
|
|
969
|
-
// 2. BIGQUERY FIRST (if enabled)
|
|
970
|
-
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
971
|
-
try {
|
|
972
|
-
const { queryPIPageViews } = require('../../core/utils/bigquery_utils');
|
|
973
|
-
const bigqueryData = await queryPIPageViews(dateString, logger);
|
|
974
|
-
if (bigqueryData) {
|
|
975
|
-
logger.log('INFO', `[DataLoader] ✅ Loaded PI Page Views from BigQuery for ${dateString}`);
|
|
976
|
-
return bigqueryData;
|
|
977
|
-
}
|
|
978
|
-
} catch (error) {
|
|
979
|
-
logger.log('WARN', `[DataLoader] BigQuery PI Page Views query failed, falling back to Firestore: ${error.message}`);
|
|
980
|
-
}
|
|
981
|
-
}
|
|
982
|
-
|
|
983
|
-
// 3. FIRESTORE FALLBACK
|
|
984
|
-
const collectionName = config.piPageViewsCollection || 'PIPageViewsData';
|
|
985
|
-
logger.log('INFO', `Loading PI Page Views from Firestore for ${dateString}`);
|
|
986
|
-
try {
|
|
987
|
-
const docRef = db.collection(collectionName).doc(dateString);
|
|
988
|
-
const docSnap = await withRetry(() => docRef.get(), `getPIPageViews(${dateString})`);
|
|
989
|
-
if (!docSnap.exists) { logger.log('WARN', `PI Page Views not found`); return null; }
|
|
990
|
-
const data = tryDecompress(docSnap.data());
|
|
991
|
-
const { date, lastUpdated, ...piPageViews } = data;
|
|
992
|
-
return piPageViews;
|
|
993
|
-
} catch (error) {
|
|
994
|
-
logger.log('ERROR', `Failed to load PI Page Views: ${error.message}`);
|
|
995
|
-
return null;
|
|
996
|
-
}
|
|
997
|
-
}
|
|
998
|
-
|
|
999
|
-
/** Stage 15: Load Watchlist Membership Data */
|
|
1000
|
-
async function loadWatchlistMembership(config, deps, dateString) {
|
|
1001
|
-
const { db, logger, calculationUtils } = deps;
|
|
1002
|
-
const { withRetry } = calculationUtils;
|
|
247
|
+
// Check if we're in BigQuery mode (virtual shards)
|
|
248
|
+
const isBigQuery = shardRefs.some(ref => ref && ref._bigquery === true);
|
|
1003
249
|
|
|
1004
|
-
|
|
1005
|
-
const cached = await tryLoadFromGCS(config, dateString, 'watchlist', logger);
|
|
1006
|
-
if (cached) return cached;
|
|
1007
|
-
|
|
1008
|
-
// 2. BIGQUERY FIRST (if enabled)
|
|
1009
|
-
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
250
|
+
if (isBigQuery && process.env.BIGQUERY_ENABLED !== 'false') {
|
|
1010
251
|
try {
|
|
1011
|
-
|
|
1012
|
-
const
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
252
|
+
// Extract targetIds from virtual shards if present
|
|
253
|
+
const targetIds = shardRefs
|
|
254
|
+
.filter(ref => ref._bigquery && ref.targetIds && ref.targetIds.length > 0)
|
|
255
|
+
.flatMap(ref => ref.targetIds);
|
|
256
|
+
|
|
257
|
+
// Query BigQuery for prices
|
|
258
|
+
// queryAssetPrices signature: (startDateStr, endDateStr, instrumentIds, logger)
|
|
259
|
+
const { queryAssetPrices } = require('../../core/utils/bigquery_utils');
|
|
260
|
+
const pricesData = await queryAssetPrices(null, null, targetIds.length > 0 ? targetIds : null, logger);
|
|
261
|
+
|
|
262
|
+
// Filter by targetIds if specified
|
|
263
|
+
if (targetIds.length > 0 && pricesData) {
|
|
264
|
+
const targetSet = new Set(targetIds.map(id => String(id)));
|
|
265
|
+
const filtered = {};
|
|
266
|
+
for (const [instrumentId, priceData] of Object.entries(pricesData)) {
|
|
267
|
+
if (targetSet.has(String(instrumentId))) {
|
|
268
|
+
filtered[instrumentId] = priceData;
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
return filtered;
|
|
1016
272
|
}
|
|
1017
|
-
|
|
1018
|
-
|
|
273
|
+
|
|
274
|
+
return pricesData || {};
|
|
275
|
+
} catch (e) {
|
|
276
|
+
logger.log('ERROR', `[DataLoader] BigQuery price load failed: ${e.message}`);
|
|
277
|
+
return {};
|
|
1019
278
|
}
|
|
1020
279
|
}
|
|
1021
280
|
|
|
1022
|
-
//
|
|
1023
|
-
const
|
|
1024
|
-
logger.log('INFO', `Loading Watchlist Membership from Firestore for ${dateString}`);
|
|
281
|
+
// Firestore fallback - load from doc refs
|
|
282
|
+
const combined = {};
|
|
1025
283
|
try {
|
|
1026
|
-
const
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
}
|
|
1037
|
-
|
|
1038
|
-
/** Stage 16: Load PI Alert History Data */
|
|
1039
|
-
async function loadPIAlertHistory(config, deps, dateString) {
|
|
1040
|
-
const { db, logger, calculationUtils } = deps;
|
|
1041
|
-
const { withRetry } = calculationUtils;
|
|
1042
|
-
|
|
1043
|
-
// 1. GCS FAST PATH
|
|
1044
|
-
const cached = await tryLoadFromGCS(config, dateString, 'alerts', logger);
|
|
1045
|
-
if (cached) return cached;
|
|
1046
|
-
|
|
1047
|
-
// 2. BIGQUERY FIRST (if enabled)
|
|
1048
|
-
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
1049
|
-
try {
|
|
1050
|
-
const { queryPIAlertHistory } = require('../../core/utils/bigquery_utils');
|
|
1051
|
-
const bigqueryData = await queryPIAlertHistory(dateString, logger);
|
|
1052
|
-
if (bigqueryData) {
|
|
1053
|
-
logger.log('INFO', `[DataLoader] ✅ Loaded PI Alert History from BigQuery for ${dateString}`);
|
|
1054
|
-
return bigqueryData;
|
|
284
|
+
const loadPromises = shardRefs.map(async (docRef) => {
|
|
285
|
+
try {
|
|
286
|
+
const snap = await docRef.get();
|
|
287
|
+
if (snap.exists) {
|
|
288
|
+
const data = snap.data();
|
|
289
|
+
// Firestore price shards are nested: { instrumentId: { prices: {...} } }
|
|
290
|
+
Object.assign(combined, data);
|
|
291
|
+
}
|
|
292
|
+
} catch (e) {
|
|
293
|
+
logger.log('WARN', `[DataLoader] Failed to load price shard: ${e.message}`);
|
|
1055
294
|
}
|
|
1056
|
-
}
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
}
|
|
1060
|
-
|
|
1061
|
-
// 3. FIRESTORE FALLBACK
|
|
1062
|
-
const collectionName = config.piAlertHistoryCollection || 'PIAlertHistoryData';
|
|
1063
|
-
logger.log('INFO', `Loading PI Alert History from Firestore for ${dateString}`);
|
|
1064
|
-
try {
|
|
1065
|
-
const docRef = db.collection(collectionName).doc(dateString);
|
|
1066
|
-
const docSnap = await withRetry(() => docRef.get(), `getPIAlertHistory(${dateString})`);
|
|
1067
|
-
if (!docSnap.exists) { logger.log('WARN', `PI Alert History not found`); return null; }
|
|
1068
|
-
const data = tryDecompress(docSnap.data());
|
|
1069
|
-
const { date, lastUpdated, ...piAlertHistory } = data;
|
|
1070
|
-
return piAlertHistory;
|
|
1071
|
-
} catch (error) {
|
|
1072
|
-
logger.log('ERROR', `Failed to load PI Alert History: ${error.message}`);
|
|
1073
|
-
return null;
|
|
1074
|
-
}
|
|
1075
|
-
}
|
|
1076
|
-
|
|
1077
|
-
/** Stage 17: Load PI-Centric Watchlist Data (Targeted - Keep as Firestore) */
|
|
1078
|
-
async function loadPIWatchlistData(config, deps, piCid) {
|
|
1079
|
-
const { db, logger, calculationUtils } = deps;
|
|
1080
|
-
const { withRetry } = calculationUtils;
|
|
1081
|
-
const piCidStr = String(piCid);
|
|
1082
|
-
|
|
1083
|
-
logger.log('INFO', `Loading PI Watchlist Data for PI ${piCid}`);
|
|
1084
|
-
try {
|
|
1085
|
-
const docRef = db.collection('PopularInvestors').doc(piCidStr).collection('watchlistData').doc('current');
|
|
1086
|
-
const docSnap = await withRetry(() => docRef.get(), `getPIWatchlistData(${piCidStr})`);
|
|
1087
|
-
if (!docSnap.exists) { logger.log('WARN', `PI Watchlist Data not found for PI ${piCidStr}`); return null; }
|
|
1088
|
-
return tryDecompress(docSnap.data());
|
|
1089
|
-
} catch (error) {
|
|
1090
|
-
logger.log('ERROR', `Failed to load PI Watchlist Data for PI ${piCidStr}: ${error.message}`);
|
|
1091
|
-
return null;
|
|
295
|
+
});
|
|
296
|
+
|
|
297
|
+
await Promise.all(loadPromises);
|
|
298
|
+
} catch (e) {
|
|
299
|
+
logger.log('ERROR', `[DataLoader] Failed to load price data from refs: ${e.message}`);
|
|
1092
300
|
}
|
|
1093
|
-
}
|
|
1094
|
-
|
|
1095
|
-
// Load Popular Investor Master List
|
|
1096
|
-
async function loadPopularInvestorMasterList(config, deps, dateString = null) {
|
|
1097
|
-
const { db, logger, calculationUtils } = deps;
|
|
1098
|
-
const { withRetry } = calculationUtils;
|
|
1099
301
|
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
const cached = await tryLoadFromGCS(config, dateString, 'master_list', logger);
|
|
1103
|
-
if (cached) return cached;
|
|
1104
|
-
}
|
|
302
|
+
return combined;
|
|
303
|
+
};
|
|
1105
304
|
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
const { queryPIMasterList } = require('../../core/utils/bigquery_utils');
|
|
1110
|
-
const bigqueryData = await queryPIMasterList(logger);
|
|
1111
|
-
|
|
1112
|
-
if (bigqueryData && Object.keys(bigqueryData).length > 0) {
|
|
1113
|
-
logger.log('INFO', `[DataLoader] ✅ Using BigQuery for PI master list: ${Object.keys(bigqueryData).length} PIs`);
|
|
1114
|
-
return bigqueryData;
|
|
1115
|
-
}
|
|
1116
|
-
} catch (bqError) {
|
|
1117
|
-
logger.log('WARN', `[DataLoader] BigQuery master list query failed, falling back to Firestore: ${bqError.message}`);
|
|
1118
|
-
// Fall through to Firestore
|
|
1119
|
-
}
|
|
1120
|
-
}
|
|
305
|
+
// =============================================================================
|
|
306
|
+
// HELPERS
|
|
307
|
+
// =============================================================================
|
|
1121
308
|
|
|
1122
|
-
|
|
1123
|
-
const
|
|
1124
|
-
const docId = config.piMasterListDocId || 'popular_investor_master_list';
|
|
1125
|
-
logger.log('INFO', `Loading Popular Investor Master List from ${collectionName}/${docId} (Firestore)`);
|
|
309
|
+
/**
 * Load one day's retail snapshot parts from Firestore and merge them.
 *
 * Reads `<collectionName>/19M/snapshots/<dateStr>/parts` and shallow-merges
 * every part document into a single object (later parts win on duplicate
 * keys). Only the hard-coded legacy block '19M' is read.
 *
 * This is best-effort: any error results in an empty object. The error is
 * reported through `logger` when one is supplied, otherwise via console.warn,
 * so that a failed read can be told apart from a genuinely empty day.
 *
 * @param {object} db - Firestore client.
 * @param {string} collectionName - Top-level collection holding the blocks.
 * @param {string} dateStr - Snapshot document id (the date key).
 * @param {object} [logger] - Optional logger exposing `log(level, message)`.
 * @returns {Promise<object>} Merged part data, or `{}` when empty or on error.
 */
async function loadRetailFirestore(db, collectionName, dateStr, logger = null) {
    const CANARY_ID = '19M'; // Legacy Block
    try {
        const partsRef = db.collection(collectionName).doc(CANARY_ID)
            .collection('snapshots').doc(dateStr).collection('parts');

        const snap = await partsRef.get();
        if (snap.empty) return {};

        const combined = {};
        snap.forEach(doc => Object.assign(combined, doc.data()));
        return combined;
    } catch (e) {
        const message = `[DataLoader] Failed to load retail data from ${collectionName} (${dateStr}): ${e.message}`;
        if (logger && typeof logger.log === 'function') {
            logger.log('WARN', message);
        } else {
            console.warn(message);
        }
        return {};
    }
}
|