bulltrackers-module 1.0.709 → 1.0.712
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/api-v2/helpers/data-fetchers/firestore.js +217 -135
- package/functions/computation-system/data/CachedDataLoader.js +22 -1
- package/functions/computation-system/data/DependencyFetcher.js +118 -0
- package/functions/computation-system/persistence/ResultCommitter.js +94 -3
- package/functions/computation-system/utils/data_loader.js +244 -13
- package/functions/core/utils/bigquery_utils.js +1655 -0
- package/functions/core/utils/firestore_utils.js +99 -30
- package/functions/etoro-price-fetcher/helpers/handler_helpers.js +85 -13
- package/functions/fetch-insights/helpers/handler_helpers.js +26 -0
- package/functions/fetch-popular-investors/helpers/fetch_helpers.js +66 -0
- package/functions/price-backfill/helpers/handler_helpers.js +59 -10
- package/functions/root-data-indexer/index.js +79 -27
- package/functions/task-engine/helpers/data_storage_helpers.js +194 -102
- package/functions/task-engine/helpers/popular_investor_helpers.js +13 -7
- package/functions/task-engine/utils/bigquery_batch_manager.js +201 -0
- package/functions/task-engine/utils/firestore_batch_manager.js +21 -1
- package/index.js +34 -2
- package/package.json +3 -2
@@ -16,6 +16,7 @@ const validationOverrides = require('../config/validation_overr
 const pLimit = require('p-limit');
 const zlib = require('zlib');
 const { Storage } = require('@google-cloud/storage');
+const { ensureComputationResultsTable, insertRows } = require('../../core/utils/bigquery_utils');
 
 const storage = new Storage(); // Singleton GCS Client
 const NON_RETRYABLE_ERRORS = [ 'PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION' ];
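Both helpers imported above live in the new core/utils/bigquery_utils.js (+1655 lines), which this diff listing does not expand. As orientation only, a table-bootstrap helper like ensureComputationResultsTable could plausibly look like the sketch below: the column names mirror the row object assembled in writeToBigQuery later in this diff, while the column types, day-partitioning, and dataset handling are assumptions rather than the package's actual code.

// Hypothetical sketch only; the real helper lives in the unexpanded
// package/functions/core/utils/bigquery_utils.js.
const { BigQuery } = require('@google-cloud/bigquery');
const bigquery = new BigQuery();

async function ensureComputationResultsTable(logger) {
  const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
  const dataset = bigquery.dataset(datasetId);
  const [datasetExists] = await dataset.exists();
  if (!datasetExists) await bigquery.createDataset(datasetId);

  const table = dataset.table('computation_results');
  const [tableExists] = await table.exists();
  if (tableExists) return;

  // Column names mirror the row built in writeToBigQuery below;
  // the types and the day-partitioning are assumed, not confirmed by the diff.
  await dataset.createTable('computation_results', {
    schema: { fields: [
      { name: 'date',             type: 'DATE' },
      { name: 'computation_name', type: 'STRING' },
      { name: 'category',         type: 'STRING' },
      { name: 'result_data',      type: 'JSON' },
      { name: 'metadata',         type: 'JSON' },
      { name: 'created_at',       type: 'TIMESTAMP' }
    ] },
    timePartitioning: { type: 'DAY', field: 'date' }
  });
  if (logger) logger.log('INFO', `[BigQuery] Created table ${datasetId}.computation_results`);
}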
@@ -210,7 +211,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
       const historicalDocRef = db.collection(config.resultsCollection).doc(historicalDate).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
 
       // Recursive call allows GCS logic to apply per-day
-      const stats = await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, calc.manifest.category, logger, config, deps, 0, 'STANDARD', false, dailyExpireAt);
+      const stats = await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, calc.manifest.category, logger, config, deps, 0, 'STANDARD', false, dailyExpireAt, isAlertComputation, isPageComputation);
       runMetrics.io.writes += stats.opCounts.writes;
       runMetrics.io.deletes += stats.opCounts.deletes;
 
@@ -225,7 +226,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
     const runExpireAt = calculateExpirationDate(dStr, ttlDays);
 
     const mainDocRef = db.collection(config.resultsCollection).doc(dStr).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
-    const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, calc.manifest.category, logger, config, deps, currentShardIndex, flushMode, isInitialWrite, runExpireAt);
+    const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, calc.manifest.category, logger, config, deps, currentShardIndex, flushMode, isInitialWrite, runExpireAt, isAlertComputation, isPageComputation);
 
     runMetrics.storage.sizeBytes = writeStats.totalSize;
     runMetrics.storage.isSharded = writeStats.isSharded;
@@ -294,7 +295,7 @@ async function fetchContracts(db, calcNames) {
   return map;
 }
 
-async function writeSingleResult(result, docRef, name, dateContext, category, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false, expireAt = null) {
+async function writeSingleResult(result, docRef, name, dateContext, category, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false, expireAt = null, isAlertComputation = false) {
   const opCounts = { writes: 0, deletes: 0 };
 
   // Check if previously sharded (so we can clean up if moving to GCS or Compressed)
@@ -358,6 +359,13 @@ async function writeSingleResult(result, docRef, name, dateContext, category, lo
 
       logger.log('INFO', `[GCS] ${name}: Offloaded ${(totalSize/1024).toFixed(0)}KB to ${fileName}`);
 
+      // Write to BigQuery (await to ensure completion before function returns)
+      // Errors are caught and logged but don't fail the operation
+      // Pass isAlertComputation flag to use streaming for alerts, load jobs for others
+      await writeToBigQuery(result, name, dateContext, category, logger, isAlertComputation).catch(err => {
+        logger.log('WARN', `[BigQuery] Failed to write ${name} for ${dateContext}: ${err.message}`);
+      });
+
       return { totalSize, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts, location: 'GCS' };
 
     } catch (gcsErr) {
@@ -394,6 +402,12 @@ async function writeSingleResult(result, docRef, name, dateContext, category, lo
       opCounts.writes += 1;
       logger.log('INFO', `[Compression] ${name}: Compressed ${(totalSize/1024).toFixed(0)}KB -> ${(compressedBuffer.length/1024).toFixed(0)}KB.`);
 
+      // Write to BigQuery (await to ensure completion before function returns)
+      // Errors are caught and logged but don't fail the operation
+      await writeToBigQuery(result, name, dateContext, category, logger).catch(err => {
+        logger.log('WARN', `[BigQuery] Failed to write ${name} for ${dateContext}: ${err.message}`);
+      });
+
       return { totalSize: compressedBuffer.length, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex, opCounts, location: 'FIRESTORE' };
     }
   } catch (compErr) {
@@ -475,6 +489,13 @@ async function writeSingleResult(result, docRef, name, dateContext, category, lo
     shardingError.stage = 'SHARDING_LIMIT_EXCEEDED';
     throw shardingError;
   }
+
+  // Write to BigQuery (await to ensure completion before function returns)
+  // Errors are caught and logged but don't fail the operation
+  await writeToBigQuery(result, name, dateContext, category, logger).catch(err => {
+    logger.log('WARN', `[BigQuery] Failed to write ${name} for ${dateContext}: ${err.message}`);
+  });
+
   finalStats.opCounts = opCounts;
   return finalStats;
 }
@@ -483,6 +504,76 @@ async function writeSingleResult(result, docRef, name, dateContext, category, lo
 // HELPERS
 // =============================================================================
 
+/**
+ * Write computation result to BigQuery (errors are logged but don't fail Firestore writes)
+ * @param {object} result - Computation result data
+ * @param {string} name - Computation name
+ * @param {string} dateContext - Date string (YYYY-MM-DD)
+ * @param {string} category - Category (e.g., 'popular-investor', 'alerts')
+ * @param {object} logger - Logger instance
+ * @param {boolean} isAlertComputation - If true, uses streaming inserts (immediate). If false, uses load jobs (batched, free).
+ */
+async function writeToBigQuery(result, name, dateContext, category, logger, isAlertComputation = false) {
+  // Skip if BigQuery is disabled via environment variable
+  if (process.env.BIGQUERY_ENABLED === 'false') {
+    return;
+  }
+
+  try {
+    // Size check: BigQuery streaming inserts have a 10MB limit per row
+    // Estimate size by stringifying the result
+    const estimatedSize = JSON.stringify(result).length;
+    const MAX_BIGQUERY_ROW_SIZE = 9 * 1024 * 1024; // 9MB safety limit (10MB is hard limit)
+
+    if (estimatedSize > MAX_BIGQUERY_ROW_SIZE) {
+      if (logger) {
+        logger.log('WARN', `[BigQuery] Skipping ${name} (${dateContext}): Result too large for streaming (${(estimatedSize/1024/1024).toFixed(2)}MB). Data is in GCS/Firestore.`);
+      }
+      // Return early - don't attempt insert that will fail
+      // The data is still available in Firestore/GCS, so this is acceptable
+      return;
+    }
+
+    // Ensure table exists
+    await ensureComputationResultsTable(logger);
+
+    // Extract metadata (cids if present)
+    const metadata = {};
+    if (result.cids && Array.isArray(result.cids)) {
+      metadata.cids = result.cids;
+    }
+
+    // Prepare row for BigQuery
+    const row = {
+      date: dateContext,
+      computation_name: name,
+      category: category,
+      result_data: result, // Full result as JSON
+      metadata: Object.keys(metadata).length > 0 ? metadata : null,
+      created_at: new Date().toISOString()
+    };
+
+    const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+
+    // Use streaming inserts for alert computations (immediate, time-sensitive)
+    // Use load jobs for non-alert computations (batched, free)
+    const { insertRows: insertRowsLoadJob, insertRowsStreaming } = require('../../core/utils/bigquery_utils');
+
+    if (isAlertComputation) {
+      await insertRowsStreaming(datasetId, 'computation_results', [row], logger);
+    } else {
+      await insertRowsLoadJob(datasetId, 'computation_results', [row], logger);
+    }
+
+  } catch (error) {
+    // Log but don't throw - BigQuery write failure shouldn't break Firestore writes
+    if (logger) {
+      logger.log('WARN', `[BigQuery] Write failed for ${name} (${dateContext}): ${error.message}`);
+    }
+    // Don't re-throw - ensure Firestore writes always succeed
+  }
+}
+
 async function cleanupOldShards(docRef, name, config, deps, metrics) {
   const shardCol = docRef.collection('_shards');
   const shardDocs = await shardCol.listDocuments();
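The streaming-versus-load-job split in writeToBigQuery is the cost/latency trade-off this release leans on: streaming inserts make rows queryable within seconds but are billed per volume ingested, while load jobs are free and batched, with rows visible only once the job completes. Neither insertRowsStreaming nor the load-job insertRows is expanded in this diff; a minimal sketch of the pair, assuming the stock @google-cloud/bigquery client and staging the load job through a temporary NDJSON file, might read:

// Hypothetical sketches; the real helpers are part of the unexpanded
// core/utils/bigquery_utils.js.
const os = require('os');
const path = require('path');
const fs = require('fs/promises');
const { BigQuery } = require('@google-cloud/bigquery');
const bigquery = new BigQuery();

// Streaming insert: rows are queryable almost immediately, billed per volume ingested.
async function insertRowsStreaming(datasetId, tableId, rows, logger) {
  await bigquery.dataset(datasetId).table(tableId).insert(rows);
  if (logger) logger.log('INFO', `[BigQuery] Streamed ${rows.length} row(s) into ${tableId}`);
}

// Load job: free and batched; rows appear only once the job completes.
async function insertRows(datasetId, tableId, rows, logger) {
  const tmpFile = path.join(os.tmpdir(), `bq-load-${Date.now()}.ndjson`);
  await fs.writeFile(tmpFile, rows.map(r => JSON.stringify(r)).join('\n'));
  try {
    await bigquery.dataset(datasetId).table(tableId).load(tmpFile, {
      sourceFormat: 'NEWLINE_DELIMITED_JSON',
      writeDisposition: 'WRITE_APPEND'
    });
    if (logger) logger.log('INFO', `[BigQuery] Load job appended ${rows.length} row(s) to ${tableId}`);
  } finally {
    await fs.unlink(tmpFile).catch(() => {}); // best-effort temp cleanup
  }
}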
@@ -68,7 +68,42 @@ async function getPortfolioPartRefs(config, deps, dateString, requiredUserTypes
   const types = requiredUserTypes ? new Set(requiredUserTypes.map(t => t.toUpperCase())) : null;
   const fetchAll = !types || types.size === 0 || types.has('ALL');
 
-
+  // =========================================================================
+  // BIGQUERY FIRST: Try BigQuery before Firestore
+  // =========================================================================
+  if (process.env.BIGQUERY_ENABLED !== 'false') {
+    try {
+      const { queryPortfolioData } = require('../../core/utils/bigquery_utils');
+      const bigqueryData = await queryPortfolioData(dateString, null, requiredUserTypes, logger);
+
+      if (bigqueryData && Object.keys(bigqueryData).length > 0) {
+        logger.log('INFO', `[DataLoader] ✅ Using BigQuery for portfolio data (${dateString}): ${Object.keys(bigqueryData).length} users`);
+
+        // Transform BigQuery data into "ref-like" objects for compatibility
+        const bigqueryRefs = Object.keys(bigqueryData).map(userId => {
+          const dataObj = bigqueryData[userId];
+          return {
+            ref: null, // No Firestore ref needed
+            type: dataObj.user_type || 'UNKNOWN',
+            cid: userId,
+            collectionType: 'BIGQUERY',
+            bigqueryData: dataObj.portfolio_data || {} // Extract the actual portfolio data
+          };
+        });
+
+        return bigqueryRefs;
+      } else {
+        logger.log('INFO', `[DataLoader] ⚠️ BigQuery returned no portfolio data for ${dateString}, falling back to Firestore`);
+      }
+    } catch (bqError) {
+      logger.log('WARN', `[DataLoader] BigQuery portfolio query failed for ${dateString}, falling back to Firestore: ${bqError.message}`);
+    }
+  }
+
+  // =========================================================================
+  // FIRESTORE FALLBACK: Original logic (backwards compatibility)
+  // =========================================================================
+  logger.log('INFO', `[DataLoader] 📂 Using Firestore for portfolio data (${dateString}). Filter: ${fetchAll ? 'ALL' : Array.from(types || []).join(',')}`);
 
   const allPartRefs = [];
 
|
|
|
143
178
|
|
|
144
179
|
if (!refObjects || !refObjects.length) return {};
|
|
145
180
|
|
|
181
|
+
// =========================================================================
|
|
182
|
+
// CHECK FOR PRICE DATA (BigQuery marker)
|
|
183
|
+
// =========================================================================
|
|
184
|
+
const priceBigQueryMarker = refObjects.find(r => r._bigquery === true);
|
|
185
|
+
if (priceBigQueryMarker) {
|
|
186
|
+
// This is a price data load request - use BigQuery
|
|
187
|
+
try {
|
|
188
|
+
const { queryAssetPrices } = require('../../core/utils/bigquery_utils');
|
|
189
|
+
const priceData = await queryAssetPrices(null, null, null, logger);
|
|
190
|
+
|
|
191
|
+
if (priceData && Object.keys(priceData).length > 0) {
|
|
192
|
+
logger.log('INFO', `[DataLoader] ✅ Loaded ${Object.keys(priceData).length} instruments from BigQuery for price data`);
|
|
193
|
+
return priceData;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// If BigQuery returns empty, fallback to Firestore
|
|
197
|
+
logger.log('WARN', `[DataLoader] BigQuery returned no price data, falling back to Firestore`);
|
|
198
|
+
} catch (bqError) {
|
|
199
|
+
logger.log('WARN', `[DataLoader] BigQuery price load failed, falling back to Firestore: ${bqError.message}`);
|
|
200
|
+
// Fall through to Firestore
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
// =========================================================================
|
|
205
|
+
// SEPARATE BIGQUERY AND FIRESTORE REFS (for portfolio/history data)
|
|
206
|
+
// =========================================================================
|
|
207
|
+
const bigqueryRefs = refObjects.filter(r => r.collectionType === 'BIGQUERY');
|
|
208
|
+
const firestoreRefs = refObjects.filter(r => r.collectionType !== 'BIGQUERY' && !r._bigquery);
|
|
209
|
+
|
|
146
210
|
const mergedPortfolios = {};
|
|
211
|
+
|
|
212
|
+
// Load from BigQuery cache (data already fetched in getPortfolioPartRefs/getHistoryPartRefs)
|
|
213
|
+
if (bigqueryRefs.length > 0) {
|
|
214
|
+
logger.log('INFO', `[DataLoader] 📊 Loading ${bigqueryRefs.length} records from BigQuery cache`);
|
|
215
|
+
bigqueryRefs.forEach(ref => {
|
|
216
|
+
if (ref.bigqueryData) {
|
|
217
|
+
// Extract the actual data (portfolio_data or history_data)
|
|
218
|
+
// The bigqueryData is the JSON object from BigQuery
|
|
219
|
+
mergedPortfolios[ref.cid] = ref.bigqueryData;
|
|
220
|
+
}
|
|
221
|
+
});
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// Load from Firestore (existing logic)
|
|
225
|
+
if (firestoreRefs.length === 0) {
|
|
226
|
+
return mergedPortfolios; // All data came from BigQuery
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
logger.log('INFO', `[DataLoader] 📂 Loading ${firestoreRefs.length} records from Firestore`);
|
|
147
230
|
const batchSize = config.partRefBatchSize || 10;
|
|
148
231
|
|
|
149
|
-
for (let i = 0; i <
|
|
150
|
-
const batch =
|
|
151
|
-
const refs = batch.map(b => b.ref);
|
|
232
|
+
for (let i = 0; i < firestoreRefs.length; i += batchSize) {
|
|
233
|
+
const batch = firestoreRefs.slice(i, i + batchSize);
|
|
234
|
+
const refs = batch.map(b => b.ref).filter(ref => ref !== null); // Filter out null refs (BigQuery refs)
|
|
235
|
+
|
|
236
|
+
if (refs.length === 0) continue; // Skip if all refs are BigQuery refs
|
|
152
237
|
|
|
153
238
|
const snapshots = await withRetry(() => db.getAll(...refs), `getAll(batch ${Math.floor(i/batchSize)})`);
|
|
154
239
|
|
|
@@ -250,9 +335,26 @@ async function loadDailyInsights(config, deps, dateString) {
   const cached = await tryLoadFromGCS(config, dateString, 'insights', logger);
   if (cached) return cached;
 
-  // 2.
+  // 2. BIGQUERY FIRST (if enabled)
+  if (process.env.BIGQUERY_ENABLED !== 'false') {
+    try {
+      const { queryInstrumentInsights } = require('../../core/utils/bigquery_utils');
+      const bigqueryData = await queryInstrumentInsights(dateString, logger);
+
+      if (bigqueryData && Array.isArray(bigqueryData) && bigqueryData.length > 0) {
+        logger.log('INFO', `[DataLoader] ✅ Using BigQuery for instrument insights (${dateString}): ${bigqueryData.length} instruments`);
+        // Return in same format as Firestore: { insights: [...] }
+        return { insights: bigqueryData };
+      }
+    } catch (bqError) {
+      logger.log('WARN', `[DataLoader] BigQuery insights query failed, falling back to Firestore: ${bqError.message}`);
+      // Fall through to Firestore
+    }
+  }
+
+  // 3. FIRESTORE FALLBACK
   const insightsCollectionName = config.insightsCollectionName || 'daily_instrument_insights';
-  logger.log('INFO', `Loading daily insights for ${dateString} from ${insightsCollectionName}`);
+  logger.log('INFO', `Loading daily insights for ${dateString} from ${insightsCollectionName} (Firestore)`);
   try {
     const docRef = db.collection(insightsCollectionName).doc(dateString);
     const docSnap = await withRetry(() => docRef.get(), `getInsights(${dateString})`);
@@ -379,7 +481,42 @@ async function getHistoryPartRefs(config, deps, dateString, requiredUserTypes =
   const types = requiredUserTypes ? new Set(requiredUserTypes.map(t => t.toUpperCase())) : null;
   const fetchAll = !types || types.size === 0 || types.has('ALL');
 
-
+  // =========================================================================
+  // BIGQUERY FIRST: Try BigQuery before Firestore
+  // =========================================================================
+  if (process.env.BIGQUERY_ENABLED !== 'false') {
+    try {
+      const { queryHistoryData } = require('../../core/utils/bigquery_utils');
+      const bigqueryData = await queryHistoryData(dateString, null, requiredUserTypes, logger);
+
+      if (bigqueryData && Object.keys(bigqueryData).length > 0) {
+        logger.log('INFO', `[DataLoader] ✅ Using BigQuery for trade history data (${dateString}): ${Object.keys(bigqueryData).length} users`);
+
+        // Transform BigQuery data into "ref-like" objects for compatibility
+        const bigqueryRefs = Object.keys(bigqueryData).map(userId => {
+          const dataObj = bigqueryData[userId];
+          return {
+            ref: null, // No Firestore ref needed
+            type: dataObj.user_type || 'UNKNOWN',
+            cid: userId,
+            collectionType: 'BIGQUERY',
+            bigqueryData: dataObj.history_data || {} // Extract the actual history data
+          };
+        });
+
+        return bigqueryRefs;
+      } else {
+        logger.log('INFO', `[DataLoader] ⚠️ BigQuery returned no history data for ${dateString}, falling back to Firestore`);
+      }
+    } catch (bqError) {
+      logger.log('WARN', `[DataLoader] BigQuery history query failed for ${dateString}, falling back to Firestore: ${bqError.message}`);
+    }
+  }
+
+  // =========================================================================
+  // FIRESTORE FALLBACK: Original logic (backwards compatibility)
+  // =========================================================================
+  logger.log('INFO', `[DataLoader] 📂 Using Firestore for trade history data (${dateString}). Filter: ${fetchAll ? 'ALL' : Array.from(types || []).join(',')}`);
 
   const allPartRefs = [];
 
@@ -447,6 +584,31 @@ async function getHistoryPartRefs(config, deps, dateString, requiredUserTypes =
 async function* streamPortfolioData(config, deps, dateString, providedRefs = null, requiredUserTypes = null) {
   const { logger } = deps;
 
+  // =========================================================================
+  // BIGQUERY FIRST: Try BigQuery before GCS/Firestore
+  // =========================================================================
+  if (!providedRefs && process.env.BIGQUERY_ENABLED !== 'false') {
+    try {
+      const { queryPortfolioData } = require('../../core/utils/bigquery_utils');
+      const bigqueryData = await queryPortfolioData(dateString, null, requiredUserTypes, logger);
+
+      if (bigqueryData && Object.keys(bigqueryData).length > 0) {
+        logger.log('INFO', `[DataLoader] ✅ Streaming portfolio data from BigQuery (${dateString}): ${Object.keys(bigqueryData).length} users`);
+
+        // Transform to expected format and yield
+        const transformedData = {};
+        Object.keys(bigqueryData).forEach(userId => {
+          transformedData[userId] = bigqueryData[userId].portfolio_data || {};
+        });
+
+        yield transformedData;
+        return; // Exit early, data loaded from BigQuery
+      }
+    } catch (bqError) {
+      logger.log('WARN', `[DataLoader] BigQuery portfolio stream failed for ${dateString}, falling back: ${bqError.message}`);
+    }
+  }
+
   // 1. GCS FAST PATH (Snapshot) - Only if full run (no providedRefs)
   if (!providedRefs) {
     try {
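One behavioral note for consumers of this generator: on the BigQuery path it yields the whole day's data as a single batch and returns, whereas the GCS/Firestore paths keep yielding smaller chunks. Since the contract is an async generator either way, existing callers work unchanged; a typical consumer (with processPortfolio standing in as a hypothetical handler, and an illustrative date) looks like:

// Existing consumer pattern; config/deps are the caller's usual objects and
// processPortfolio is a hypothetical per-user handler.
for await (const chunk of streamPortfolioData(config, deps, '2024-01-15')) {
  // chunk maps userId -> portfolio object. BigQuery yields one large chunk;
  // the GCS/Firestore fallbacks yield many smaller ones.
  for (const [userId, portfolio] of Object.entries(chunk)) {
    processPortfolio(userId, portfolio);
  }
}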
@@ -498,6 +660,31 @@ async function* streamPortfolioData(config, deps, dateString, providedRefs = nul
 async function* streamHistoryData(config, deps, dateString, providedRefs = null, requiredUserTypes = null) {
   const { logger } = deps;
 
+  // =========================================================================
+  // BIGQUERY FIRST: Try BigQuery before Firestore
+  // =========================================================================
+  if (!providedRefs && process.env.BIGQUERY_ENABLED !== 'false') {
+    try {
+      const { queryHistoryData } = require('../../core/utils/bigquery_utils');
+      const bigqueryData = await queryHistoryData(dateString, null, requiredUserTypes, logger);
+
+      if (bigqueryData && Object.keys(bigqueryData).length > 0) {
+        logger.log('INFO', `[DataLoader] ✅ Streaming history data from BigQuery (${dateString}): ${Object.keys(bigqueryData).length} users`);
+
+        // Transform to expected format and yield
+        const transformedData = {};
+        Object.keys(bigqueryData).forEach(userId => {
+          transformedData[userId] = bigqueryData[userId].history_data || {};
+        });
+
+        yield transformedData;
+        return; // Exit early, data loaded from BigQuery
+      }
+    } catch (bqError) {
+      logger.log('WARN', `[DataLoader] BigQuery history stream failed for ${dateString}, falling back: ${bqError.message}`);
+    }
+  }
+
   // 1. GCS FAST PATH (JSONL Streaming) - Only if full run
   if (!providedRefs) {
     try {
@@ -555,9 +742,21 @@ async function* streamHistoryData(config, deps, dateString, providedRefs = null,
   logger.log('INFO', `[streamHistoryData] Finished streaming for ${dateString}.`);
 }
 
-/** Stage 9: Get all price shard references (Basic) */
+/** Stage 9: Get all price shard references (Basic)
+ * [UPDATED] Returns special marker for BigQuery mode, or Firestore refs for fallback
+ */
 async function getPriceShardRefs(config, deps) {
-  const { db, calculationUtils } = deps;
+  const { logger } = deps;
+
+  // Try BigQuery first if enabled
+  if (process.env.BIGQUERY_ENABLED !== 'false') {
+    // Return a special marker object to indicate BigQuery mode
+    // The loader will detect this and load from BigQuery instead
+    return [{ _bigquery: true }];
+  }
+
+  // Fallback to Firestore
+  const { db, calculationUtils } = deps;
   const { withRetry } = calculationUtils;
   const collection = config.priceCollection || 'asset_prices';
   try {
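The [{ _bigquery: true }] marker returned above only means something to the priceBigQueryMarker check added to loadDataByRefs earlier in this diff, so the two changes function as a pair. Condensed from those two hunks, the handshake is:

// Condensed from the two hunks above; illustrates the marker handshake only.
const refs = await getPriceShardRefs(config, deps);
// BIGQUERY_ENABLED !== 'false'  -> refs === [{ _bigquery: true }]
// BIGQUERY_ENABLED === 'false'  -> refs === [{ ref: <Firestore doc ref> }, ...]

const prices = await loadDataByRefs(config, deps, refs);
// With the marker present, loadDataByRefs calls queryAssetPrices(null, null, null, logger)
// and returns prices keyed by instrument; otherwise it batch-reads the Firestore refs.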
@@ -590,9 +789,25 @@ async function loadPopularInvestorRankings(config, deps, dateString) {
   const cached = await tryLoadFromGCS(config, dateString, 'rankings', logger);
   if (cached) return cached;
 
-  // 2.
+  // 2. BIGQUERY FIRST (if enabled)
+  if (process.env.BIGQUERY_ENABLED !== 'false') {
+    try {
+      const { queryPIRankings } = require('../../core/utils/bigquery_utils');
+      const bigqueryData = await queryPIRankings(dateString, logger);
+
+      if (bigqueryData && bigqueryData.Items && bigqueryData.Items.length > 0) {
+        logger.log('INFO', `[DataLoader] ✅ Using BigQuery for PI rankings (${dateString}): ${bigqueryData.Items.length} items`);
+        return bigqueryData.Items;
+      }
+    } catch (bqError) {
+      logger.log('WARN', `[DataLoader] BigQuery rankings query failed, falling back to Firestore: ${bqError.message}`);
+      // Fall through to Firestore
+    }
+  }
+
+  // 3. FIRESTORE FALLBACK
   const collectionName = config.popularInvestorRankingsCollection || 'popular_investor_rankings';
-  logger.log('INFO', `Loading Popular Investor Rankings for ${dateString}`);
+  logger.log('INFO', `Loading Popular Investor Rankings for ${dateString} from Firestore`);
   try {
     const docRef = db.collection(collectionName).doc(dateString);
     const docSnap = await withRetry(() => docRef.get(), `getRankings(${dateString})`);
@@ -794,10 +1009,26 @@ async function loadPopularInvestorMasterList(config, deps, dateString = null) {
     if (cached) return cached;
   }
 
-  // 2.
+  // 2. BIGQUERY FIRST (if enabled)
+  if (process.env.BIGQUERY_ENABLED !== 'false') {
+    try {
+      const { queryPIMasterList } = require('../../core/utils/bigquery_utils');
+      const bigqueryData = await queryPIMasterList(logger);
+
+      if (bigqueryData && Object.keys(bigqueryData).length > 0) {
+        logger.log('INFO', `[DataLoader] ✅ Using BigQuery for PI master list: ${Object.keys(bigqueryData).length} PIs`);
+        return bigqueryData;
+      }
+    } catch (bqError) {
+      logger.log('WARN', `[DataLoader] BigQuery master list query failed, falling back to Firestore: ${bqError.message}`);
+      // Fall through to Firestore
+    }
+  }
+
+  // 3. FIRESTORE FALLBACK
   const collectionName = config.piMasterListCollection || 'system_state';
   const docId = config.piMasterListDocId || 'popular_investor_master_list';
-  logger.log('INFO', `Loading Popular Investor Master List from ${collectionName}/${docId}`);
+  logger.log('INFO', `Loading Popular Investor Master List from ${collectionName}/${docId} (Firestore)`);
   try {
     const docRef = db.collection(collectionName).doc(docId);
     const docSnap = await withRetry(() => docRef.get(), 'getPIMasterList');