bulltrackers-module 1.0.710 → 1.0.713
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/api-v2/helpers/data-fetchers/firestore.js +119 -63
- package/functions/computation-system/data/CachedDataLoader.js +22 -1
- package/functions/computation-system/data/DependencyFetcher.js +118 -0
- package/functions/computation-system/persistence/ResultCommitter.js +94 -3
- package/functions/computation-system/utils/data_loader.js +244 -13
- package/functions/core/utils/bigquery_utils.js +1655 -0
- package/functions/core/utils/firestore_utils.js +99 -30
- package/functions/etoro-price-fetcher/helpers/handler_helpers.js +85 -13
- package/functions/fetch-insights/helpers/handler_helpers.js +26 -0
- package/functions/fetch-popular-investors/helpers/fetch_helpers.js +66 -0
- package/functions/maintenance/backfill-instrument-insights/index.js +180 -0
- package/functions/maintenance/backfill-pi-master-list-rankings/index.js +293 -0
- package/functions/maintenance/backfill-task-engine-data/README.md +72 -0
- package/functions/maintenance/backfill-task-engine-data/index.js +844 -0
- package/functions/price-backfill/helpers/handler_helpers.js +59 -10
- package/functions/root-data-indexer/index.js +79 -27
- package/functions/task-engine/helpers/data_storage_helpers.js +194 -102
- package/functions/task-engine/helpers/popular_investor_helpers.js +13 -7
- package/functions/task-engine/utils/bigquery_batch_manager.js +201 -0
- package/functions/task-engine/utils/firestore_batch_manager.js +21 -1
- package/index.js +34 -2
- package/package.json +7 -3
|
@@ -68,7 +68,42 @@ async function getPortfolioPartRefs(config, deps, dateString, requiredUserTypes
|
|
|
68
68
|
const types = requiredUserTypes ? new Set(requiredUserTypes.map(t => t.toUpperCase())) : null;
|
|
69
69
|
const fetchAll = !types || types.size === 0 || types.has('ALL');
|
|
70
70
|
|
|
71
|
-
|
|
71
|
+
// =========================================================================
|
|
72
|
+
// BIGQUERY FIRST: Try BigQuery before Firestore
|
|
73
|
+
// =========================================================================
|
|
74
|
+
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
75
|
+
try {
|
|
76
|
+
const { queryPortfolioData } = require('../../core/utils/bigquery_utils');
|
|
77
|
+
const bigqueryData = await queryPortfolioData(dateString, null, requiredUserTypes, logger);
|
|
78
|
+
|
|
79
|
+
if (bigqueryData && Object.keys(bigqueryData).length > 0) {
|
|
80
|
+
logger.log('INFO', `[DataLoader] ✅ Using BigQuery for portfolio data (${dateString}): ${Object.keys(bigqueryData).length} users`);
|
|
81
|
+
|
|
82
|
+
// Transform BigQuery data into "ref-like" objects for compatibility
|
|
83
|
+
const bigqueryRefs = Object.keys(bigqueryData).map(userId => {
|
|
84
|
+
const dataObj = bigqueryData[userId];
|
|
85
|
+
return {
|
|
86
|
+
ref: null, // No Firestore ref needed
|
|
87
|
+
type: dataObj.user_type || 'UNKNOWN',
|
|
88
|
+
cid: userId,
|
|
89
|
+
collectionType: 'BIGQUERY',
|
|
90
|
+
bigqueryData: dataObj.portfolio_data || {} // Extract the actual portfolio data
|
|
91
|
+
};
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
return bigqueryRefs;
|
|
95
|
+
} else {
|
|
96
|
+
logger.log('INFO', `[DataLoader] ⚠️ BigQuery returned no portfolio data for ${dateString}, falling back to Firestore`);
|
|
97
|
+
}
|
|
98
|
+
} catch (bqError) {
|
|
99
|
+
logger.log('WARN', `[DataLoader] BigQuery portfolio query failed for ${dateString}, falling back to Firestore: ${bqError.message}`);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// =========================================================================
|
|
104
|
+
// FIRESTORE FALLBACK: Original logic (backwards compatibility)
|
|
105
|
+
// =========================================================================
|
|
106
|
+
logger.log('INFO', `[DataLoader] 📂 Using Firestore for portfolio data (${dateString}). Filter: ${fetchAll ? 'ALL' : Array.from(types || []).join(',')}`);
|
|
72
107
|
|
|
73
108
|
const allPartRefs = [];
|
|
74
109
|
|
|
@@ -143,12 +178,62 @@ async function loadDataByRefs(config, deps, refObjects) {
|
|
|
143
178
|
|
|
144
179
|
if (!refObjects || !refObjects.length) return {};
|
|
145
180
|
|
|
181
|
+
// =========================================================================
|
|
182
|
+
// CHECK FOR PRICE DATA (BigQuery marker)
|
|
183
|
+
// =========================================================================
|
|
184
|
+
const priceBigQueryMarker = refObjects.find(r => r._bigquery === true);
|
|
185
|
+
if (priceBigQueryMarker) {
|
|
186
|
+
// This is a price data load request - use BigQuery
|
|
187
|
+
try {
|
|
188
|
+
const { queryAssetPrices } = require('../../core/utils/bigquery_utils');
|
|
189
|
+
const priceData = await queryAssetPrices(null, null, null, logger);
|
|
190
|
+
|
|
191
|
+
if (priceData && Object.keys(priceData).length > 0) {
|
|
192
|
+
logger.log('INFO', `[DataLoader] ✅ Loaded ${Object.keys(priceData).length} instruments from BigQuery for price data`);
|
|
193
|
+
return priceData;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// If BigQuery returns empty, fallback to Firestore
|
|
197
|
+
logger.log('WARN', `[DataLoader] BigQuery returned no price data, falling back to Firestore`);
|
|
198
|
+
} catch (bqError) {
|
|
199
|
+
logger.log('WARN', `[DataLoader] BigQuery price load failed, falling back to Firestore: ${bqError.message}`);
|
|
200
|
+
// Fall through to Firestore
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
// =========================================================================
|
|
205
|
+
// SEPARATE BIGQUERY AND FIRESTORE REFS (for portfolio/history data)
|
|
206
|
+
// =========================================================================
|
|
207
|
+
const bigqueryRefs = refObjects.filter(r => r.collectionType === 'BIGQUERY');
|
|
208
|
+
const firestoreRefs = refObjects.filter(r => r.collectionType !== 'BIGQUERY' && !r._bigquery);
|
|
209
|
+
|
|
146
210
|
const mergedPortfolios = {};
|
|
211
|
+
|
|
212
|
+
// Load from BigQuery cache (data already fetched in getPortfolioPartRefs/getHistoryPartRefs)
|
|
213
|
+
if (bigqueryRefs.length > 0) {
|
|
214
|
+
logger.log('INFO', `[DataLoader] 📊 Loading ${bigqueryRefs.length} records from BigQuery cache`);
|
|
215
|
+
bigqueryRefs.forEach(ref => {
|
|
216
|
+
if (ref.bigqueryData) {
|
|
217
|
+
// Extract the actual data (portfolio_data or history_data)
|
|
218
|
+
// The bigqueryData is the JSON object from BigQuery
|
|
219
|
+
mergedPortfolios[ref.cid] = ref.bigqueryData;
|
|
220
|
+
}
|
|
221
|
+
});
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// Load from Firestore (existing logic)
|
|
225
|
+
if (firestoreRefs.length === 0) {
|
|
226
|
+
return mergedPortfolios; // All data came from BigQuery
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
logger.log('INFO', `[DataLoader] 📂 Loading ${firestoreRefs.length} records from Firestore`);
|
|
147
230
|
const batchSize = config.partRefBatchSize || 10;
|
|
148
231
|
|
|
149
|
-
for (let i = 0; i <
|
|
150
|
-
const batch =
|
|
151
|
-
const refs = batch.map(b => b.ref);
|
|
232
|
+
for (let i = 0; i < firestoreRefs.length; i += batchSize) {
|
|
233
|
+
const batch = firestoreRefs.slice(i, i + batchSize);
|
|
234
|
+
const refs = batch.map(b => b.ref).filter(ref => ref !== null); // Filter out null refs (BigQuery refs)
|
|
235
|
+
|
|
236
|
+
if (refs.length === 0) continue; // Skip if all refs are BigQuery refs
|
|
152
237
|
|
|
153
238
|
const snapshots = await withRetry(() => db.getAll(...refs), `getAll(batch ${Math.floor(i/batchSize)})`);
|
|
154
239
|
|
|
@@ -250,9 +335,26 @@ async function loadDailyInsights(config, deps, dateString) {
|
|
|
250
335
|
const cached = await tryLoadFromGCS(config, dateString, 'insights', logger);
|
|
251
336
|
if (cached) return cached;
|
|
252
337
|
|
|
253
|
-
// 2.
|
|
338
|
+
// 2. BIGQUERY FIRST (if enabled)
|
|
339
|
+
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
340
|
+
try {
|
|
341
|
+
const { queryInstrumentInsights } = require('../../core/utils/bigquery_utils');
|
|
342
|
+
const bigqueryData = await queryInstrumentInsights(dateString, logger);
|
|
343
|
+
|
|
344
|
+
if (bigqueryData && Array.isArray(bigqueryData) && bigqueryData.length > 0) {
|
|
345
|
+
logger.log('INFO', `[DataLoader] ✅ Using BigQuery for instrument insights (${dateString}): ${bigqueryData.length} instruments`);
|
|
346
|
+
// Return in same format as Firestore: { insights: [...] }
|
|
347
|
+
return { insights: bigqueryData };
|
|
348
|
+
}
|
|
349
|
+
} catch (bqError) {
|
|
350
|
+
logger.log('WARN', `[DataLoader] BigQuery insights query failed, falling back to Firestore: ${bqError.message}`);
|
|
351
|
+
// Fall through to Firestore
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
// 3. FIRESTORE FALLBACK
|
|
254
356
|
const insightsCollectionName = config.insightsCollectionName || 'daily_instrument_insights';
|
|
255
|
-
logger.log('INFO', `Loading daily insights for ${dateString} from ${insightsCollectionName}`);
|
|
357
|
+
logger.log('INFO', `Loading daily insights for ${dateString} from ${insightsCollectionName} (Firestore)`);
|
|
256
358
|
try {
|
|
257
359
|
const docRef = db.collection(insightsCollectionName).doc(dateString);
|
|
258
360
|
const docSnap = await withRetry(() => docRef.get(), `getInsights(${dateString})`);
|
|
@@ -379,7 +481,42 @@ async function getHistoryPartRefs(config, deps, dateString, requiredUserTypes =
|
|
|
379
481
|
const types = requiredUserTypes ? new Set(requiredUserTypes.map(t => t.toUpperCase())) : null;
|
|
380
482
|
const fetchAll = !types || types.size === 0 || types.has('ALL');
|
|
381
483
|
|
|
382
|
-
|
|
484
|
+
// =========================================================================
|
|
485
|
+
// BIGQUERY FIRST: Try BigQuery before Firestore
|
|
486
|
+
// =========================================================================
|
|
487
|
+
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
488
|
+
try {
|
|
489
|
+
const { queryHistoryData } = require('../../core/utils/bigquery_utils');
|
|
490
|
+
const bigqueryData = await queryHistoryData(dateString, null, requiredUserTypes, logger);
|
|
491
|
+
|
|
492
|
+
if (bigqueryData && Object.keys(bigqueryData).length > 0) {
|
|
493
|
+
logger.log('INFO', `[DataLoader] ✅ Using BigQuery for trade history data (${dateString}): ${Object.keys(bigqueryData).length} users`);
|
|
494
|
+
|
|
495
|
+
// Transform BigQuery data into "ref-like" objects for compatibility
|
|
496
|
+
const bigqueryRefs = Object.keys(bigqueryData).map(userId => {
|
|
497
|
+
const dataObj = bigqueryData[userId];
|
|
498
|
+
return {
|
|
499
|
+
ref: null, // No Firestore ref needed
|
|
500
|
+
type: dataObj.user_type || 'UNKNOWN',
|
|
501
|
+
cid: userId,
|
|
502
|
+
collectionType: 'BIGQUERY',
|
|
503
|
+
bigqueryData: dataObj.history_data || {} // Extract the actual history data
|
|
504
|
+
};
|
|
505
|
+
});
|
|
506
|
+
|
|
507
|
+
return bigqueryRefs;
|
|
508
|
+
} else {
|
|
509
|
+
logger.log('INFO', `[DataLoader] ⚠️ BigQuery returned no history data for ${dateString}, falling back to Firestore`);
|
|
510
|
+
}
|
|
511
|
+
} catch (bqError) {
|
|
512
|
+
logger.log('WARN', `[DataLoader] BigQuery history query failed for ${dateString}, falling back to Firestore: ${bqError.message}`);
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
// =========================================================================
|
|
517
|
+
// FIRESTORE FALLBACK: Original logic (backwards compatibility)
|
|
518
|
+
// =========================================================================
|
|
519
|
+
logger.log('INFO', `[DataLoader] 📂 Using Firestore for trade history data (${dateString}). Filter: ${fetchAll ? 'ALL' : Array.from(types || []).join(',')}`);
|
|
383
520
|
|
|
384
521
|
const allPartRefs = [];
|
|
385
522
|
|
|
@@ -447,6 +584,31 @@ async function getHistoryPartRefs(config, deps, dateString, requiredUserTypes =
|
|
|
447
584
|
async function* streamPortfolioData(config, deps, dateString, providedRefs = null, requiredUserTypes = null) {
|
|
448
585
|
const { logger } = deps;
|
|
449
586
|
|
|
587
|
+
// =========================================================================
|
|
588
|
+
// BIGQUERY FIRST: Try BigQuery before GCS/Firestore
|
|
589
|
+
// =========================================================================
|
|
590
|
+
if (!providedRefs && process.env.BIGQUERY_ENABLED !== 'false') {
|
|
591
|
+
try {
|
|
592
|
+
const { queryPortfolioData } = require('../../core/utils/bigquery_utils');
|
|
593
|
+
const bigqueryData = await queryPortfolioData(dateString, null, requiredUserTypes, logger);
|
|
594
|
+
|
|
595
|
+
if (bigqueryData && Object.keys(bigqueryData).length > 0) {
|
|
596
|
+
logger.log('INFO', `[DataLoader] ✅ Streaming portfolio data from BigQuery (${dateString}): ${Object.keys(bigqueryData).length} users`);
|
|
597
|
+
|
|
598
|
+
// Transform to expected format and yield
|
|
599
|
+
const transformedData = {};
|
|
600
|
+
Object.keys(bigqueryData).forEach(userId => {
|
|
601
|
+
transformedData[userId] = bigqueryData[userId].portfolio_data || {};
|
|
602
|
+
});
|
|
603
|
+
|
|
604
|
+
yield transformedData;
|
|
605
|
+
return; // Exit early, data loaded from BigQuery
|
|
606
|
+
}
|
|
607
|
+
} catch (bqError) {
|
|
608
|
+
logger.log('WARN', `[DataLoader] BigQuery portfolio stream failed for ${dateString}, falling back: ${bqError.message}`);
|
|
609
|
+
}
|
|
610
|
+
}
|
|
611
|
+
|
|
450
612
|
// 1. GCS FAST PATH (Snapshot) - Only if full run (no providedRefs)
|
|
451
613
|
if (!providedRefs) {
|
|
452
614
|
try {
|
|
@@ -498,6 +660,31 @@ async function* streamPortfolioData(config, deps, dateString, providedRefs = nul
|
|
|
498
660
|
async function* streamHistoryData(config, deps, dateString, providedRefs = null, requiredUserTypes = null) {
|
|
499
661
|
const { logger } = deps;
|
|
500
662
|
|
|
663
|
+
// =========================================================================
|
|
664
|
+
// BIGQUERY FIRST: Try BigQuery before Firestore
|
|
665
|
+
// =========================================================================
|
|
666
|
+
if (!providedRefs && process.env.BIGQUERY_ENABLED !== 'false') {
|
|
667
|
+
try {
|
|
668
|
+
const { queryHistoryData } = require('../../core/utils/bigquery_utils');
|
|
669
|
+
const bigqueryData = await queryHistoryData(dateString, null, requiredUserTypes, logger);
|
|
670
|
+
|
|
671
|
+
if (bigqueryData && Object.keys(bigqueryData).length > 0) {
|
|
672
|
+
logger.log('INFO', `[DataLoader] ✅ Streaming history data from BigQuery (${dateString}): ${Object.keys(bigqueryData).length} users`);
|
|
673
|
+
|
|
674
|
+
// Transform to expected format and yield
|
|
675
|
+
const transformedData = {};
|
|
676
|
+
Object.keys(bigqueryData).forEach(userId => {
|
|
677
|
+
transformedData[userId] = bigqueryData[userId].history_data || {};
|
|
678
|
+
});
|
|
679
|
+
|
|
680
|
+
yield transformedData;
|
|
681
|
+
return; // Exit early, data loaded from BigQuery
|
|
682
|
+
}
|
|
683
|
+
} catch (bqError) {
|
|
684
|
+
logger.log('WARN', `[DataLoader] BigQuery history stream failed for ${dateString}, falling back: ${bqError.message}`);
|
|
685
|
+
}
|
|
686
|
+
}
|
|
687
|
+
|
|
501
688
|
// 1. GCS FAST PATH (JSONL Streaming) - Only if full run
|
|
502
689
|
if (!providedRefs) {
|
|
503
690
|
try {
|
|
@@ -555,9 +742,21 @@ async function* streamHistoryData(config, deps, dateString, providedRefs = null,
|
|
|
555
742
|
logger.log('INFO', `[streamHistoryData] Finished streaming for ${dateString}.`);
|
|
556
743
|
}
|
|
557
744
|
|
|
558
|
-
/** Stage 9: Get all price shard references (Basic)
|
|
745
|
+
/** Stage 9: Get all price shard references (Basic)
|
|
746
|
+
* [UPDATED] Returns special marker for BigQuery mode, or Firestore refs for fallback
|
|
747
|
+
*/
|
|
559
748
|
async function getPriceShardRefs(config, deps) {
|
|
560
|
-
const {
|
|
749
|
+
const { logger } = deps;
|
|
750
|
+
|
|
751
|
+
// Try BigQuery first if enabled
|
|
752
|
+
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
753
|
+
// Return a special marker object to indicate BigQuery mode
|
|
754
|
+
// The loader will detect this and load from BigQuery instead
|
|
755
|
+
return [{ _bigquery: true }];
|
|
756
|
+
}
|
|
757
|
+
|
|
758
|
+
// Fallback to Firestore
|
|
759
|
+
const { db, calculationUtils } = deps;
|
|
561
760
|
const { withRetry } = calculationUtils;
|
|
562
761
|
const collection = config.priceCollection || 'asset_prices';
|
|
563
762
|
try {
|
|
@@ -590,9 +789,25 @@ async function loadPopularInvestorRankings(config, deps, dateString) {
|
|
|
590
789
|
const cached = await tryLoadFromGCS(config, dateString, 'rankings', logger);
|
|
591
790
|
if (cached) return cached;
|
|
592
791
|
|
|
593
|
-
// 2.
|
|
792
|
+
// 2. BIGQUERY FIRST (if enabled)
|
|
793
|
+
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
794
|
+
try {
|
|
795
|
+
const { queryPIRankings } = require('../../core/utils/bigquery_utils');
|
|
796
|
+
const bigqueryData = await queryPIRankings(dateString, logger);
|
|
797
|
+
|
|
798
|
+
if (bigqueryData && bigqueryData.Items && bigqueryData.Items.length > 0) {
|
|
799
|
+
logger.log('INFO', `[DataLoader] ✅ Using BigQuery for PI rankings (${dateString}): ${bigqueryData.Items.length} items`);
|
|
800
|
+
return bigqueryData.Items;
|
|
801
|
+
}
|
|
802
|
+
} catch (bqError) {
|
|
803
|
+
logger.log('WARN', `[DataLoader] BigQuery rankings query failed, falling back to Firestore: ${bqError.message}`);
|
|
804
|
+
// Fall through to Firestore
|
|
805
|
+
}
|
|
806
|
+
}
|
|
807
|
+
|
|
808
|
+
// 3. FIRESTORE FALLBACK
|
|
594
809
|
const collectionName = config.popularInvestorRankingsCollection || 'popular_investor_rankings';
|
|
595
|
-
logger.log('INFO', `Loading Popular Investor Rankings for ${dateString}`);
|
|
810
|
+
logger.log('INFO', `Loading Popular Investor Rankings for ${dateString} from Firestore`);
|
|
596
811
|
try {
|
|
597
812
|
const docRef = db.collection(collectionName).doc(dateString);
|
|
598
813
|
const docSnap = await withRetry(() => docRef.get(), `getRankings(${dateString})`);
|
|
@@ -794,10 +1009,26 @@ async function loadPopularInvestorMasterList(config, deps, dateString = null) {
|
|
|
794
1009
|
if (cached) return cached;
|
|
795
1010
|
}
|
|
796
1011
|
|
|
797
|
-
// 2.
|
|
1012
|
+
// 2. BIGQUERY FIRST (if enabled)
|
|
1013
|
+
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
1014
|
+
try {
|
|
1015
|
+
const { queryPIMasterList } = require('../../core/utils/bigquery_utils');
|
|
1016
|
+
const bigqueryData = await queryPIMasterList(logger);
|
|
1017
|
+
|
|
1018
|
+
if (bigqueryData && Object.keys(bigqueryData).length > 0) {
|
|
1019
|
+
logger.log('INFO', `[DataLoader] ✅ Using BigQuery for PI master list: ${Object.keys(bigqueryData).length} PIs`);
|
|
1020
|
+
return bigqueryData;
|
|
1021
|
+
}
|
|
1022
|
+
} catch (bqError) {
|
|
1023
|
+
logger.log('WARN', `[DataLoader] BigQuery master list query failed, falling back to Firestore: ${bqError.message}`);
|
|
1024
|
+
// Fall through to Firestore
|
|
1025
|
+
}
|
|
1026
|
+
}
|
|
1027
|
+
|
|
1028
|
+
// 3. FIRESTORE FALLBACK
|
|
798
1029
|
const collectionName = config.piMasterListCollection || 'system_state';
|
|
799
1030
|
const docId = config.piMasterListDocId || 'popular_investor_master_list';
|
|
800
|
-
logger.log('INFO', `Loading Popular Investor Master List from ${collectionName}/${docId}`);
|
|
1031
|
+
logger.log('INFO', `Loading Popular Investor Master List from ${collectionName}/${docId} (Firestore)`);
|
|
801
1032
|
try {
|
|
802
1033
|
const docRef = db.collection(collectionName).doc(docId);
|
|
803
1034
|
const docSnap = await withRetry(() => docRef.get(), 'getPIMasterList');
|