bulltrackers-module 1.0.710 → 1.0.713
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/api-v2/helpers/data-fetchers/firestore.js +119 -63
- package/functions/computation-system/data/CachedDataLoader.js +22 -1
- package/functions/computation-system/data/DependencyFetcher.js +118 -0
- package/functions/computation-system/persistence/ResultCommitter.js +94 -3
- package/functions/computation-system/utils/data_loader.js +244 -13
- package/functions/core/utils/bigquery_utils.js +1655 -0
- package/functions/core/utils/firestore_utils.js +99 -30
- package/functions/etoro-price-fetcher/helpers/handler_helpers.js +85 -13
- package/functions/fetch-insights/helpers/handler_helpers.js +26 -0
- package/functions/fetch-popular-investors/helpers/fetch_helpers.js +66 -0
- package/functions/maintenance/backfill-instrument-insights/index.js +180 -0
- package/functions/maintenance/backfill-pi-master-list-rankings/index.js +293 -0
- package/functions/maintenance/backfill-task-engine-data/README.md +72 -0
- package/functions/maintenance/backfill-task-engine-data/index.js +844 -0
- package/functions/price-backfill/helpers/handler_helpers.js +59 -10
- package/functions/root-data-indexer/index.js +79 -27
- package/functions/task-engine/helpers/data_storage_helpers.js +194 -102
- package/functions/task-engine/helpers/popular_investor_helpers.js +13 -7
- package/functions/task-engine/utils/bigquery_batch_manager.js +201 -0
- package/functions/task-engine/utils/firestore_batch_manager.js +21 -1
- package/index.js +34 -2
- package/package.json +7 -3
|
@@ -281,8 +281,8 @@ function needsUpdate(lastUpdated, todayStr) {
|
|
|
281
281
|
|
|
282
282
|
/**
|
|
283
283
|
* [NEW] Fetches Popular Investors from the master list and filters by last updated times.
|
|
284
|
-
* UPDATED: Uses
|
|
285
|
-
*
|
|
284
|
+
* UPDATED: Uses BigQuery first for master list, then checks BigQuery for today's data to filter users.
|
|
285
|
+
* Falls back to Firestore if BigQuery is disabled or fails.
|
|
286
286
|
*/
|
|
287
287
|
async function getPopularInvestorsToUpdate(dependencies, config) {
|
|
288
288
|
const { db, logger, collectionRegistry } = dependencies;
|
|
@@ -291,53 +291,122 @@ async function getPopularInvestorsToUpdate(dependencies, config) {
|
|
|
291
291
|
logger.log('INFO', `[Core Utils] Getting Popular Investors to update (Checking against date: ${todayStr})...`);
|
|
292
292
|
|
|
293
293
|
try {
|
|
294
|
-
|
|
295
|
-
let
|
|
294
|
+
let investors = {};
|
|
295
|
+
let masterListSource = 'UNKNOWN';
|
|
296
296
|
|
|
297
|
-
|
|
297
|
+
// =========================================================================
|
|
298
|
+
// BIGQUERY FIRST: Try BigQuery for master list
|
|
299
|
+
// =========================================================================
|
|
300
|
+
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
298
301
|
try {
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
+
const { queryPIMasterList } = require('../../core/utils/bigquery_utils');
|
|
303
|
+
const bigqueryMasterList = await queryPIMasterList(logger);
|
|
304
|
+
|
|
305
|
+
if (bigqueryMasterList && Object.keys(bigqueryMasterList).length > 0) {
|
|
306
|
+
investors = bigqueryMasterList;
|
|
307
|
+
masterListSource = 'BIGQUERY';
|
|
308
|
+
logger.log('INFO', `[Core Utils] ✅ Loaded PI master list from BigQuery: ${Object.keys(investors).length} investors`);
|
|
309
|
+
}
|
|
310
|
+
} catch (bqError) {
|
|
311
|
+
logger.log('WARN', `[Core Utils] BigQuery master list query failed, falling back to Firestore: ${bqError.message}`);
|
|
302
312
|
}
|
|
303
313
|
}
|
|
304
314
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
if (
|
|
309
|
-
|
|
310
|
-
|
|
315
|
+
// =========================================================================
|
|
316
|
+
// FIRESTORE FALLBACK: If BigQuery didn't return data
|
|
317
|
+
// =========================================================================
|
|
318
|
+
if (Object.keys(investors).length === 0) {
|
|
319
|
+
let masterListPath = 'system_state/popular_investor_master_list';
|
|
320
|
+
|
|
321
|
+
if (collectionRegistry && collectionRegistry.getCollectionPath) {
|
|
322
|
+
try {
|
|
323
|
+
masterListPath = collectionRegistry.getCollectionPath('system', 'popularInvestorMasterList', {});
|
|
324
|
+
} catch (err) {
|
|
325
|
+
logger.log('WARN', `[Core Utils] Failed to get master list path from registry, using default: ${err.message}`);
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
const masterListRef = db.doc(masterListPath);
|
|
330
|
+
const masterListDoc = await masterListRef.get();
|
|
331
|
+
|
|
332
|
+
if (!masterListDoc.exists) {
|
|
333
|
+
logger.log('WARN', `[Core Utils] Master list not found in Firestore. Falling back to legacy method.`);
|
|
334
|
+
return await getPopularInvestorsToUpdateLegacy(dependencies, config);
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
const masterListData = masterListDoc.data();
|
|
338
|
+
investors = masterListData.investors || {};
|
|
339
|
+
masterListSource = 'FIRESTORE';
|
|
340
|
+
logger.log('INFO', `[Core Utils] Loaded PI master list from Firestore: ${Object.keys(investors).length} investors`);
|
|
311
341
|
}
|
|
312
342
|
|
|
313
|
-
const masterListData = masterListDoc.data();
|
|
314
|
-
const investors = masterListData.investors || {};
|
|
315
|
-
|
|
316
343
|
if (Object.keys(investors).length === 0) {
|
|
317
344
|
logger.log('WARN', `[Core Utils] Master list is empty. Returning empty array.`);
|
|
318
345
|
return [];
|
|
319
346
|
}
|
|
320
347
|
|
|
348
|
+
// =========================================================================
|
|
349
|
+
// FILTER: Check BigQuery for today's data to determine who needs updating
|
|
350
|
+
// =========================================================================
|
|
321
351
|
const targets = [];
|
|
322
352
|
let skippedCount = 0;
|
|
323
353
|
|
|
354
|
+
// Get today's data from BigQuery to check who's already updated
|
|
355
|
+
let todayPortfolioUsers = new Set();
|
|
356
|
+
let todayHistoryUsers = new Set();
|
|
357
|
+
let todaySocialUsers = new Set();
|
|
358
|
+
|
|
359
|
+
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
360
|
+
try {
|
|
361
|
+
const { queryPortfolioData, queryHistoryData, querySocialData } = require('../../core/utils/bigquery_utils');
|
|
362
|
+
|
|
363
|
+
// Query BigQuery for today's data (only for POPULAR_INVESTOR user type)
|
|
364
|
+
const [portfolioData, historyData, socialData] = await Promise.all([
|
|
365
|
+
queryPortfolioData(todayStr, null, ['POPULAR_INVESTOR'], logger).catch(() => null),
|
|
366
|
+
queryHistoryData(todayStr, null, ['POPULAR_INVESTOR'], logger).catch(() => null),
|
|
367
|
+
querySocialData(todayStr, null, ['POPULAR_INVESTOR'], logger).catch(() => null)
|
|
368
|
+
]);
|
|
369
|
+
|
|
370
|
+
if (portfolioData) {
|
|
371
|
+
todayPortfolioUsers = new Set(Object.keys(portfolioData));
|
|
372
|
+
}
|
|
373
|
+
if (historyData) {
|
|
374
|
+
todayHistoryUsers = new Set(Object.keys(historyData));
|
|
375
|
+
}
|
|
376
|
+
if (socialData) {
|
|
377
|
+
todaySocialUsers = new Set(Object.keys(socialData));
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
logger.log('INFO', `[Core Utils] BigQuery filter: ${todayPortfolioUsers.size} portfolios, ${todayHistoryUsers.size} histories, ${todaySocialUsers.size} social posts for ${todayStr}`);
|
|
381
|
+
} catch (bqError) {
|
|
382
|
+
logger.log('WARN', `[Core Utils] BigQuery filter query failed, using Firestore fallback: ${bqError.message}`);
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
// Process each investor from master list
|
|
324
387
|
for (const [cid, piData] of Object.entries(investors)) {
|
|
325
388
|
const username = piData.username || String(cid);
|
|
326
389
|
|
|
327
|
-
//
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
390
|
+
// Check if already updated today (BigQuery first, then Firestore)
|
|
391
|
+
let components = {
|
|
392
|
+
portfolio: !todayPortfolioUsers.has(cid),
|
|
393
|
+
tradeHistory: !todayHistoryUsers.has(cid),
|
|
394
|
+
socialPosts: !todaySocialUsers.has(cid)
|
|
395
|
+
};
|
|
333
396
|
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
const
|
|
397
|
+
// If BigQuery didn't have data, check Firestore
|
|
398
|
+
if (todayPortfolioUsers.size === 0 && todayHistoryUsers.size === 0 && todaySocialUsers.size === 0) {
|
|
399
|
+
const piDocPath = `PopularInvestors/${cid}`;
|
|
400
|
+
const piDoc = await db.doc(piDocPath).get();
|
|
337
401
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
402
|
+
if (piDoc.exists) {
|
|
403
|
+
const data = piDoc.data();
|
|
404
|
+
const lastUpdated = data.lastUpdated || {};
|
|
405
|
+
|
|
406
|
+
components.portfolio = needsUpdate(lastUpdated.portfolio, todayStr);
|
|
407
|
+
components.tradeHistory = needsUpdate(lastUpdated.tradeHistory, todayStr);
|
|
408
|
+
components.socialPosts = needsUpdate(lastUpdated.socialPosts, todayStr);
|
|
409
|
+
}
|
|
341
410
|
}
|
|
342
411
|
|
|
343
412
|
// Only add if at least one component needs updating
|
|
@@ -348,7 +417,7 @@ async function getPopularInvestorsToUpdate(dependencies, config) {
|
|
|
348
417
|
}
|
|
349
418
|
}
|
|
350
419
|
|
|
351
|
-
logger.log('INFO', `[Core Utils] Found ${Object.keys(investors).length} Popular Investors in master list. Skipped ${skippedCount} (updated today). ${targets.length} queued for update.`);
|
|
420
|
+
logger.log('INFO', `[Core Utils] Found ${Object.keys(investors).length} Popular Investors in master list (${masterListSource}). Skipped ${skippedCount} (updated today). ${targets.length} queued for update.`);
|
|
352
421
|
return targets;
|
|
353
422
|
|
|
354
423
|
} catch (error) {
|
|
@@ -16,11 +16,24 @@ const SHARD_SIZE = 40;
|
|
|
16
16
|
* @returns {Promise<{success: boolean, message: string, instrumentsProcessed?: number}>}
|
|
17
17
|
*/
|
|
18
18
|
exports.fetchAndStorePrices = async (config, dependencies) => {
|
|
19
|
-
const { db, logger, headerManager, proxyManager, collectionRegistry } = dependencies;
|
|
19
|
+
const { db, logger, headerManager, proxyManager, collectionRegistry, calculationUtils } = dependencies;
|
|
20
20
|
logger.log('INFO', '[PriceFetcherHelpers] Starting Daily Closing Price Update...');
|
|
21
21
|
let selectedHeader = null;
|
|
22
22
|
let wasSuccessful = false;
|
|
23
23
|
|
|
24
|
+
// Load instrument mappings for ticker information (needed for BigQuery)
|
|
25
|
+
let instrumentMappings = null;
|
|
26
|
+
if (process.env.BIGQUERY_ENABLED !== 'false' && calculationUtils?.loadInstrumentMappings) {
|
|
27
|
+
try {
|
|
28
|
+
instrumentMappings = await calculationUtils.loadInstrumentMappings();
|
|
29
|
+
if (instrumentMappings?.instrumentToTicker) {
|
|
30
|
+
logger.log('INFO', `[PriceFetcherHelpers] Loaded ${Object.keys(instrumentMappings.instrumentToTicker).length} instrument mappings for ticker lookup`);
|
|
31
|
+
}
|
|
32
|
+
} catch (mappingError) {
|
|
33
|
+
logger.log('WARN', `[PriceFetcherHelpers] Failed to load instrument mappings: ${mappingError.message}. Ticker will be set to 'unknown_{instrumentId}'`);
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
24
37
|
// Get collection names from registry if available, fallback to hardcoded
|
|
25
38
|
const { getCollectionPath } = collectionRegistry || {};
|
|
26
39
|
let priceCollectionName = 'asset_prices';
|
|
@@ -50,23 +63,82 @@ exports.fetchAndStorePrices = async (config, dependencies) => {
|
|
|
50
63
|
wasSuccessful = true;
|
|
51
64
|
const results = await response.json();
|
|
52
65
|
if (!Array.isArray(results)) { throw new Error('Invalid response format from API. Expected an array.'); }
|
|
53
|
-
logger.log('INFO', `[PriceFetcherHelpers] Received ${results.length} instrument prices.
|
|
54
|
-
|
|
66
|
+
logger.log('INFO', `[PriceFetcherHelpers] Received ${results.length} instrument prices. Processing for BigQuery...`);
|
|
67
|
+
|
|
68
|
+
// Transform daily prices to BigQuery rows
|
|
69
|
+
const fetchedAt = new Date().toISOString();
|
|
70
|
+
const bigqueryRows = [];
|
|
71
|
+
const shardUpdates = {}; // Keep for Firestore backward compatibility
|
|
72
|
+
|
|
55
73
|
for (const instrumentData of results) {
|
|
56
74
|
const dailyData = instrumentData?.ClosingPrices?.Daily;
|
|
57
75
|
const instrumentId = instrumentData.InstrumentId;
|
|
76
|
+
|
|
58
77
|
if (instrumentId && dailyData?.Price && dailyData?.Date) {
|
|
59
78
|
const instrumentIdStr = String(instrumentId);
|
|
60
|
-
const dateKey = dailyData.Date.substring(0, 10);
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
79
|
+
const dateKey = dailyData.Date.substring(0, 10); // Extract YYYY-MM-DD
|
|
80
|
+
|
|
81
|
+
// Get ticker from mappings if available
|
|
82
|
+
let ticker = `unknown_${instrumentId}`;
|
|
83
|
+
if (instrumentMappings?.instrumentToTicker?.[instrumentIdStr]) {
|
|
84
|
+
ticker = instrumentMappings.instrumentToTicker[instrumentIdStr];
|
|
85
|
+
} else if (instrumentData.Ticker) {
|
|
86
|
+
ticker = instrumentData.Ticker;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
// Prepare BigQuery row
|
|
90
|
+
bigqueryRows.push({
|
|
91
|
+
date: dateKey,
|
|
92
|
+
instrument_id: parseInt(instrumentId, 10),
|
|
93
|
+
ticker: ticker,
|
|
94
|
+
price: dailyData.Price,
|
|
95
|
+
open: null, // Daily API doesn't provide OHLC, only closing price
|
|
96
|
+
high: null,
|
|
97
|
+
low: null,
|
|
98
|
+
close: dailyData.Price,
|
|
99
|
+
volume: null,
|
|
100
|
+
fetched_at: fetchedAt
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
// Also prepare Firestore update for backward compatibility
|
|
104
|
+
if (process.env.FIRESTORE_PRICE_FETCH !== 'false') {
|
|
105
|
+
const shardId = `shard_${parseInt(instrumentIdStr, 10) % SHARD_SIZE}`;
|
|
106
|
+
if (!shardUpdates[shardId]) { shardUpdates[shardId] = {}; }
|
|
107
|
+
const pricePath = `${instrumentIdStr}.prices.${dateKey}`;
|
|
108
|
+
const updatePath = `${instrumentIdStr}.lastUpdated`;
|
|
109
|
+
shardUpdates[shardId][pricePath] = dailyData.Price;
|
|
110
|
+
shardUpdates[shardId][updatePath] = FieldValue.serverTimestamp();
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// Write to BigQuery using load jobs (free, batched)
|
|
116
|
+
if (process.env.BIGQUERY_ENABLED !== 'false' && bigqueryRows.length > 0) {
|
|
117
|
+
try {
|
|
118
|
+
const { insertRows, ensureAssetPricesTable } = require('../../core/utils/bigquery_utils');
|
|
119
|
+
await ensureAssetPricesTable(logger);
|
|
120
|
+
|
|
121
|
+
const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
|
|
122
|
+
await insertRows(datasetId, 'asset_prices', bigqueryRows, logger);
|
|
123
|
+
|
|
124
|
+
logger.log('INFO', `[PriceFetcherHelpers] Successfully stored ${bigqueryRows.length} daily price records to BigQuery`);
|
|
125
|
+
} catch (bqError) {
|
|
126
|
+
logger.log('ERROR', `[PriceFetcherHelpers] BigQuery write failed: ${bqError.message}`);
|
|
127
|
+
// Continue - don't fail the entire fetch for BigQuery errors
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// Also write to Firestore for backward compatibility (if needed)
|
|
132
|
+
if (process.env.FIRESTORE_PRICE_FETCH !== 'false' && Object.keys(shardUpdates).length > 0) {
|
|
133
|
+
const batchPromises = [];
|
|
134
|
+
for (const shardId in shardUpdates) {
|
|
135
|
+
const docRef = db.collection(priceCollectionName).doc(shardId);
|
|
136
|
+
const payload = shardUpdates[shardId];
|
|
137
|
+
batchPromises.push(docRef.update(payload));
|
|
138
|
+
}
|
|
139
|
+
await Promise.all(batchPromises);
|
|
140
|
+
logger.log('INFO', `[PriceFetcherHelpers] Also stored prices to ${batchPromises.length} Firestore shards`);
|
|
141
|
+
}
|
|
70
142
|
|
|
71
143
|
// Extract all dates from the price data and create a date tracking document
|
|
72
144
|
const priceDatesSet = new Set();
|
|
@@ -133,6 +133,32 @@ exports.fetchAndStoreInsights = async (config, dependencies) => {
|
|
|
133
133
|
|
|
134
134
|
await docRef.set(firestorePayload);
|
|
135
135
|
|
|
136
|
+
// Write insights to BigQuery (one row per instrument)
|
|
137
|
+
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
138
|
+
try {
|
|
139
|
+
const { insertRows, ensureInstrumentInsightsTable } = require('../../core/utils/bigquery_utils');
|
|
140
|
+
await ensureInstrumentInsightsTable(logger);
|
|
141
|
+
|
|
142
|
+
const fetchedAt = new Date().toISOString();
|
|
143
|
+
const bigqueryRows = insightsData.map(insight => {
|
|
144
|
+
return {
|
|
145
|
+
date: today,
|
|
146
|
+
instrument_id: parseInt(insight.instrumentId, 10),
|
|
147
|
+
insights_data: insight, // Store full insight object as JSON
|
|
148
|
+
fetched_at: fetchedAt
|
|
149
|
+
};
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
|
|
153
|
+
await insertRows(datasetId, 'instrument_insights', bigqueryRows, logger);
|
|
154
|
+
|
|
155
|
+
logger.log('INFO', `[FetchInsightsHelpers] Successfully stored ${bigqueryRows.length} insight records to BigQuery`);
|
|
156
|
+
} catch (bqError) {
|
|
157
|
+
logger.log('WARN', `[FetchInsightsHelpers] BigQuery insights write failed: ${bqError.message}`);
|
|
158
|
+
// Continue - Firestore write succeeded
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
136
162
|
// Update root data indexer for today's date after insights data is stored
|
|
137
163
|
try {
|
|
138
164
|
const { runRootDataIndexer } = require('../../root-data-indexer/index');
|
|
@@ -315,6 +315,35 @@ async function fetchAndStorePopularInvestors(config, dependencies) {
|
|
|
315
315
|
|
|
316
316
|
logger.log('SUCCESS', `[PopularInvestorFetch] Stored ${data.TotalRows} rankings into ${finalRankingsCollectionName}/${today}${firestorePayload._compressed ? ' (compressed)' : ''}`);
|
|
317
317
|
|
|
318
|
+
// Write rankings to BigQuery (one row per PI)
|
|
319
|
+
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
320
|
+
try {
|
|
321
|
+
const { insertRows, ensurePIRankingsTable } = require('../../core/utils/bigquery_utils');
|
|
322
|
+
await ensurePIRankingsTable(logger);
|
|
323
|
+
|
|
324
|
+
const fetchedAt = new Date().toISOString();
|
|
325
|
+
const bigqueryRows = data.Items.map((item, index) => {
|
|
326
|
+
return {
|
|
327
|
+
date: today,
|
|
328
|
+
pi_id: parseInt(item.CustomerId, 10),
|
|
329
|
+
username: item.UserName || null,
|
|
330
|
+
rank: index + 1, // Rank is position in array (1-indexed)
|
|
331
|
+
category: item.Category || null,
|
|
332
|
+
rankings_data: item, // Store full item data as JSON
|
|
333
|
+
fetched_at: fetchedAt
|
|
334
|
+
};
|
|
335
|
+
});
|
|
336
|
+
|
|
337
|
+
const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
|
|
338
|
+
await insertRows(datasetId, 'pi_rankings', bigqueryRows, logger);
|
|
339
|
+
|
|
340
|
+
logger.log('INFO', `[PopularInvestorFetch] Successfully stored ${bigqueryRows.length} ranking records to BigQuery`);
|
|
341
|
+
} catch (bqError) {
|
|
342
|
+
logger.log('WARN', `[PopularInvestorFetch] BigQuery rankings write failed: ${bqError.message}`);
|
|
343
|
+
// Continue - Firestore write succeeded
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
|
|
318
347
|
// Update the master list of Popular Investors
|
|
319
348
|
// Use batched writes to avoid 500 field transform limit
|
|
320
349
|
try {
|
|
@@ -426,6 +455,43 @@ async function fetchAndStorePopularInvestors(config, dependencies) {
|
|
|
426
455
|
}
|
|
427
456
|
|
|
428
457
|
logger.log('SUCCESS', `[PopularInvestorFetch] Updated master list: ${newInvestorsCount} new, ${updatedInvestorsCount} updated. Total unique PIs: ${Object.keys({ ...existingInvestors, ...investorsToUpdate }).length}`);
|
|
458
|
+
|
|
459
|
+
// Write master list updates to BigQuery
|
|
460
|
+
if (process.env.BIGQUERY_ENABLED !== 'false' && Object.keys(investorsToUpdate).length > 0) {
|
|
461
|
+
try {
|
|
462
|
+
const { insertRowsWithMerge, ensurePIMasterListTable } = require('../../core/utils/bigquery_utils');
|
|
463
|
+
await ensurePIMasterListTable(logger);
|
|
464
|
+
|
|
465
|
+
const now = new Date().toISOString();
|
|
466
|
+
const bigqueryRows = Object.entries(investorsToUpdate).map(([cid, investorData]) => {
|
|
467
|
+
// Handle Firestore Timestamp objects
|
|
468
|
+
const convertTimestamp = (ts) => {
|
|
469
|
+
if (!ts) return now;
|
|
470
|
+
if (ts instanceof Date) return ts.toISOString();
|
|
471
|
+
if (ts.toDate && typeof ts.toDate === 'function') return ts.toDate().toISOString();
|
|
472
|
+
if (typeof ts === 'string') return ts;
|
|
473
|
+
return now;
|
|
474
|
+
};
|
|
475
|
+
|
|
476
|
+
return {
|
|
477
|
+
cid: parseInt(cid, 10),
|
|
478
|
+
username: investorData.username,
|
|
479
|
+
first_seen_at: convertTimestamp(investorData.firstSeenAt),
|
|
480
|
+
last_seen_at: convertTimestamp(investorData.lastSeenAt),
|
|
481
|
+
last_updated: now
|
|
482
|
+
};
|
|
483
|
+
});
|
|
484
|
+
|
|
485
|
+
const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
|
|
486
|
+
// Use MERGE to update existing records or insert new ones
|
|
487
|
+
await insertRowsWithMerge(datasetId, 'pi_master_list', bigqueryRows, ['cid'], logger);
|
|
488
|
+
|
|
489
|
+
logger.log('INFO', `[PopularInvestorFetch] Successfully stored ${bigqueryRows.length} master list records to BigQuery`);
|
|
490
|
+
} catch (bqError) {
|
|
491
|
+
logger.log('WARN', `[PopularInvestorFetch] BigQuery master list write failed: ${bqError.message}`);
|
|
492
|
+
// Continue - Firestore write succeeded
|
|
493
|
+
}
|
|
494
|
+
}
|
|
429
495
|
} catch (masterListError) {
|
|
430
496
|
logger.log('WARN', `[PopularInvestorFetch] Failed to update master list: ${masterListError.message}`);
|
|
431
497
|
// Non-critical, continue
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Backfill Instrument Insights from Firestore to BigQuery
|
|
3
|
+
*
|
|
4
|
+
* This function reads existing insights data from Firestore
|
|
5
|
+
* and writes it to BigQuery table.
|
|
6
|
+
*
|
|
7
|
+
* Usage (Local Node.js script):
|
|
8
|
+
* node index.js --startDate=2024-01-01 --endDate=2024-12-31
|
|
9
|
+
* node index.js (backfills all dates)
|
|
10
|
+
*
|
|
11
|
+
* Features:
|
|
12
|
+
* - Backfills insights for date range (or all dates)
|
|
13
|
+
* - Uses load jobs (free) for efficient batching
|
|
14
|
+
* - Handles compressed Firestore data
|
|
15
|
+
* - Does NOT delete any Firestore data
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
const { Firestore } = require('@google-cloud/firestore');
|
|
19
|
+
const zlib = require('zlib');
|
|
20
|
+
const {
|
|
21
|
+
ensureInstrumentInsightsTable,
|
|
22
|
+
insertRows
|
|
23
|
+
} = require('../../core/utils/bigquery_utils');
|
|
24
|
+
|
|
25
|
+
// Module-level Firestore client, constructed with no explicit options.
// backfillInsights() reads the `daily_instrument_insights` collection through it.
const db = new Firestore();
|
|
26
|
+
|
|
27
|
+
/**
 * Unwraps a Firestore document payload that may have been stored gzip-compressed.
 *
 * A document is treated as compressed when it carries a truthy `_compressed`
 * flag together with a truthy `payload`; the payload is gunzipped and parsed
 * as UTF-8 JSON. Anything else is handed back untouched.
 *
 * @param {*} data - Raw document data (or a falsy value).
 * @returns {*} `null` for falsy input, the parsed JSON for compressed input,
 *   otherwise the original `data` (also returned when decompression fails).
 */
function tryDecompress(data) {
    if (!data) {
        return null;
    }

    const looksCompressed = data._compressed && data.payload;
    if (!looksCompressed) {
        return data;
    }

    try {
        const inflated = zlib.gunzipSync(Buffer.from(data.payload));
        return JSON.parse(inflated.toString('utf8'));
    } catch (e) {
        // Best effort: report the problem and fall back to the raw wrapper object.
        console.error(`[Backfill] Decompression failed: ${e.message}`);
        return data;
    }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Backfill insights from Firestore to BigQuery for a date range.
 *
 * Reads every document of the `daily_instrument_insights` collection (the
 * document ID is used as the `YYYY-MM-DD` date), turns each entry of the
 * document's `insights` array into one row and passes the rows to
 * `insertRows` for the `instrument_insights` table.
 *
 * Date bounds are inclusive, compared as plain strings against the document
 * ID, and independent of each other: supplying only `startDate` backfills
 * from that date onwards, supplying only `endDate` backfills everything up to
 * that date, supplying neither backfills all dates.
 *
 * A failure while processing a single date is logged and counted as skipped;
 * it does not abort the remaining dates.
 *
 * @param {?string} [startDate=null] - Inclusive lower bound, or null for none.
 * @param {?string} [endDate=null] - Inclusive upper bound, or null for none.
 * @param {{log: Function}} [logger=console] - Logger called as `log(level, message)`.
 * @returns {Promise<Object>} `{ success: false, message }` when the collection
 *   is empty, otherwise `{ success: true, processedDates, totalRows, skippedDates }`.
 * @throws Re-throws (after logging) any error raised outside the per-date
 *   processing, e.g. from table setup or the collection fetch.
 */
async function backfillInsights(startDate = null, endDate = null, logger = console) {
    // Describe the effective range, including one-sided ranges.
    let rangeLabel = ' (all dates)';
    if (startDate && endDate) {
        rangeLabel = ` from ${startDate} to ${endDate}`;
    } else if (startDate) {
        rangeLabel = ` from ${startDate} onwards`;
    } else if (endDate) {
        rangeLabel = ` up to ${endDate}`;
    }
    logger.log('INFO', `[Backfill] Starting insights backfill${rangeLabel}...`);

    try {
        await ensureInstrumentInsightsTable(logger);

        const insightsCollection = db.collection('daily_instrument_insights');

        // Get all insights documents
        logger.log('INFO', '[Backfill] Fetching insights documents from Firestore...');
        const snapshot = await insightsCollection.get();

        if (snapshot.empty) {
            logger.log('WARN', '[Backfill] No insights documents found in Firestore');
            return { success: false, message: 'No insights found' };
        }

        logger.log('INFO', `[Backfill] Found ${snapshot.size} insights documents`);

        // Loop-invariant: resolve the target dataset once.
        const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';

        let totalRows = 0;
        let processedDates = 0;
        let skippedDates = 0;

        // Process each document
        for (const doc of snapshot.docs) {
            const dateStr = doc.id; // Document ID is the date (YYYY-MM-DD)

            // Apply each bound on its own so a lone startDate/endDate is honoured.
            const beforeStart = Boolean(startDate) && dateStr < startDate;
            const afterEnd = Boolean(endDate) && dateStr > endDate;
            if (beforeStart || afterEnd) {
                skippedDates++;
                continue;
            }

            try {
                const data = tryDecompress(doc.data());
                const insights = data.insights || [];

                if (insights.length === 0) {
                    logger.log('WARN', `[Backfill] No insights in document for ${dateStr}`);
                    skippedDates++;
                    continue;
                }

                // Use the stored fetch time when present: values exposing toDate()
                // are converted to ISO strings, anything else is passed through
                // as-is. Fall back to "now" when the document has no fetchedAt.
                const fetchedAt = data.fetchedAt
                    ? (data.fetchedAt.toDate ? data.fetchedAt.toDate().toISOString() : data.fetchedAt)
                    : new Date().toISOString();

                // Transform to BigQuery rows (one row per instrument)
                const bigqueryRows = insights.map(insight => {
                    return {
                        date: dateStr,
                        instrument_id: parseInt(insight.instrumentId, 10),
                        insights_data: insight, // Store full insight object as JSON
                        fetched_at: fetchedAt
                    };
                });

                // Write to BigQuery (per the original author's note, insertRows
                // uses load jobs - not verifiable from this file)
                await insertRows(datasetId, 'instrument_insights', bigqueryRows, logger);

                totalRows += bigqueryRows.length;
                processedDates++;

                if (processedDates % 10 === 0) {
                    logger.log('INFO', `[Backfill] Processed ${processedDates} dates, ${totalRows} rows so far...`);
                }
            } catch (dateError) {
                // Keep going: one bad date must not abort the whole backfill.
                logger.log('ERROR', `[Backfill] Failed to process insights for ${dateStr}: ${dateError.message}`);
                skippedDates++;
            }
        }

        logger.log('SUCCESS', `[Backfill] ✅ Insights backfill complete: ${processedDates} dates processed, ${totalRows} rows, ${skippedDates} skipped`);

        return { success: true, processedDates, totalRows, skippedDates };
    } catch (error) {
        logger.log('ERROR', `[Backfill] Insights backfill failed: ${error.message}`);
        throw error;
    }
}
|
|
128
|
+
|
|
129
|
+
/**
 * Main entry point.
 *
 * Builds a console-backed logger that prefixes each message with an ISO
 * timestamp and the log level, then delegates to backfillInsights().
 *
 * @param {?string} [startDate=null] - Forwarded unchanged to backfillInsights.
 * @param {?string} [endDate=null] - Forwarded unchanged to backfillInsights.
 * @returns {Promise<*>} Whatever backfillInsights resolves with.
 * @throws Re-throws any error from backfillInsights after logging it.
 */
async function backfillInstrumentInsights(startDate = null, endDate = null) {
    const writeLine = (level, message, ...args) => {
        const timestamp = new Date().toISOString();
        console.log(`[${timestamp}] [${level}] ${message}`, ...args);
    };
    const logger = { log: writeLine };

    logger.log('INFO', '[Backfill] Starting Instrument Insights backfill...');

    let result;
    try {
        result = await backfillInsights(startDate, endDate, logger);
        logger.log('SUCCESS', '[Backfill] ✅ All backfills completed!');
    } catch (error) {
        logger.log('ERROR', `[Backfill] Fatal error: ${error.message}`);
        throw error;
    }
    return result;
}
|
|
152
|
+
|
|
153
|
+
// CLI handling: only runs when this file is executed directly (not when required).
if (require.main === module) {
    let startDate = null;
    let endDate = null;

    // Recognised flags: --startDate=YYYY-MM-DD and --endDate=YYYY-MM-DD.
    for (const arg of process.argv.slice(2)) {
        if (arg.startsWith('--startDate=')) {
            startDate = arg.split('=')[1];
        } else if (arg.startsWith('--endDate=')) {
            endDate = arg.split('=')[1];
        }
    }

    // Exit code 0 on success, 1 on any failure (including failure to print the result).
    (async () => {
        try {
            const result = await backfillInstrumentInsights(startDate, endDate);
            console.log('\n✅ Backfill completed successfully!');
            console.log('Results:', JSON.stringify(result, null, 2));
            process.exit(0);
        } catch (error) {
            console.error('\n❌ Backfill failed:', error);
            process.exit(1);
        }
    })();
}

module.exports = { backfillInstrumentInsights, backfillInsights };
|