bulltrackers-module 1.0.723 → 1.0.725
This diff shows the changes between publicly released versions of the package as they appear in the supported public registries. It is provided for informational purposes only.
- package/functions/computation-system/context/ManifestBuilder.js +0 -2
- package/functions/computation-system/executors/MetaExecutor.js +1 -1
- package/functions/etoro-price-fetcher/helpers/handler_helpers.js +5 -4
- package/functions/fetch-insights/helpers/handler_helpers.js +5 -4
- package/functions/fetch-popular-investors/helpers/fetch_helpers.js +30 -1
- package/functions/maintenance/backfill-pi-master-list-rankings/index.js +3 -2
- package/functions/price-backfill/helpers/handler_helpers.js +5 -4
- package/functions/task-engine/helpers/data_storage_helpers.js +24 -13
- package/functions/task-engine/utils/bigquery_batch_manager.js +34 -83
- package/index.js +3 -13
- package/package.json +1 -1
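
Most of the changes below replace the previous BigQuery write path (a `WRITE_APPEND` load job in the batch manager, plus truncated direct-write calls elsewhere) with `insertRowsWithMerge(datasetId, tableId, rows, dedupKeys, logger)` from `core/utils/bigquery_utils`. That helper's implementation is not part of this diff; the following is only a minimal sketch of what such a helper typically looks like, assuming the `@google-cloud/bigquery` client and a staging-table MERGE. It is not the package's actual code.

```js
// Hypothetical sketch only: the real insertRowsWithMerge in this package may differ.
const { BigQuery } = require('@google-cloud/bigquery');

async function insertRowsWithMerge(datasetId, tableId, rows, dedupKeys, logger) {
  if (!rows.length) return 0;
  const bq = new BigQuery();
  const dataset = bq.dataset(datasetId);
  const stagingId = `${tableId}_staging_${Date.now()}`;

  // 1. Create a short-lived staging table with the target's schema and load the rows into it.
  //    (A production version might use a load job here instead of a streaming insert.)
  const [targetMeta] = await dataset.table(tableId).getMetadata();
  await dataset.createTable(stagingId, { schema: targetMeta.schema });
  await dataset.table(stagingId).insert(rows);

  // 2. MERGE staging into the target on the dedup keys: matched rows are refreshed,
  //    unmatched rows are inserted, so re-runs never append duplicates.
  const cols = targetMeta.schema.fields.map(f => f.name);
  const updatable = cols.filter(c => !dedupKeys.includes(c));
  const query = `
    MERGE \`${datasetId}.${tableId}\` T
    USING \`${datasetId}.${stagingId}\` S
    ON ${dedupKeys.map(k => `T.${k} = S.${k}`).join(' AND ')}
    WHEN MATCHED THEN UPDATE SET ${updatable.map(c => `T.${c} = S.${c}`).join(', ')}
    WHEN NOT MATCHED THEN INSERT (${cols.join(', ')}) VALUES (${cols.map(c => `S.${c}`).join(', ')})`;
  const [job] = await bq.createQueryJob({ query });
  await job.getQueryResults();

  // 3. Drop the staging table and report what was written.
  await dataset.table(stagingId).delete();
  if (logger) logger.log('INFO', `[BigQueryUtils] Merged ${rows.length} rows into ${tableId}`);
  return rows.length;
}

module.exports = { insertRowsWithMerge };
```

The dedup keys become the MERGE join condition, so re-running a fetch or backfill for the same day updates existing rows instead of appending duplicates; the trade-off against the old free load-job append is that MERGE is a billed DML query.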
package/functions/computation-system/context/ManifestBuilder.js
CHANGED

@@ -231,7 +231,6 @@ function buildManifest(productLinesToRun = [], calculations) {
   const manifestEntry = {
     name: normalizedName,
     class: Class,
-    // [CHANGED] Strictly use the folderName as the category.
     category: folderName,
     sourcePackage: folderName,
     type: metadata.type,
@@ -247,7 +246,6 @@ function buildManifest(productLinesToRun = [], calculations) {
     userType: metadata.userType,
     dependencies: dependencies,
     schedule: metadata.schedule || null,
-    // [NEW] Added TTL Policy to Manifest
     ttlDays: metadata.ttlDays,
     pass: 0,
     hash: intrinsicHash,
package/functions/computation-system/executors/MetaExecutor.js
CHANGED

@@ -130,7 +130,7 @@ class MetaExecutor {
       previousComputedDependencies: prevDeps, config, deps,
       allRankings: rankings,
       allRankingsYesterday: rankingsYesterday,
-      piMasterList,
+      piMasterList,
       ...variableRoots,
       seriesData
     };
package/functions/etoro-price-fetcher/helpers/handler_helpers.js
CHANGED

@@ -112,16 +112,17 @@ exports.fetchAndStorePrices = async (config, dependencies) => {
     }
   }

-  // Write to BigQuery using
+  // Write to BigQuery using MERGE (prevents duplicates if fetcher runs twice on same day)
   if (process.env.BIGQUERY_ENABLED !== 'false' && bigqueryRows.length > 0) {
     try {
-      const {
+      const { insertRowsWithMerge, ensureAssetPricesTable } = require('../../core/utils/bigquery_utils');
       await ensureAssetPricesTable(logger);

       const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
-
+      // Use MERGE with dedup keys to prevent duplicates on re-runs
+      await insertRowsWithMerge(datasetId, 'asset_prices', bigqueryRows, ['date', 'instrument_id'], logger);

-      logger.log('INFO', `[PriceFetcherHelpers] Successfully stored ${bigqueryRows.length} daily price records to BigQuery`);
+      logger.log('INFO', `[PriceFetcherHelpers] Successfully stored ${bigqueryRows.length} daily price records to BigQuery [MERGE]`);
     } catch (bqError) {
       logger.log('ERROR', `[PriceFetcherHelpers] BigQuery write failed: ${bqError.message}`);
       // Continue - don't fail the entire fetch for BigQuery errors
package/functions/fetch-insights/helpers/handler_helpers.js
CHANGED

@@ -90,10 +90,10 @@ exports.fetchAndStoreInsights = async (config, dependencies) => {
   let storageSuccess = false;
   let storageMethod = 'NONE';

-  // 1. Attempt BigQuery Write
+  // 1. Attempt BigQuery Write (using MERGE to prevent duplicates on re-runs)
   if (process.env.BIGQUERY_ENABLED !== 'false') {
     try {
-      const {
+      const { insertRowsWithMerge, ensureInstrumentInsightsTable } = require('../../core/utils/bigquery_utils');
       await ensureInstrumentInsightsTable(logger);

       const fetchedAt = new Date().toISOString();
@@ -107,9 +107,10 @@ exports.fetchAndStoreInsights = async (config, dependencies) => {
       });

       const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
-
+      // Use MERGE with dedup keys to prevent duplicates if fetch runs twice on same day
+      await insertRowsWithMerge(datasetId, 'instrument_insights', bigqueryRows, ['date', 'instrument_id'], logger);

-      logger.log('INFO', `[FetchInsightsHelpers] ✅ Successfully stored ${bigqueryRows.length} insight records to BigQuery.`);
+      logger.log('INFO', `[FetchInsightsHelpers] ✅ Successfully stored ${bigqueryRows.length} insight records to BigQuery [MERGE].`);
       storageSuccess = true;
       storageMethod = 'BIGQUERY';
     } catch (bqError) {
package/functions/fetch-popular-investors/helpers/fetch_helpers.js
CHANGED

@@ -124,19 +124,48 @@ async function fetchAndStorePopularInvestors(config, dependencies) {
     const fetchedCids = new Set(data.Items.map(item => String(item.CustomerId)));
     const knownCids = Object.keys(bqMasterList);
     const missingCids = knownCids.filter(cid => !fetchedCids.has(cid));
+
+    logger.log('INFO', `[PopularInvestorFetch] Comparison: API returned ${fetchedCids.size} PIs, Master List has ${knownCids.length} known PIs, ${missingCids.length} missing from API response`);

     if (missingCids.length > 0) {
+      // Log the missing users with their usernames for visibility
+      const missingUsernames = missingCids.map(cid => {
+        const userData = bqMasterList[cid];
+        return userData?.username || `unknown_${cid}`;
+      });
       logger.log('INFO', `[PopularInvestorFetch] Found ${missingCids.length} missing users. Fetching individually...`);
+      logger.log('INFO', `[PopularInvestorFetch] Missing users: ${missingUsernames.slice(0, 20).join(', ')}${missingCids.length > 20 ? ` ... and ${missingCids.length - 20} more` : ''}`);
+
       const { header } = await headerManager.selectHeader();

-
+      let successCount = 0;
+      let failCount = 0;
+
+      for (let i = 0; i < missingCids.length; i++) {
+        const cid = missingCids[i];
+        const username = bqMasterList[cid]?.username || `unknown_${cid}`;
+
+        // Log progress every 10 users or at start
+        if (i === 0 || (i + 1) % 10 === 0) {
+          logger.log('INFO', `[PopularInvestorFetch] Fetching missing user ${i + 1}/${missingCids.length}: ${username} (CID: ${cid})`);
+        }
+
       const userData = await fetchIndividualUserRankings(cid, { ...header, 'Referer': 'https://www.etoro.com/' }, proxyManager, logger);
       if (userData) {
         data.Items.push(userData);
         data.TotalRows++;
+          successCount++;
+          logger.log('TRACE', `[PopularInvestorFetch] ✅ Successfully fetched ${username} (CID: ${cid})`);
+        } else {
+          failCount++;
+          logger.log('WARN', `[PopularInvestorFetch] ❌ Failed to fetch ${username} (CID: ${cid})`);
       }
       await new Promise(r => setTimeout(r, 200)); // Rate limit
     }
+
+      logger.log('INFO', `[PopularInvestorFetch] Missing user fetch complete: ${successCount} recovered, ${failCount} failed`);
+    } else {
+      logger.log('INFO', `[PopularInvestorFetch] No missing users - all ${knownCids.length} known PIs were returned by the API`);
     }
   } catch (e) {
     logger.log('WARN', `[PopularInvestorFetch] Failed to check master list: ${e.message}`);
package/functions/maintenance/backfill-pi-master-list-rankings/index.js
CHANGED

@@ -191,9 +191,10 @@ async function backfillRankings(startDate = null, endDate = null, logger = conso
       };
     });

-    // Write to BigQuery using
+    // Write to BigQuery using MERGE (prevents duplicates if backfill is re-run)
     const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
-
+    // Use MERGE with dedup keys to prevent duplicates on re-runs
+    await insertRowsWithMerge(datasetId, 'pi_rankings', bigqueryRows, ['date', 'pi_id'], logger);

     totalRows += bigqueryRows.length;
     processedDates++;
package/functions/price-backfill/helpers/handler_helpers.js
CHANGED

@@ -68,16 +68,17 @@ exports.runBackfillAssetPrices = async (config, dependencies) => {
       };
     });

-    // Write to BigQuery using
+    // Write to BigQuery using MERGE (prevents duplicates if backfill is re-run)
    if (process.env.BIGQUERY_ENABLED !== 'false') {
      try {
-        const {
+        const { insertRowsWithMerge, ensureAssetPricesTable } = require('../../core/utils/bigquery_utils');
        await ensureAssetPricesTable(logger);

        const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
-
+        // Use MERGE with dedup keys to prevent duplicates on re-runs
+        await insertRowsWithMerge(datasetId, 'asset_prices', bigqueryRows, ['date', 'instrument_id'], logger);

-        logger.log('TRACE', `[PriceBackfill] Successfully stored ${bigqueryRows.length} price records for ${ticker} (${instrumentId}) to BigQuery`);
+        logger.log('TRACE', `[PriceBackfill] Successfully stored ${bigqueryRows.length} price records for ${ticker} (${instrumentId}) to BigQuery [MERGE]`);
      } catch (bqError) {
        logger.log('ERROR', `[PriceBackfill] BigQuery write failed for ${ticker} (${instrumentId}): ${bqError.message}`);
        // Continue - don't fail the entire backfill for one instrument
package/functions/task-engine/helpers/data_storage_helpers.js
CHANGED

@@ -14,9 +14,14 @@ const {
   ensurePortfolioSnapshotsTable,
   ensureTradeHistorySnapshotsTable,
   ensureSocialPostSnapshotsTable,
-
+  insertRowsWithMerge
 } = require('../../core/utils/bigquery_utils');

+// Deduplication keys for BigQuery MERGE operations
+const PORTFOLIO_DEDUP_KEYS = ['date', 'user_id', 'user_type'];
+const HISTORY_DEDUP_KEYS = ['date', 'user_id', 'user_type'];
+const SOCIAL_DEDUP_KEYS = ['date', 'user_id', 'user_type'];
+
 /**
  * Store portfolio data for a signed-in user
  * @param {object} params - Parameters
@@ -47,9 +52,10 @@ async function storeSignedInUserPortfolio({ db, logger, collectionRegistry, cid,
       await bigqueryBatchManager.addPortfolioRow(row);
     } else {
       // Direct write (fallback for when batch manager not available)
+      // Uses MERGE to prevent duplicates if user is processed twice on same day
       await ensurePortfolioSnapshotsTable(logger);
-      await
-      logger.log('INFO', `[DataStorage] ✅ Wrote portfolio to BigQuery for signed-in user ${cid} (date: ${date})`);
+      await insertRowsWithMerge(datasetId, 'portfolio_snapshots', [row], PORTFOLIO_DEDUP_KEYS, logger);
+      logger.log('INFO', `[DataStorage] ✅ Wrote portfolio to BigQuery for signed-in user ${cid} (date: ${date}) [MERGE]`);
     }
   } catch (bqError) {
     logger.log('WARN', `[DataStorage] BigQuery write failed for signed-in user ${cid} (${date}): ${bqError.message}`);
@@ -106,9 +112,10 @@ async function storeSignedInUserTradeHistory({ db, logger, collectionRegistry, c
       await bigqueryBatchManager.addHistoryRow(row);
     } else {
       // Direct write (fallback for when batch manager not available)
+      // Uses MERGE to prevent duplicates if user is processed twice on same day
       await ensureTradeHistorySnapshotsTable(logger);
-      await
-      logger.log('INFO', `[DataStorage] ✅ Wrote trade history to BigQuery for signed-in user ${cid} (date: ${date})`);
+      await insertRowsWithMerge(datasetId, 'trade_history_snapshots', [row], HISTORY_DEDUP_KEYS, logger);
+      logger.log('INFO', `[DataStorage] ✅ Wrote trade history to BigQuery for signed-in user ${cid} (date: ${date}) [MERGE]`);
     }
   } catch (bqError) {
     logger.log('WARN', `[DataStorage] BigQuery write failed for signed-in user ${cid} (${date}): ${bqError.message}`);
@@ -169,8 +176,9 @@ async function storeSignedInUserSocialPosts({ db, logger, collectionRegistry, ci
       fetched_at: new Date().toISOString()
     };

-
-
+    // Uses MERGE to prevent duplicates if user is processed twice on same day
+    await insertRowsWithMerge(datasetId, 'social_post_snapshots', [row], SOCIAL_DEDUP_KEYS, logger);
+    logger.log('INFO', `[DataStorage] ✅ Wrote social posts to BigQuery for signed-in user ${cid} (date: ${date}, ${posts.length} posts) [MERGE]`);
   } catch (bqError) {
     logger.log('WARN', `[DataStorage] BigQuery write failed for signed-in user ${cid} (${date}): ${bqError.message}`);
     // Continue to Firestore write (fallback)
@@ -258,9 +266,10 @@ async function storePopularInvestorPortfolio({ db, logger, collectionRegistry, c
       await bigqueryBatchManager.addPortfolioRow(row);
     } else {
       // Direct write (fallback for when batch manager not available)
+      // Uses MERGE to prevent duplicates if PI is processed twice on same day
       await ensurePortfolioSnapshotsTable(logger);
-      await
-      logger.log('INFO', `[DataStorage] ✅ Wrote portfolio to BigQuery for PI ${cid} (date: ${date})`);
+      await insertRowsWithMerge(datasetId, 'portfolio_snapshots', [row], PORTFOLIO_DEDUP_KEYS, logger);
+      logger.log('INFO', `[DataStorage] ✅ Wrote portfolio to BigQuery for PI ${cid} (date: ${date}) [MERGE]`);
     }
   } catch (bqError) {
     logger.log('WARN', `[DataStorage] BigQuery write failed for PI ${cid} (${date}): ${bqError.message}`);
@@ -316,9 +325,10 @@ async function storePopularInvestorTradeHistory({ db, logger, collectionRegistry
       await bigqueryBatchManager.addHistoryRow(row);
     } else {
       // Direct write (fallback for when batch manager not available)
+      // Uses MERGE to prevent duplicates if PI is processed twice on same day
       await ensureTradeHistorySnapshotsTable(logger);
-      await
-      logger.log('INFO', `[DataStorage] ✅ Wrote trade history to BigQuery for PI ${cid} (date: ${date})`);
+      await insertRowsWithMerge(datasetId, 'trade_history_snapshots', [row], HISTORY_DEDUP_KEYS, logger);
+      logger.log('INFO', `[DataStorage] ✅ Wrote trade history to BigQuery for PI ${cid} (date: ${date}) [MERGE]`);
     }
   } catch (bqError) {
     logger.log('WARN', `[DataStorage] BigQuery write failed for PI ${cid} (${date}): ${bqError.message}`);
@@ -382,9 +392,10 @@ async function storePopularInvestorSocialPosts({ db, logger, collectionRegistry,
       await bigqueryBatchManager.addSocialRow(row);
     } else {
       // Direct write (fallback for when batch manager not available)
+      // Uses MERGE to prevent duplicates if PI is processed twice on same day
       await ensureSocialPostSnapshotsTable(logger);
-      await
-      logger.log('INFO', `[DataStorage] ✅ Wrote social posts to BigQuery for PI ${cid} (date: ${date}, ${posts.length} posts)`);
+      await insertRowsWithMerge(datasetId, 'social_post_snapshots', [row], SOCIAL_DEDUP_KEYS, logger);
+      logger.log('INFO', `[DataStorage] ✅ Wrote social posts to BigQuery for PI ${cid} (date: ${date}, ${posts.length} posts) [MERGE]`);
     }
   } catch (bqError) {
     logger.log('WARN', `[DataStorage] BigQuery write failed for PI ${cid} (${date}): ${bqError.message}`);
package/functions/task-engine/utils/bigquery_batch_manager.js
CHANGED

@@ -1,27 +1,30 @@
 /**
  * @fileoverview BigQuery Batch Manager for Task Engine
  *
- * Batches BigQuery writes and flushes them using
+ * Batches BigQuery writes and flushes them using MERGE operations.
  * Integrates with FirestoreBatchManager to flush together.
  *
- *
- *
- *
- *
- * -
- * -
+ * UPDATED: Now uses insertRowsWithMerge to prevent duplicate rows
+ * when a user is processed twice on the same day.
+ *
+ * Deduplication keys:
+ * - portfolio_snapshots: ['date', 'user_id', 'user_type']
+ * - trade_history_snapshots: ['date', 'user_id', 'user_type']
+ * - social_post_snapshots: ['date', 'user_id', 'user_type']
 */

-const fs = require('fs');
-const path = require('path');
-const os = require('os');
 const {
-  getOrCreateDataset,
   ensurePortfolioSnapshotsTable,
   ensureTradeHistorySnapshotsTable,
-  ensureSocialPostSnapshotsTable
+  ensureSocialPostSnapshotsTable,
+  insertRowsWithMerge
 } = require('../../core/utils/bigquery_utils');

+// Deduplication keys for each table type
+const PORTFOLIO_DEDUP_KEYS = ['date', 'user_id', 'user_type'];
+const HISTORY_DEDUP_KEYS = ['date', 'user_id', 'user_type'];
+const SOCIAL_DEDUP_KEYS = ['date', 'user_id', 'user_type'];
+
 class BigQueryBatchManager {
   constructor(logger) {
     this.logger = logger;
@@ -74,83 +77,31 @@ class BigQueryBatchManager {
   }

   /**
-   * Flush a buffer to BigQuery using
-   * Uses
+   * Flush a buffer to BigQuery using MERGE operation
+   * Uses insertRowsWithMerge to prevent duplicates when a user is processed twice on same day
   */
-  async _flushBuffer(buffer, tableId, tableName) {
+  async _flushBuffer(buffer, tableId, tableName, dedupKeys) {
    if (buffer.length === 0) return 0;

    const rows = [...buffer]; // Copy buffer
    buffer.length = 0; // Clear buffer

    try {
-
-
-
-
-
-
+      // Use insertRowsWithMerge for deduplication
+      // This prevents duplicate rows if a user is processed twice on the same day
+      const rowsInserted = await insertRowsWithMerge(
+        this.datasetId,
+        tableId,
+        rows,
+        dedupKeys,
+        this.logger
+      );

-
-
-
-      // Create load job (FREE) - using createLoadJob as learned from backfill
-      const [job] = await table.createLoadJob(tempFile, {
-        sourceFormat: 'NEWLINE_DELIMITED_JSON',
-        writeDisposition: 'WRITE_APPEND',
-        autodetect: false // Use existing table schema
-      });
-
-      // Wait for job to complete using polling (as learned from backfill)
-      let jobMetadata;
-      const maxAttempts = 60; // 5 minutes max (5 second intervals)
-      const pollInterval = 5000; // 5 seconds
-
-      for (let attempt = 0; attempt < maxAttempts; attempt++) {
-        [jobMetadata] = await job.getMetadata();
-        const state = jobMetadata.status?.state;
-
-        if (state === 'DONE') {
-          break;
-        }
-
-        if (state === 'PENDING' || state === 'RUNNING') {
-          // Wait before next poll
-          await new Promise(resolve => setTimeout(resolve, pollInterval));
-        } else {
-          throw new Error(`Unexpected job state: ${state}`);
-        }
-      }
-
-      // Check if we timed out
-      if (jobMetadata.status?.state !== 'DONE') {
-        throw new Error(`Load job did not complete within ${maxAttempts * pollInterval / 1000} seconds`);
-      }
-
-      // Check for errors
-      if (jobMetadata.status?.errorResult) {
-        throw new Error(`Load job failed: ${jobMetadata.status.errorResult.message}`);
-      }
-
-      const rowsLoaded = jobMetadata.statistics?.load?.outputRows || rows.length;
-
-      if (this.logger) {
-        this.logger.log('INFO', `[BigQueryBatch] ✅ Flushed ${rowsLoaded} ${tableName} rows to BigQuery using LOAD JOB (free)`);
-      }
-
-      return rowsLoaded;
-    } finally {
-      // Clean up temp file (as learned from backfill)
-      try {
-        if (fs.existsSync(tempFile)) {
-          fs.unlinkSync(tempFile);
-        }
-      } catch (cleanupError) {
-        if (this.logger) {
-          this.logger.log('WARN', `[BigQueryBatch] Failed to delete temp file ${tempFile}: ${cleanupError.message}`);
-        }
-      }
+      if (this.logger) {
+        this.logger.log('INFO', `[BigQueryBatch] ✅ Flushed ${rows.length} ${tableName} rows to BigQuery using MERGE (${rowsInserted} new, ${rows.length - rowsInserted} updated)`);
       }
+
+      return rows.length;
     } catch (error) {
       // Log error but don't throw - allow Firestore writes to continue
       if (this.logger) {
@@ -172,9 +123,9 @@ class BigQueryBatchManager {
   }

   const results = await Promise.allSettled([
-    this._flushBuffer(this.portfolioBuffer, 'portfolio_snapshots', 'portfolio'),
-    this._flushBuffer(this.historyBuffer, 'trade_history_snapshots', 'history'),
-    this._flushBuffer(this.socialBuffer, 'social_post_snapshots', 'social')
+    this._flushBuffer(this.portfolioBuffer, 'portfolio_snapshots', 'portfolio', PORTFOLIO_DEDUP_KEYS),
+    this._flushBuffer(this.historyBuffer, 'trade_history_snapshots', 'history', HISTORY_DEDUP_KEYS),
+    this._flushBuffer(this.socialBuffer, 'social_post_snapshots', 'social', SOCIAL_DEDUP_KEYS)
   ]);

   const totalFlushed = results
package/index.js
CHANGED

@@ -11,7 +11,6 @@ const { FirestoreBatchManager } = require('./functions/task-engine/utils/fire
 const firestoreUtils = require('./functions/core/utils/firestore_utils');

 // Orchestrator
-// [UPDATED] Imported handleOrchestratorHttp
 const {
   runDiscoveryOrchestrator,
   runUpdateOrchestrator,
@@ -47,19 +46,15 @@ const dataLoader = require('./functions
 const computationUtils = require('./functions/computation-system/utils/utils');


-// API v2 (CommonJS)
 const { createApiV2App } = require('./functions/api-v2/index');

-// Maintenance & Data Acquisition
 const { fetchAndStoreInsights } = require('./functions/fetch-insights/helpers/handler_helpers');
 const { fetchAndStorePrices } = require('./functions/etoro-price-fetcher/helpers/handler_helpers');
 const { runSocialOrchestrator } = require('./functions/social-orchestrator/helpers/orchestrator_helpers');
 const { handleSocialTask } = require('./functions/social-task-handler/helpers/handler_helpers');
 const { runBackfillAssetPrices } = require('./functions/price-backfill/helpers/handler_helpers');
 const { runRootDataIndexer } = require('./functions/root-data-indexer/index');
-// [NEW] Popular Investor Fetcher
 const { runPopularInvestorFetch } = require('./functions/fetch-popular-investors/index');
-// [NEW] Backfill Task Engine Data
 const { backfillTaskEngineData } = require('./functions/maintenance/backfill-task-engine-data/index');
 const { backfillPIMasterListRankings } = require('./functions/maintenance/backfill-pi-master-list-rankings/index');
 const { backfillInstrumentInsights } = require('./functions/maintenance/backfill-instrument-insights/index');
@@ -86,7 +81,6 @@ const core = {
 };

 const orchestrator = {
-  // [UPDATED] Exported handleOrchestratorHttp so it can be mapped in Cloud Functions
   handleOrchestratorHttp,
   runDiscoveryOrchestrator,
   runUpdateOrchestrator,
@@ -122,21 +116,17 @@ const computationSystem = {
 };

 const api = {
-  createApiV2App,
+  createApiV2App,
 };

 const maintenance = {
-  runSpeculatorCleanup: runCleanup,
-  handleInvalidSpeculator,
   runFetchInsights: fetchAndStoreInsights,
   runFetchPrices: fetchAndStorePrices,
   runSocialOrchestrator,
   handleSocialTask,
   runBackfillAssetPrices,
   runRootDataIndexer,
-  // [NEW] Added to maintenance pipe
   runPopularInvestorFetch,
-  // [NEW] BigQuery backfills
   backfillTaskEngineData,
   backfillPIMasterListRankings,
   backfillInstrumentInsights,
@@ -151,8 +141,8 @@ const maintenance = {
 const proxy = { handlePost };

 const alertSystem = {
-  handleAlertTrigger,
-  handleComputationResultWrite,
+  handleAlertTrigger,
+  handleComputationResultWrite,
   checkAndSendAllClearNotifications
 };
