bulltrackers-module 1.0.710 → 1.0.713
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/api-v2/helpers/data-fetchers/firestore.js +119 -63
- package/functions/computation-system/data/CachedDataLoader.js +22 -1
- package/functions/computation-system/data/DependencyFetcher.js +118 -0
- package/functions/computation-system/persistence/ResultCommitter.js +94 -3
- package/functions/computation-system/utils/data_loader.js +244 -13
- package/functions/core/utils/bigquery_utils.js +1655 -0
- package/functions/core/utils/firestore_utils.js +99 -30
- package/functions/etoro-price-fetcher/helpers/handler_helpers.js +85 -13
- package/functions/fetch-insights/helpers/handler_helpers.js +26 -0
- package/functions/fetch-popular-investors/helpers/fetch_helpers.js +66 -0
- package/functions/maintenance/backfill-instrument-insights/index.js +180 -0
- package/functions/maintenance/backfill-pi-master-list-rankings/index.js +293 -0
- package/functions/maintenance/backfill-task-engine-data/README.md +72 -0
- package/functions/maintenance/backfill-task-engine-data/index.js +844 -0
- package/functions/price-backfill/helpers/handler_helpers.js +59 -10
- package/functions/root-data-indexer/index.js +79 -27
- package/functions/task-engine/helpers/data_storage_helpers.js +194 -102
- package/functions/task-engine/helpers/popular_investor_helpers.js +13 -7
- package/functions/task-engine/utils/bigquery_batch_manager.js +201 -0
- package/functions/task-engine/utils/firestore_batch_manager.js +21 -1
- package/index.js +34 -2
- package/package.json +7 -3
|
@@ -96,7 +96,8 @@ async function updateLastUpdated(db, collectionRegistry, cid, userType, dataType
|
|
|
96
96
|
}
|
|
97
97
|
|
|
98
98
|
async function processPortfolio(context, config, taskData, isPI) {
|
|
99
|
-
const { db, logger, collectionRegistry, proxyManager, headerManager } = context;
|
|
99
|
+
const { db, logger, collectionRegistry, proxyManager, headerManager, batchManager } = context;
|
|
100
|
+
const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
|
|
100
101
|
const { cid, username, uuid, today, requestOptions } = taskData;
|
|
101
102
|
const url = `${config.ETORO_API_PORTFOLIO_URL}?cid=${cid}&client_request_id=${uuid}`;
|
|
102
103
|
|
|
@@ -132,7 +133,8 @@ async function processPortfolio(context, config, taskData, isPI) {
|
|
|
132
133
|
}
|
|
133
134
|
|
|
134
135
|
async function processHistory(context, config, taskData, isPI) {
|
|
135
|
-
const { db, logger, collectionRegistry, proxyManager, headerManager } = context;
|
|
136
|
+
const { db, logger, collectionRegistry, proxyManager, headerManager, batchManager } = context;
|
|
137
|
+
const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
|
|
136
138
|
const { cid, uuid, today, requestOptions } = taskData;
|
|
137
139
|
const oneYearAgo = new Date(); oneYearAgo.setFullYear(oneYearAgo.getFullYear() - 1);
|
|
138
140
|
const url = `${config.ETORO_API_HISTORY_URL}?StartTime=${oneYearAgo.toISOString()}&PageNumber=1&ItemsPerPage=30000&PublicHistoryPortfolioFilter=&CID=${cid}&client_request_id=${uuid}`;
|
|
@@ -145,15 +147,16 @@ async function processHistory(context, config, taskData, isPI) {
|
|
|
145
147
|
data.PublicHistoryPositions = data.PublicHistoryPositions.filter(p => [0, 1, 5].includes(p.CloseReason));
|
|
146
148
|
}
|
|
147
149
|
|
|
148
|
-
if (isPI) await storePopularInvestorTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data });
|
|
149
|
-
else await storeSignedInUserTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data });
|
|
150
|
+
if (isPI) await storePopularInvestorTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data, bigqueryBatchManager });
|
|
151
|
+
else await storeSignedInUserTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data, bigqueryBatchManager });
|
|
150
152
|
|
|
151
153
|
await updateLastUpdated(db, collectionRegistry, cid, isPI ? 'popularInvestor' : 'signedInUser', 'tradeHistory', logger);
|
|
152
154
|
return true;
|
|
153
155
|
}
|
|
154
156
|
|
|
155
157
|
async function processSocial(context, config, taskData, isPI) {
|
|
156
|
-
const { db, logger, collectionRegistry, proxyManager, headerManager } = context;
|
|
158
|
+
const { db, logger, collectionRegistry, proxyManager, headerManager, batchManager } = context;
|
|
159
|
+
const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
|
|
157
160
|
const { cid, username, uuid, today, requestOptions } = taskData;
|
|
158
161
|
const { getGcidForUser } = require('../../social-task-handler/helpers/handler_helpers');
|
|
159
162
|
|
|
@@ -172,8 +175,11 @@ async function processSocial(context, config, taskData, isPI) {
|
|
|
172
175
|
}));
|
|
173
176
|
|
|
174
177
|
// Store posts even if empty (to mark that social fetch was attempted)
|
|
175
|
-
|
|
176
|
-
|
|
178
|
+
const { batchManager } = context;
|
|
179
|
+
const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
|
|
180
|
+
|
|
181
|
+
if (isPI) await storePopularInvestorSocialPosts({ db, logger, collectionRegistry, cid, date: today, posts, bigqueryBatchManager });
|
|
182
|
+
else await storeSignedInUserSocialPosts({ db, logger, collectionRegistry, cid, date: today, posts, bigqueryBatchManager });
|
|
177
183
|
|
|
178
184
|
// Update lastUpdated timestamp to indicate social fetch completed (even if 0 posts)
|
|
179
185
|
await updateLastUpdated(db, collectionRegistry, cid, isPI ? 'popularInvestor' : 'signedInUser', 'socialPosts', logger);
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
/**
 * @fileoverview BigQuery Batch Manager for Task Engine
 *
 * Buffers rows destined for BigQuery and flushes them in bulk using
 * LOAD JOBS (free, unlike streaming inserts).
 * Integrates with FirestoreBatchManager so both flush together.
 *
 * Implementation notes (lessons learned from the backfill script):
 * - Use table.createLoadJob(), not table.load()
 * - Load jobs require a file on disk, so rows are staged in a temp NDJSON file
 * - Poll job.getMetadata() until the job reaches the DONE state
 * - Flush errors are logged, never thrown, so Firestore writes continue
 * - Temp files are always cleaned up in a finally block
 */

const fs = require('fs');
const path = require('path');
const os = require('os');
const {
  getOrCreateDataset,
  ensurePortfolioSnapshotsTable,
  ensureTradeHistorySnapshotsTable,
  ensureSocialPostSnapshotsTable
} = require('../../core/utils/bigquery_utils');

class BigQueryBatchManager {
  /**
   * @param {{log: function(string, string): void}|null} logger - Optional logger
   *   exposing log(level, message); all logging is skipped when absent.
   */
  constructor(logger) {
    this.logger = logger;
    this.datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';

    // Per-table row buffers, drained by flushBatches().
    this.portfolioBuffer = [];
    this.historyBuffer = [];
    this.socialBuffer = [];

    // Each table's existence is verified at most once per manager instance,
    // so repeated add*Row calls don't re-hit the BigQuery metadata API.
    this.tablesEnsured = {
      portfolio: false,
      history: false,
      social: false
    };
  }

  /**
   * Buffer a portfolio snapshot row, ensuring the target table exists first.
   * @param {Object} row - Row matching the portfolio_snapshots table schema.
   */
  async addPortfolioRow(row) {
    if (!this.tablesEnsured.portfolio) {
      await ensurePortfolioSnapshotsTable(this.logger);
      this.tablesEnsured.portfolio = true;
    }
    this.portfolioBuffer.push(row);
  }

  /**
   * Buffer a trade history row, ensuring the target table exists first.
   * @param {Object} row - Row matching the trade_history_snapshots table schema.
   */
  async addHistoryRow(row) {
    if (!this.tablesEnsured.history) {
      await ensureTradeHistorySnapshotsTable(this.logger);
      this.tablesEnsured.history = true;
    }
    this.historyBuffer.push(row);
  }

  /**
   * Buffer a social post row, ensuring the target table exists first.
   * @param {Object} row - Row matching the social_post_snapshots table schema.
   */
  async addSocialRow(row) {
    if (!this.tablesEnsured.social) {
      await ensureSocialPostSnapshotsTable(this.logger);
      this.tablesEnsured.social = true;
    }
    this.socialBuffer.push(row);
  }

  /**
   * Flush one buffer to BigQuery via a load job (createLoadJob + temp file +
   * getMetadata() polling, as learned from the backfill script).
   *
   * Never throws: on failure the rows are re-queued in the buffer for the
   * next flush and 0 is returned, so a BigQuery outage cannot block the
   * Firestore commit path.
   *
   * @param {Array<Object>} buffer - One of the per-table buffers (drained in place).
   * @param {string} tableId - BigQuery table id within the dataset.
   * @param {string} tableName - Human-readable name used in log messages.
   * @returns {Promise<number>} Number of rows loaded (0 on failure or empty buffer).
   */
  async _flushBuffer(buffer, tableId, tableName) {
    if (buffer.length === 0) return 0;

    const rows = [...buffer]; // Copy buffer
    buffer.length = 0; // Clear buffer (keeps the array instance callers hold)

    try {
      const dataset = await getOrCreateDataset(this.datasetId, this.logger);
      const table = dataset.table(tableId);

      // Write to temporary file (load jobs require a file, not in-memory data).
      // Random suffix avoids collisions between concurrent flushes.
      const tempFile = path.join(os.tmpdir(), `bigquery_${tableId}_${Date.now()}_${Math.random().toString(36).substring(7)}.ndjson`);
      const ndjson = rows.map(r => JSON.stringify(r)).join('\n');

      try {
        fs.writeFileSync(tempFile, ndjson, 'utf8');

        // Create load job (FREE) - using createLoadJob as learned from backfill
        const [job] = await table.createLoadJob(tempFile, {
          sourceFormat: 'NEWLINE_DELIMITED_JSON',
          writeDisposition: 'WRITE_APPEND',
          autodetect: false // Use existing table schema
        });

        // Wait for job completion by polling getMetadata().
        let jobMetadata;
        const maxAttempts = 60; // 5 minutes max (5 second intervals)
        const pollInterval = 5000; // 5 seconds

        for (let attempt = 0; attempt < maxAttempts; attempt++) {
          [jobMetadata] = await job.getMetadata();
          const state = jobMetadata.status?.state;

          if (state === 'DONE') {
            break;
          }

          if (state === 'PENDING' || state === 'RUNNING') {
            // Wait before next poll
            await new Promise(resolve => setTimeout(resolve, pollInterval));
          } else {
            throw new Error(`Unexpected job state: ${state}`);
          }
        }

        // Check if we timed out (also guards against jobMetadata never being set)
        if (jobMetadata?.status?.state !== 'DONE') {
          throw new Error(`Load job did not complete within ${maxAttempts * pollInterval / 1000} seconds`);
        }

        // A DONE job may still carry an error result — surface it.
        if (jobMetadata.status?.errorResult) {
          throw new Error(`Load job failed: ${jobMetadata.status.errorResult.message}`);
        }

        // FIX: statistics.load.outputRows is an INT64 the Jobs API encodes as a
        // STRING (e.g. "50"). Without Number(), flushBatches' reduce would do
        // string concatenation ("0" + "50" -> "050") and corrupt the total.
        const reportedRows = Number(jobMetadata.statistics?.load?.outputRows);
        const rowsLoaded = Number.isFinite(reportedRows) ? reportedRows : rows.length;

        if (this.logger) {
          this.logger.log('INFO', `[BigQueryBatch] ✅ Flushed ${rowsLoaded} ${tableName} rows to BigQuery using LOAD JOB (free)`);
        }

        return rowsLoaded;
      } finally {
        // Clean up temp file (as learned from backfill)
        try {
          if (fs.existsSync(tempFile)) {
            fs.unlinkSync(tempFile);
          }
        } catch (cleanupError) {
          if (this.logger) {
            this.logger.log('WARN', `[BigQueryBatch] Failed to delete temp file ${tempFile}: ${cleanupError.message}`);
          }
        }
      }
    } catch (error) {
      // Log error but don't throw - allow Firestore writes to continue
      if (this.logger) {
        this.logger.log('WARN', `[BigQueryBatch] Failed to flush ${tableName} to BigQuery: ${error.message}`);
      }
      // Put rows back in buffer for retry on next flush.
      // NOTE(review): the buffer grows unbounded across repeated failures —
      // consider a cap if BigQuery can be down for long periods.
      buffer.push(...rows);
      return 0;
    }
  }

  /**
   * Flush all buffers to BigQuery in parallel.
   * Called by FirestoreBatchManager.flushBatches().
   * No-op when BIGQUERY_ENABLED is explicitly set to 'false'.
   */
  async flushBatches() {
    if (process.env.BIGQUERY_ENABLED === 'false') {
      return; // Skip if BigQuery disabled
    }

    const results = await Promise.allSettled([
      this._flushBuffer(this.portfolioBuffer, 'portfolio_snapshots', 'portfolio'),
      this._flushBuffer(this.historyBuffer, 'trade_history_snapshots', 'history'),
      this._flushBuffer(this.socialBuffer, 'social_post_snapshots', 'social')
    ]);

    // Defensive Number() in case a future _flushBuffer change returns a string.
    const totalFlushed = results
      .filter(r => r.status === 'fulfilled')
      .reduce((sum, r) => sum + (Number(r.value) || 0), 0);

    if (totalFlushed > 0 && this.logger) {
      this.logger.log('INFO', `[BigQueryBatch] Flushed ${totalFlushed} total rows to BigQuery`);
    }
  }

  /**
   * Get current buffer sizes (for monitoring).
   * @returns {{portfolio: number, history: number, social: number}}
   */
  getBufferSizes() {
    return {
      portfolio: this.portfolioBuffer.length,
      history: this.historyBuffer.length,
      social: this.socialBuffer.length
    };
  }
}

module.exports = { BigQueryBatchManager };
|
|
@@ -5,13 +5,20 @@
|
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
const { FieldValue } = require('@google-cloud/firestore');
|
|
8
|
+
const { BigQueryBatchManager } = require('./bigquery_batch_manager');
|
|
8
9
|
|
|
9
10
|
class FirestoreBatchManager {
|
|
10
11
|
constructor(db, headerManager, logger, config) {
|
|
11
12
|
this.db = db;
|
|
12
13
|
this.headerManager = headerManager;
|
|
13
14
|
this.logger = logger;
|
|
14
|
-
this.config = config;
|
|
15
|
+
this.config = config;
|
|
16
|
+
|
|
17
|
+
// Initialize BigQuery batch manager (if enabled)
|
|
18
|
+
this.bigqueryBatchManager = null;
|
|
19
|
+
if (process.env.BIGQUERY_ENABLED !== 'false') {
|
|
20
|
+
this.bigqueryBatchManager = new BigQueryBatchManager(logger);
|
|
21
|
+
}
|
|
15
22
|
|
|
16
23
|
// State containers for batching
|
|
17
24
|
this.portfolioBatch = {};
|
|
@@ -316,9 +323,22 @@ class FirestoreBatchManager {
|
|
|
316
323
|
}
|
|
317
324
|
|
|
318
325
|
if (batchOps) await firestoreBatch.commit();
|
|
326
|
+
|
|
327
|
+
// Flush BigQuery batches (if enabled) - runs in parallel with Firestore
|
|
328
|
+
if (this.bigqueryBatchManager) {
|
|
329
|
+
await this.bigqueryBatchManager.flushBatches();
|
|
330
|
+
}
|
|
331
|
+
|
|
319
332
|
await this.headerManager.flushPerformanceUpdates();
|
|
320
333
|
this.logger.log('INFO', '[BATCH] All batches flushed successfully.');
|
|
321
334
|
}
|
|
335
|
+
|
|
336
|
+
/**
|
|
337
|
+
* Get BigQuery batch manager (for adding rows from data_storage_helpers)
|
|
338
|
+
*/
|
|
339
|
+
getBigQueryBatchManager() {
|
|
340
|
+
return this.bigqueryBatchManager;
|
|
341
|
+
}
|
|
322
342
|
}
|
|
323
343
|
|
|
324
344
|
module.exports = { FirestoreBatchManager };
|
package/index.js
CHANGED
|
@@ -61,6 +61,10 @@ const { runBackfillAssetPrices } = require('./functions
|
|
|
61
61
|
const { runRootDataIndexer } = require('./functions/root-data-indexer/index');
|
|
62
62
|
// [NEW] Popular Investor Fetcher
|
|
63
63
|
const { runPopularInvestorFetch } = require('./functions/fetch-popular-investors/index');
|
|
64
|
+
// [NEW] Backfill Task Engine Data
|
|
65
|
+
const { backfillTaskEngineData } = require('./functions/maintenance/backfill-task-engine-data/index');
|
|
66
|
+
const { backfillPIMasterListRankings } = require('./functions/maintenance/backfill-pi-master-list-rankings/index');
|
|
67
|
+
const { backfillInstrumentInsights } = require('./functions/maintenance/backfill-instrument-insights/index');
|
|
64
68
|
|
|
65
69
|
// Alert System
|
|
66
70
|
const { handleAlertTrigger, handleComputationResultWrite, checkAndSendAllClearNotifications } = require('./functions/alert-system/index');
|
|
@@ -127,7 +131,11 @@ const maintenance = {
|
|
|
127
131
|
runBackfillAssetPrices,
|
|
128
132
|
runRootDataIndexer,
|
|
129
133
|
// [NEW] Added to maintenance pipe
|
|
130
|
-
runPopularInvestorFetch
|
|
134
|
+
runPopularInvestorFetch,
|
|
135
|
+
// [NEW] BigQuery backfills
|
|
136
|
+
backfillTaskEngineData,
|
|
137
|
+
backfillPIMasterListRankings,
|
|
138
|
+
backfillInstrumentInsights
|
|
131
139
|
};
|
|
132
140
|
|
|
133
141
|
const proxy = { handlePost };
|
|
@@ -140,4 +148,28 @@ const alertSystem = {
|
|
|
140
148
|
|
|
141
149
|
module.exports = {
|
|
142
150
|
pipe: { core, orchestrator, dispatcher, taskEngine, computationSystem, api, maintenance, proxy, alertSystem },
|
|
143
|
-
};
|
|
151
|
+
};
|
|
152
|
+
|
|
153
|
+
// CLI handling: If running as a script with backfill arguments, execute the backfill
if (require.main === module) {
  const cliArgs = process.argv.slice(2);

  // A backfill invocation is identified by any of these flag prefixes.
  const BACKFILL_FLAG_PREFIXES = ['--startDate=', '--endDate=', '--dataType='];
  const isBackfillCommand = cliArgs.some(
    (arg) => BACKFILL_FLAG_PREFIXES.some((prefix) => arg.startsWith(prefix))
  );

  if (isBackfillCommand) {
    // Route to backfill function
    console.log('🚀 Starting backfill from main entry point...\n');
    backfillTaskEngineData(null, null).catch((error) => {
      console.error('Fatal error:', error);
      process.exit(1);
    });
  }
  // No recognized command: do nothing, so the file still works as a plain
  // module when imported (module.exports above remains the only effect).
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bulltrackers-module",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.713",
|
|
4
4
|
"description": "Helper Functions for Bulltrackers.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -21,7 +21,10 @@
|
|
|
21
21
|
"functions/price-backfill/",
|
|
22
22
|
"functions/root-data-indexer/",
|
|
23
23
|
"functions/fetch-popular-investors",
|
|
24
|
-
"functions/alert-system/"
|
|
24
|
+
"functions/alert-system/",
|
|
25
|
+
"functions/maintenance/backfill-instrument-insights",
|
|
26
|
+
"functions/maintenance/backfill-pi-master-list-rankings",
|
|
27
|
+
"functions/maintenance/backfill-task-engine-data"
|
|
25
28
|
],
|
|
26
29
|
"keywords": [
|
|
27
30
|
"bulltrackers",
|
|
@@ -46,7 +49,8 @@
|
|
|
46
49
|
"require-all": "^3.0.0",
|
|
47
50
|
"sharedsetup": "latest",
|
|
48
51
|
"zod": "^4.3.5",
|
|
49
|
-
"@google-cloud/storage": "^7.18.0"
|
|
52
|
+
"@google-cloud/storage": "^7.18.0",
|
|
53
|
+
"@google-cloud/bigquery": "^7.3.0"
|
|
50
54
|
},
|
|
51
55
|
"devDependencies": {
|
|
52
56
|
"bulltracker-deployer": "file:../bulltracker-deployer"
|