bulltrackers-module 1.0.710 → 1.0.713

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -96,7 +96,8 @@ async function updateLastUpdated(db, collectionRegistry, cid, userType, dataType
96
96
  }
97
97
 
98
98
  async function processPortfolio(context, config, taskData, isPI) {
99
- const { db, logger, collectionRegistry, proxyManager, headerManager } = context;
99
+ const { db, logger, collectionRegistry, proxyManager, headerManager, batchManager } = context;
100
+ const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
100
101
  const { cid, username, uuid, today, requestOptions } = taskData;
101
102
  const url = `${config.ETORO_API_PORTFOLIO_URL}?cid=${cid}&client_request_id=${uuid}`;
102
103
 
@@ -132,7 +133,8 @@ async function processPortfolio(context, config, taskData, isPI) {
132
133
  }
133
134
 
134
135
  async function processHistory(context, config, taskData, isPI) {
135
- const { db, logger, collectionRegistry, proxyManager, headerManager } = context;
136
+ const { db, logger, collectionRegistry, proxyManager, headerManager, batchManager } = context;
137
+ const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
136
138
  const { cid, uuid, today, requestOptions } = taskData;
137
139
  const oneYearAgo = new Date(); oneYearAgo.setFullYear(oneYearAgo.getFullYear() - 1);
138
140
  const url = `${config.ETORO_API_HISTORY_URL}?StartTime=${oneYearAgo.toISOString()}&PageNumber=1&ItemsPerPage=30000&PublicHistoryPortfolioFilter=&CID=${cid}&client_request_id=${uuid}`;
@@ -145,15 +147,16 @@ async function processHistory(context, config, taskData, isPI) {
145
147
  data.PublicHistoryPositions = data.PublicHistoryPositions.filter(p => [0, 1, 5].includes(p.CloseReason));
146
148
  }
147
149
 
148
- if (isPI) await storePopularInvestorTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data });
149
- else await storeSignedInUserTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data });
150
+ if (isPI) await storePopularInvestorTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data, bigqueryBatchManager });
151
+ else await storeSignedInUserTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data, bigqueryBatchManager });
150
152
 
151
153
  await updateLastUpdated(db, collectionRegistry, cid, isPI ? 'popularInvestor' : 'signedInUser', 'tradeHistory', logger);
152
154
  return true;
153
155
  }
154
156
 
155
157
  async function processSocial(context, config, taskData, isPI) {
156
- const { db, logger, collectionRegistry, proxyManager, headerManager } = context;
158
+ const { db, logger, collectionRegistry, proxyManager, headerManager, batchManager } = context;
159
+ const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
157
160
  const { cid, username, uuid, today, requestOptions } = taskData;
158
161
  const { getGcidForUser } = require('../../social-task-handler/helpers/handler_helpers');
159
162
 
@@ -172,8 +175,11 @@ async function processSocial(context, config, taskData, isPI) {
172
175
  }));
173
176
 
174
177
  // Store posts even if empty (to mark that social fetch was attempted)
175
- if (isPI) await storePopularInvestorSocialPosts({ db, logger, collectionRegistry, cid, date: today, posts });
176
- else await storeSignedInUserSocialPosts({ db, logger, collectionRegistry, cid, date: today, posts });
178
+ // NOTE: `batchManager` and `bigqueryBatchManager` are already destructured/declared
+ // at the top of processSocial (see the @@ -155 hunk above); re-declaring them here
+ // with `const` in the same function scope is a SyntaxError, so the duplicate
+ // declarations are omitted.
+
181
+ if (isPI) await storePopularInvestorSocialPosts({ db, logger, collectionRegistry, cid, date: today, posts, bigqueryBatchManager });
182
+ else await storeSignedInUserSocialPosts({ db, logger, collectionRegistry, cid, date: today, posts, bigqueryBatchManager });
177
183
 
178
184
  // Update lastUpdated timestamp to indicate social fetch completed (even if 0 posts)
179
185
  await updateLastUpdated(db, collectionRegistry, cid, isPI ? 'popularInvestor' : 'signedInUser', 'socialPosts', logger);
@@ -0,0 +1,201 @@
1
+ /**
2
+ * @fileoverview BigQuery Batch Manager for Task Engine
3
+ *
4
+ * Batches BigQuery writes and flushes them using LOAD JOBS (FREE).
5
+ * Integrates with FirestoreBatchManager to flush together.
6
+ *
7
+ * Lessons learned from backfill script:
8
+ * - Use createLoadJob() not table.load()
9
+ * - Use temp files for load jobs
10
+ * - Wait for job completion with getMetadata() polling
11
+ * - Handle errors gracefully
12
+ * - Clean up temp files properly
13
+ */
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+ const os = require('os');
18
+ const {
19
+ getOrCreateDataset,
20
+ ensurePortfolioSnapshotsTable,
21
+ ensureTradeHistorySnapshotsTable,
22
+ ensureSocialPostSnapshotsTable
23
+ } = require('../../core/utils/bigquery_utils');
24
+
25
class BigQueryBatchManager {
  /**
   * @param {object|null} logger - Optional logger with a .log(level, msg) method.
   */
  constructor(logger) {
    this.logger = logger;
    this.datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';

    // In-memory row buffers, one per target table.
    this.portfolioBuffer = [];
    this.historyBuffer = [];
    this.socialBuffer = [];

    // Track which tables have been ensured so we only check once per process.
    this.tablesEnsured = {
      portfolio: false,
      history: false,
      social: false
    };
  }

  /**
   * Add a portfolio row to the buffer, ensuring the target table exists first.
   * @param {object} row - A plain JSON-serializable row.
   */
  async addPortfolioRow(row) {
    if (!this.tablesEnsured.portfolio) {
      await ensurePortfolioSnapshotsTable(this.logger);
      this.tablesEnsured.portfolio = true;
    }
    this.portfolioBuffer.push(row);
  }

  /**
   * Add a trade-history row to the buffer, ensuring the target table exists first.
   * @param {object} row - A plain JSON-serializable row.
   */
  async addHistoryRow(row) {
    if (!this.tablesEnsured.history) {
      await ensureTradeHistorySnapshotsTable(this.logger);
      this.tablesEnsured.history = true;
    }
    this.historyBuffer.push(row);
  }

  /**
   * Add a social-post row to the buffer, ensuring the target table exists first.
   * @param {object} row - A plain JSON-serializable row.
   */
  async addSocialRow(row) {
    if (!this.tablesEnsured.social) {
      await ensureSocialPostSnapshotsTable(this.logger);
      this.tablesEnsured.social = true;
    }
    this.socialBuffer.push(row);
  }

  /**
   * Flush one buffer to BigQuery via a LOAD JOB (free tier, unlike streaming inserts).
   * The buffer is drained up front; on failure the rows are re-queued for the next flush.
   *
   * @param {Array<object>} buffer - The live buffer array (mutated in place).
   * @param {string} tableId - BigQuery table id to load into.
   * @param {string} tableName - Human-readable name used in log messages.
   * @returns {Promise<number>} Number of rows loaded (0 on failure or empty buffer).
   */
  async _flushBuffer(buffer, tableId, tableName) {
    if (buffer.length === 0) return 0;

    const rows = [...buffer]; // Copy buffer
    buffer.length = 0; // Clear buffer

    try {
      const dataset = await getOrCreateDataset(this.datasetId, this.logger);
      const table = dataset.table(tableId);

      // Load jobs require a file on disk, not in-memory data.
      const tempFile = path.join(os.tmpdir(), `bigquery_${tableId}_${Date.now()}_${Math.random().toString(36).substring(7)}.ndjson`);
      const ndjson = rows.map(r => JSON.stringify(r)).join('\n');

      try {
        // Async write: the sync variant would block the event loop mid-flush.
        await fs.promises.writeFile(tempFile, ndjson, 'utf8');

        // Create load job (FREE) - using createLoadJob as learned from backfill
        const [job] = await table.createLoadJob(tempFile, {
          sourceFormat: 'NEWLINE_DELIMITED_JSON',
          writeDisposition: 'WRITE_APPEND',
          autodetect: false // Use existing table schema
        });

        // Poll for completion rather than relying on job events.
        let jobMetadata;
        const maxAttempts = 60; // 5 minutes max (5 second intervals)
        const pollInterval = 5000; // 5 seconds

        for (let attempt = 0; attempt < maxAttempts; attempt++) {
          [jobMetadata] = await job.getMetadata();
          const state = jobMetadata.status?.state;

          if (state === 'DONE') {
            break;
          }

          if (state === 'PENDING' || state === 'RUNNING') {
            // Wait before next poll
            await new Promise(resolve => setTimeout(resolve, pollInterval));
          } else {
            throw new Error(`Unexpected job state: ${state}`);
          }
        }

        // Guard jobMetadata itself too: it is undefined if the loop never ran.
        if (jobMetadata?.status?.state !== 'DONE') {
          throw new Error(`Load job did not complete within ${maxAttempts * pollInterval / 1000} seconds`);
        }

        // A DONE job can still carry an error result.
        if (jobMetadata.status?.errorResult) {
          throw new Error(`Load job failed: ${jobMetadata.status.errorResult.message}`);
        }

        const rowsLoaded = jobMetadata.statistics?.load?.outputRows || rows.length;

        if (this.logger) {
          this.logger.log('INFO', `[BigQueryBatch] ✅ Flushed ${rowsLoaded} ${tableName} rows to BigQuery using LOAD JOB (free)`);
        }

        return rowsLoaded;
      } finally {
        // Clean up temp file; ENOENT just means it was never written.
        try {
          await fs.promises.unlink(tempFile);
        } catch (cleanupError) {
          if (cleanupError.code !== 'ENOENT' && this.logger) {
            this.logger.log('WARN', `[BigQueryBatch] Failed to delete temp file ${tempFile}: ${cleanupError.message}`);
          }
        }
      }
    } catch (error) {
      // Log error but don't throw - allow Firestore writes to continue
      if (this.logger) {
        this.logger.log('WARN', `[BigQueryBatch] Failed to flush ${tableName} to BigQuery: ${error.message}`);
      }
      // Re-queue rows for retry on the next flush. A plain loop is used instead of
      // buffer.push(...rows) because spreading a very large array can exceed the
      // engine's maximum argument count.
      for (const row of rows) buffer.push(row);
      return 0;
    }
  }

  /**
   * Flush all buffers to BigQuery.
   * Called by FirestoreBatchManager.flushBatches().
   */
  async flushBatches() {
    if (process.env.BIGQUERY_ENABLED === 'false') {
      return; // Skip if BigQuery disabled
    }

    // allSettled: one table failing must not prevent the others from flushing.
    const results = await Promise.allSettled([
      this._flushBuffer(this.portfolioBuffer, 'portfolio_snapshots', 'portfolio'),
      this._flushBuffer(this.historyBuffer, 'trade_history_snapshots', 'history'),
      this._flushBuffer(this.socialBuffer, 'social_post_snapshots', 'social')
    ]);

    const totalFlushed = results
      .filter(r => r.status === 'fulfilled')
      .reduce((sum, r) => sum + r.value, 0);

    if (totalFlushed > 0 && this.logger) {
      this.logger.log('INFO', `[BigQueryBatch] Flushed ${totalFlushed} total rows to BigQuery`);
    }
  }

  /**
   * Get current buffer sizes (for monitoring).
   * @returns {{portfolio: number, history: number, social: number}}
   */
  getBufferSizes() {
    return {
      portfolio: this.portfolioBuffer.length,
      history: this.historyBuffer.length,
      social: this.socialBuffer.length
    };
  }
}
200
+
201
+ module.exports = { BigQueryBatchManager };
@@ -5,13 +5,20 @@
5
5
  */
6
6
 
7
7
  const { FieldValue } = require('@google-cloud/firestore');
8
+ const { BigQueryBatchManager } = require('./bigquery_batch_manager');
8
9
 
9
10
  class FirestoreBatchManager {
10
11
  constructor(db, headerManager, logger, config) {
11
12
  this.db = db;
12
13
  this.headerManager = headerManager;
13
14
  this.logger = logger;
14
- this.config = config;
15
+ this.config = config;
16
+
17
+ // Initialize BigQuery batch manager (if enabled)
18
+ this.bigqueryBatchManager = null;
19
+ if (process.env.BIGQUERY_ENABLED !== 'false') {
20
+ this.bigqueryBatchManager = new BigQueryBatchManager(logger);
21
+ }
15
22
 
16
23
  // State containers for batching
17
24
  this.portfolioBatch = {};
@@ -316,9 +323,22 @@ class FirestoreBatchManager {
316
323
  }
317
324
 
318
325
  if (batchOps) await firestoreBatch.commit();
326
+
327
+ // Flush BigQuery batches (if enabled) - runs in parallel with Firestore
328
+ if (this.bigqueryBatchManager) {
329
+ await this.bigqueryBatchManager.flushBatches();
330
+ }
331
+
319
332
  await this.headerManager.flushPerformanceUpdates();
320
333
  this.logger.log('INFO', '[BATCH] All batches flushed successfully.');
321
334
  }
335
+
336
+ /**
337
+ * Get BigQuery batch manager (for adding rows from data_storage_helpers)
338
+ */
339
+ getBigQueryBatchManager() {
340
+ return this.bigqueryBatchManager;
341
+ }
322
342
  }
323
343
 
324
344
  module.exports = { FirestoreBatchManager };
package/index.js CHANGED
@@ -61,6 +61,10 @@ const { runBackfillAssetPrices } = require('./functions
61
61
  const { runRootDataIndexer } = require('./functions/root-data-indexer/index');
62
62
  // [NEW] Popular Investor Fetcher
63
63
  const { runPopularInvestorFetch } = require('./functions/fetch-popular-investors/index');
64
+ // [NEW] Backfill Task Engine Data
65
+ const { backfillTaskEngineData } = require('./functions/maintenance/backfill-task-engine-data/index');
66
+ const { backfillPIMasterListRankings } = require('./functions/maintenance/backfill-pi-master-list-rankings/index');
67
+ const { backfillInstrumentInsights } = require('./functions/maintenance/backfill-instrument-insights/index');
64
68
 
65
69
  // Alert System
66
70
  const { handleAlertTrigger, handleComputationResultWrite, checkAndSendAllClearNotifications } = require('./functions/alert-system/index');
@@ -127,7 +131,11 @@ const maintenance = {
127
131
  runBackfillAssetPrices,
128
132
  runRootDataIndexer,
129
133
  // [NEW] Added to maintenance pipe
130
- runPopularInvestorFetch
134
+ runPopularInvestorFetch,
135
+ // [NEW] BigQuery backfills
136
+ backfillTaskEngineData,
137
+ backfillPIMasterListRankings,
138
+ backfillInstrumentInsights
131
139
  };
132
140
 
133
141
  const proxy = { handlePost };
@@ -140,4 +148,28 @@ const alertSystem = {
140
148
 
141
149
  module.exports = {
142
150
  pipe: { core, orchestrator, dispatcher, taskEngine, computationSystem, api, maintenance, proxy, alertSystem },
143
- };
151
+ };
152
+
153
+ // CLI handling: If running as a script with backfill arguments, execute the backfill
154
+ if (require.main === module) {
155
+ const args = process.argv.slice(2);
156
+
157
+ // Check if this looks like a backfill command (has --startDate or --endDate)
158
+ const isBackfillCommand = args.some(arg =>
159
+ arg.startsWith('--startDate=') ||
160
+ arg.startsWith('--endDate=') ||
161
+ arg.startsWith('--dataType=')
162
+ );
163
+
164
+ if (isBackfillCommand) {
165
+ // Route to backfill function
166
+ console.log('🚀 Starting backfill from main entry point...\n');
167
+ backfillTaskEngineData(null, null).catch(error => {
168
+ console.error('Fatal error:', error);
169
+ process.exit(1);
170
+ });
171
+ } else {
172
+ // No recognized command, just export (normal module behavior)
173
+ // This allows the file to still work as a module when imported
174
+ }
175
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bulltrackers-module",
3
- "version": "1.0.710",
3
+ "version": "1.0.713",
4
4
  "description": "Helper Functions for Bulltrackers.",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -21,7 +21,10 @@
21
21
  "functions/price-backfill/",
22
22
  "functions/root-data-indexer/",
23
23
  "functions/fetch-popular-investors",
24
- "functions/alert-system/"
24
+ "functions/alert-system/",
25
+ "functions/maintenance/backfill-instrument-insights",
26
+ "functions/maintenance/backfill-pi-master-list-rankings",
27
+ "functions/maintenance/backfill-task-engine-data"
25
28
  ],
26
29
  "keywords": [
27
30
  "bulltrackers",
@@ -46,7 +49,8 @@
46
49
  "require-all": "^3.0.0",
47
50
  "sharedsetup": "latest",
48
51
  "zod": "^4.3.5",
49
- "@google-cloud/storage": "^7.18.0"
52
+ "@google-cloud/storage": "^7.18.0",
53
+ "@google-cloud/bigquery": "^7.3.0"
50
54
  },
51
55
  "devDependencies": {
52
56
  "bulltracker-deployer": "file:../bulltracker-deployer"