bulltrackers-module 1.0.710 → 1.0.712

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,13 +2,20 @@
  * @fileoverview Data Storage Helpers for New Collection Structure
  *
  * Stores data to:
- * 1. Root data collections (date-based, per-user) - for computation system
+ * 1. BigQuery (date-based, per-user) - for computation system (NEW)
  * 2. User-centric collections (latest snapshot) - for fallback/quick access
  *
+ * UPDATED: Removed root data Firestore writes, now writes to BigQuery instead
  * Uses the centralized collection registry for all paths.
  */

  const { FieldValue } = require('@google-cloud/firestore');
+ const {
+   ensurePortfolioSnapshotsTable,
+   ensureTradeHistorySnapshotsTable,
+   ensureSocialPostSnapshotsTable,
+   insertRows
+ } = require('../../core/utils/bigquery_utils');

  /**
  * Store portfolio data for a signed-in user
@@ -19,23 +26,38 @@ const { FieldValue } = require('@google-cloud/firestore');
  * @param {string} params.cid - User CID
  * @param {string} params.date - Date string (YYYY-MM-DD)
  * @param {object} params.portfolioData - Portfolio data to store
+ * @param {object} params.bigqueryBatchManager - Optional BigQuery batch manager (if provided, batches writes)
  * @returns {Promise<void>}
  */
- async function storeSignedInUserPortfolio({ db, logger, collectionRegistry, cid, date, portfolioData }) {
-   // 1. Store to root data collection (for computations)
-   // Structure: SignedInUserPortfolioData/{date}/{cid}/{cid}
-   const rootDataRef = db.collection('SignedInUserPortfolioData')
-     .doc(date)
-     .collection(String(cid))
-     .doc(String(cid));
-
-   await rootDataRef.set({
-     ...portfolioData,
-     fetchedAt: FieldValue.serverTimestamp(),
-     cid: String(cid)
-   }, { merge: false });
+ async function storeSignedInUserPortfolio({ db, logger, collectionRegistry, cid, date, portfolioData, bigqueryBatchManager = null }) {
+   // 1. Write to BigQuery (for computations) - use batch manager if provided, otherwise direct write
+   if (process.env.BIGQUERY_ENABLED !== 'false') {
+     try {
+       const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+       const row = {
+         date: date,
+         user_id: Number(cid),
+         user_type: 'SIGNED_IN_USER',
+         portfolio_data: JSON.stringify(portfolioData), // BigQuery JSON type requires a string
+         fetched_at: new Date().toISOString()
+       };
+
+       if (bigqueryBatchManager) {
+         // Add to batch (will flush with Firestore batches)
+         await bigqueryBatchManager.addPortfolioRow(row);
+       } else {
+         // Direct write (fallback for when batch manager not available)
+         await ensurePortfolioSnapshotsTable(logger);
+         await insertRows(datasetId, 'portfolio_snapshots', [row], logger);
+         logger.log('INFO', `[DataStorage] ✅ Wrote portfolio to BigQuery for signed-in user ${cid} (date: ${date})`);
+       }
+     } catch (bqError) {
+       logger.log('WARN', `[DataStorage] BigQuery write failed for signed-in user ${cid} (${date}): ${bqError.message}`);
+       // Continue to Firestore write (fallback)
+     }
+   }

-   // 2. Store latest snapshot to user-centric collection (for fallback)
+   // 2. Store latest snapshot to user-centric collection (for fallback/quick access)
    const { getCollectionPath } = collectionRegistry || {};
    if (!getCollectionPath) {
      throw new Error('collectionRegistry.getCollectionPath is required');
@@ -63,23 +85,38 @@ async function storeSignedInUserPortfolio({ db, logger, collectionRegistry, cid,
  * @param {string} params.cid - User CID
  * @param {string} params.date - Date string (YYYY-MM-DD)
  * @param {object} params.historyData - Trade history data to store
+ * @param {object} params.bigqueryBatchManager - Optional BigQuery batch manager (if provided, batches writes)
  * @returns {Promise<void>}
  */
- async function storeSignedInUserTradeHistory({ db, logger, collectionRegistry, cid, date, historyData }) {
-   // 1. Store to root data collection (for computations)
-   // Structure: SignedInUserTradeHistoryData/{date}/{cid}/{cid}
-   const rootDataRef = db.collection('SignedInUserTradeHistoryData')
-     .doc(date)
-     .collection(String(cid))
-     .doc(String(cid));
-
-   await rootDataRef.set({
-     ...historyData,
-     fetchedAt: FieldValue.serverTimestamp(),
-     cid: String(cid)
-   }, { merge: false });
+ async function storeSignedInUserTradeHistory({ db, logger, collectionRegistry, cid, date, historyData, bigqueryBatchManager = null }) {
+   // 1. Write to BigQuery (for computations) - use batch manager if provided, otherwise direct write
+   if (process.env.BIGQUERY_ENABLED !== 'false') {
+     try {
+       const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+       const row = {
+         date: date,
+         user_id: Number(cid),
+         user_type: 'SIGNED_IN_USER',
+         history_data: JSON.stringify(historyData), // BigQuery JSON type requires a string
+         fetched_at: new Date().toISOString()
+       };
+
+       if (bigqueryBatchManager) {
+         // Add to batch (will flush with Firestore batches)
+         await bigqueryBatchManager.addHistoryRow(row);
+       } else {
+         // Direct write (fallback for when batch manager not available)
+         await ensureTradeHistorySnapshotsTable(logger);
+         await insertRows(datasetId, 'trade_history_snapshots', [row], logger);
+         logger.log('INFO', `[DataStorage] ✅ Wrote trade history to BigQuery for signed-in user ${cid} (date: ${date})`);
+       }
+     } catch (bqError) {
+       logger.log('WARN', `[DataStorage] BigQuery write failed for signed-in user ${cid} (${date}): ${bqError.message}`);
+       // Continue to Firestore write (fallback)
+     }
+   }

-   // 2. Store latest snapshot to user-centric collection (for fallback)
+   // 2. Store latest snapshot to user-centric collection (for fallback/quick access)
    const { getCollectionPath } = collectionRegistry || {};
    if (!getCollectionPath) {
      throw new Error('collectionRegistry.getCollectionPath is required');
@@ -107,30 +144,39 @@ async function storeSignedInUserTradeHistory({ db, logger, collectionRegistry, c
  * @param {string} params.cid - User CID
  * @param {string} params.date - Date string (YYYY-MM-DD)
  * @param {Array} params.posts - Array of social posts
+ * @param {object} params.bigqueryBatchManager - Optional BigQuery batch manager (if provided, batches writes)
  * @returns {Promise<void>}
  */
- async function storeSignedInUserSocialPosts({ db, logger, collectionRegistry, cid, date, posts }) {
-   // 1. Store to root data collection (for computations)
-   // Structure: SignedInUserSocialPostData/{date}/{cid}/{cid}
-   const rootDataRef = db.collection('SignedInUserSocialPostData')
-     .doc(date)
-     .collection(String(cid))
-     .doc(String(cid));
-
-   const postsMap = {};
-   for (const post of posts) {
-     if (post.id || post.postId) {
-       postsMap[post.id || post.postId] = post;
+ async function storeSignedInUserSocialPosts({ db, logger, collectionRegistry, cid, date, posts, bigqueryBatchManager = null }) {
+   // 1. Write to BigQuery (for computations)
+   if (process.env.BIGQUERY_ENABLED !== 'false') {
+     try {
+       await ensureSocialPostSnapshotsTable(logger);
+
+       const postsMap = {};
+       for (const post of posts) {
+         if (post.id || post.postId) {
+           postsMap[post.id || post.postId] = post;
+         }
+       }
+
+       const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+       const row = {
+         date: date,
+         user_id: Number(cid),
+         user_type: 'SIGNED_IN_USER',
+         posts_data: JSON.stringify({ posts: postsMap, postCount: posts.length }), // BigQuery JSON type requires a string
+         fetched_at: new Date().toISOString()
+       };
+
+       await insertRows(datasetId, 'social_post_snapshots', [row], logger);
+       logger.log('INFO', `[DataStorage] ✅ Wrote social posts to BigQuery for signed-in user ${cid} (date: ${date}, ${posts.length} posts)`);
+     } catch (bqError) {
+       logger.log('WARN', `[DataStorage] BigQuery write failed for signed-in user ${cid} (${date}): ${bqError.message}`);
+       // Continue to Firestore write (fallback)
      }
   }

-   await rootDataRef.set({
-     posts: postsMap,
-     fetchedAt: FieldValue.serverTimestamp(),
-     cid: String(cid),
-     postCount: posts.length
-   }, { merge: false });
-
    // 2. Store latest posts to user-centric collection (for fallback)
    // Path structure: SignedInUsers/{cid}/posts/{postId}
    // Construct path directly - we know the structure
@@ -180,31 +226,48 @@ async function storeSignedInUserSocialPosts({ db, logger, collectionRegistry, ci
  * @param {string} params.date - Date string (YYYY-MM-DD)
  * @param {object} params.portfolioData - Portfolio data to store
  * @param {object} params.deepPortfolioData - Optional deep portfolio data
+ * @param {object} params.bigqueryBatchManager - Optional BigQuery batch manager (if provided, batches writes)
  * @returns {Promise<void>}
  */
- async function storePopularInvestorPortfolio({ db, logger, collectionRegistry, cid, date, portfolioData, deepPortfolioData = null }) {
-   // 1. Store overall portfolio to root data collection
-   // Structure: PopularInvestorPortfolioData/{date}/{cid}/{cid}
-   const rootDataRef = db.collection('PopularInvestorPortfolioData')
-     .doc(date)
-     .collection(String(cid))
-     .doc(String(cid));
-
-   const portfolioDoc = {
-     ...portfolioData,
-     fetchedAt: FieldValue.serverTimestamp(),
-     cid: String(cid)
-   };
-
-   // 2. If deep portfolio data exists, merge it
-   if (deepPortfolioData && deepPortfolioData.positions) {
-     portfolioDoc.deepPositions = deepPortfolioData.positions;
-     portfolioDoc.deepFetchedAt = FieldValue.serverTimestamp();
+ async function storePopularInvestorPortfolio({ db, logger, collectionRegistry, cid, date, portfolioData, deepPortfolioData = null, bigqueryBatchManager = null }) {
+   // 1. Write to BigQuery (for computations) - use batch manager if provided, otherwise direct write
+   if (process.env.BIGQUERY_ENABLED !== 'false') {
+     try {
+       const portfolioDoc = {
+         ...portfolioData,
+         cid: String(cid)
+       };
+
+       // If deep portfolio data exists, merge it
+       if (deepPortfolioData && deepPortfolioData.positions) {
+         portfolioDoc.deepPositions = deepPortfolioData.positions;
+       }
+
+       const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+       const row = {
+         date: date,
+         user_id: Number(cid),
+         user_type: 'POPULAR_INVESTOR',
+         portfolio_data: JSON.stringify(portfolioDoc), // BigQuery JSON type requires a string
+         fetched_at: new Date().toISOString()
+       };
+
+       if (bigqueryBatchManager) {
+         // Add to batch (will flush with Firestore batches)
+         await bigqueryBatchManager.addPortfolioRow(row);
+       } else {
+         // Direct write (fallback for when batch manager not available)
+         await ensurePortfolioSnapshotsTable(logger);
+         await insertRows(datasetId, 'portfolio_snapshots', [row], logger);
+         logger.log('INFO', `[DataStorage] ✅ Wrote portfolio to BigQuery for PI ${cid} (date: ${date})`);
+       }
+     } catch (bqError) {
+       logger.log('WARN', `[DataStorage] BigQuery write failed for PI ${cid} (${date}): ${bqError.message}`);
+       // Continue to Firestore write (fallback)
+     }
   }

-   await rootDataRef.set(portfolioDoc, { merge: false });
-
-   // 3. Store latest snapshot to user-centric collection (for fallback)
+   // 2. Store latest snapshot to user-centric collection (for fallback/quick access)
    const { getCollectionPath } = collectionRegistry || {};
    if (!getCollectionPath) {
      throw new Error('collectionRegistry.getCollectionPath is required');
@@ -231,23 +294,38 @@ async function storePopularInvestorPortfolio({ db, logger, collectionRegistry, c
  * @param {string} params.cid - PI CID
  * @param {string} params.date - Date string (YYYY-MM-DD)
  * @param {object} params.historyData - Trade history data to store
+ * @param {object} params.bigqueryBatchManager - Optional BigQuery batch manager (if provided, batches writes)
  * @returns {Promise<void>}
  */
- async function storePopularInvestorTradeHistory({ db, logger, collectionRegistry, cid, date, historyData }) {
-   // 1. Store to root data collection (for computations)
-   // Structure: PopularInvestorTradeHistoryData/{date}/{cid}/{cid}
-   const rootDataRef = db.collection('PopularInvestorTradeHistoryData')
-     .doc(date)
-     .collection(String(cid))
-     .doc(String(cid));
-
-   await rootDataRef.set({
-     ...historyData,
-     fetchedAt: FieldValue.serverTimestamp(),
-     cid: String(cid)
-   }, { merge: false });
+ async function storePopularInvestorTradeHistory({ db, logger, collectionRegistry, cid, date, historyData, bigqueryBatchManager = null }) {
+   // 1. Write to BigQuery (for computations) - use batch manager if provided, otherwise direct write
+   if (process.env.BIGQUERY_ENABLED !== 'false') {
+     try {
+       const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+       const row = {
+         date: date,
+         user_id: Number(cid),
+         user_type: 'POPULAR_INVESTOR',
+         history_data: JSON.stringify(historyData), // BigQuery JSON type requires a string
+         fetched_at: new Date().toISOString()
+       };
+
+       if (bigqueryBatchManager) {
+         // Add to batch (will flush with Firestore batches)
+         await bigqueryBatchManager.addHistoryRow(row);
+       } else {
+         // Direct write (fallback for when batch manager not available)
+         await ensureTradeHistorySnapshotsTable(logger);
+         await insertRows(datasetId, 'trade_history_snapshots', [row], logger);
+         logger.log('INFO', `[DataStorage] ✅ Wrote trade history to BigQuery for PI ${cid} (date: ${date})`);
+       }
+     } catch (bqError) {
+       logger.log('WARN', `[DataStorage] BigQuery write failed for PI ${cid} (${date}): ${bqError.message}`);
+       // Continue to Firestore write (fallback)
+     }
+   }

-   // 2. Store latest snapshot to user-centric collection (for fallback)
+   // 2. Store latest snapshot to user-centric collection (for fallback/quick access)
    const { getCollectionPath } = collectionRegistry || {};
    if (!getCollectionPath) {
      throw new Error('collectionRegistry.getCollectionPath is required');
@@ -275,30 +353,44 @@ async function storePopularInvestorTradeHistory({ db, logger, collectionRegistry
  * @param {string} params.cid - PI CID
  * @param {string} params.date - Date string (YYYY-MM-DD)
  * @param {Array} params.posts - Array of social posts
+ * @param {object} params.bigqueryBatchManager - Optional BigQuery batch manager (if provided, batches writes)
  * @returns {Promise<void>}
  */
- async function storePopularInvestorSocialPosts({ db, logger, collectionRegistry, cid, date, posts }) {
-   // 1. Store to root data collection (for computations)
-   // Structure: PopularInvestorSocialPostData/{date}/{cid}/{cid}
-   const rootDataRef = db.collection('PopularInvestorSocialPostData')
-     .doc(date)
-     .collection(String(cid))
-     .doc(String(cid));
-
-   const postsMap = {};
-   for (const post of posts) {
-     if (post.id || post.postId) {
-       postsMap[post.id || post.postId] = post;
+ async function storePopularInvestorSocialPosts({ db, logger, collectionRegistry, cid, date, posts, bigqueryBatchManager = null }) {
+   // 1. Write to BigQuery (for computations) - use batch manager if provided, otherwise direct write
+   if (process.env.BIGQUERY_ENABLED !== 'false') {
+     try {
+       const postsMap = {};
+       for (const post of posts) {
+         if (post.id || post.postId) {
+           postsMap[post.id || post.postId] = post;
+         }
+       }
+
+       const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+       const row = {
+         date: date,
+         user_id: Number(cid),
+         user_type: 'POPULAR_INVESTOR',
+         posts_data: JSON.stringify({ posts: postsMap, postCount: posts.length }), // BigQuery JSON type requires a string
+         fetched_at: new Date().toISOString()
+       };
+
+       if (bigqueryBatchManager) {
+         // Add to batch (will flush with Firestore batches)
+         await bigqueryBatchManager.addSocialRow(row);
+       } else {
+         // Direct write (fallback for when batch manager not available)
+         await ensureSocialPostSnapshotsTable(logger);
+         await insertRows(datasetId, 'social_post_snapshots', [row], logger);
+         logger.log('INFO', `[DataStorage] ✅ Wrote social posts to BigQuery for PI ${cid} (date: ${date}, ${posts.length} posts)`);
+       }
+     } catch (bqError) {
+       logger.log('WARN', `[DataStorage] BigQuery write failed for PI ${cid} (${date}): ${bqError.message}`);
+       // Continue to Firestore write (fallback)
     }
   }

-   await rootDataRef.set({
-     posts: postsMap,
-     fetchedAt: FieldValue.serverTimestamp(),
-     cid: String(cid),
-     postCount: posts.length
-   }, { merge: false });
-
    // 2. Store latest posts to user-centric collection (for fallback)
    // Path structure: PopularInvestors/{piCid}/posts/{postId}
    // Construct path directly - we know the structure
@@ -96,7 +96,8 @@ async function updateLastUpdated(db, collectionRegistry, cid, userType, dataType
  }

  async function processPortfolio(context, config, taskData, isPI) {
-   const { db, logger, collectionRegistry, proxyManager, headerManager } = context;
+   const { db, logger, collectionRegistry, proxyManager, headerManager, batchManager } = context;
+   const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
    const { cid, username, uuid, today, requestOptions } = taskData;
    const url = `${config.ETORO_API_PORTFOLIO_URL}?cid=${cid}&client_request_id=${uuid}`;

@@ -132,7 +133,8 @@
  }

  async function processHistory(context, config, taskData, isPI) {
-   const { db, logger, collectionRegistry, proxyManager, headerManager } = context;
+   const { db, logger, collectionRegistry, proxyManager, headerManager, batchManager } = context;
+   const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
    const { cid, uuid, today, requestOptions } = taskData;
    const oneYearAgo = new Date(); oneYearAgo.setFullYear(oneYearAgo.getFullYear() - 1);
    const url = `${config.ETORO_API_HISTORY_URL}?StartTime=${oneYearAgo.toISOString()}&PageNumber=1&ItemsPerPage=30000&PublicHistoryPortfolioFilter=&CID=${cid}&client_request_id=${uuid}`;
@@ -145,15 +147,16 @@
      data.PublicHistoryPositions = data.PublicHistoryPositions.filter(p => [0, 1, 5].includes(p.CloseReason));
    }

-   if (isPI) await storePopularInvestorTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data });
-   else await storeSignedInUserTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data });
+   if (isPI) await storePopularInvestorTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data, bigqueryBatchManager });
+   else await storeSignedInUserTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data, bigqueryBatchManager });

    await updateLastUpdated(db, collectionRegistry, cid, isPI ? 'popularInvestor' : 'signedInUser', 'tradeHistory', logger);
    return true;
  }

  async function processSocial(context, config, taskData, isPI) {
-   const { db, logger, collectionRegistry, proxyManager, headerManager } = context;
+   const { db, logger, collectionRegistry, proxyManager, headerManager, batchManager } = context;
+   const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
    const { cid, username, uuid, today, requestOptions } = taskData;
    const { getGcidForUser } = require('../../social-task-handler/helpers/handler_helpers');

@@ -172,8 +175,8 @@ async function processSocial(context, config, taskData, isPI) {
    }));

    // Store posts even if empty (to mark that social fetch was attempted)
-   if (isPI) await storePopularInvestorSocialPosts({ db, logger, collectionRegistry, cid, date: today, posts });
-   else await storeSignedInUserSocialPosts({ db, logger, collectionRegistry, cid, date: today, posts });
+   if (isPI) await storePopularInvestorSocialPosts({ db, logger, collectionRegistry, cid, date: today, posts, bigqueryBatchManager });
+   else await storeSignedInUserSocialPosts({ db, logger, collectionRegistry, cid, date: today, posts, bigqueryBatchManager });

    // Update lastUpdated timestamp to indicate social fetch completed (even if 0 posts)
    await updateLastUpdated(db, collectionRegistry, cid, isPI ? 'popularInvestor' : 'signedInUser', 'socialPosts', logger);
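
Both the store helpers above and the batch manager that follows depend on `../../core/utils/bigquery_utils`, which is not included in this diff. The sketch below is a rough guess at what those helpers presumably look like: the @google-cloud/bigquery calls are real client methods, but the table schema and partitioning are inferred from the row shapes built above and may not match the actual module.

// Hypothetical sketch of ../../core/utils/bigquery_utils (not part of this diff).
const { BigQuery } = require('@google-cloud/bigquery');
const bigquery = new BigQuery();

// Create the dataset on first use and return a Dataset handle.
async function getOrCreateDataset(datasetId, logger) {
  const dataset = bigquery.dataset(datasetId);
  const [exists] = await dataset.exists();
  if (!exists) {
    await bigquery.createDataset(datasetId);
    if (logger) logger.log('INFO', `[BigQuery] Created dataset ${datasetId}`);
  }
  return dataset;
}

// Ensure the portfolio_snapshots table exists; column names mirror the rows built above.
// The trade-history and social-post variants would follow the same pattern.
async function ensurePortfolioSnapshotsTable(logger) {
  const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
  const dataset = await getOrCreateDataset(datasetId, logger);
  const table = dataset.table('portfolio_snapshots');
  const [exists] = await table.exists();
  if (!exists) {
    await dataset.createTable('portfolio_snapshots', {
      schema: [
        { name: 'date', type: 'DATE' },
        { name: 'user_id', type: 'INT64' },
        { name: 'user_type', type: 'STRING' },
        { name: 'portfolio_data', type: 'JSON' },
        { name: 'fetched_at', type: 'TIMESTAMP' }
      ],
      timePartitioning: { type: 'DAY', field: 'date' }
    });
  }
}

// Streaming insert used by the direct-write path; the batch manager in the new file
// below avoids this path in favour of free load jobs.
async function insertRows(datasetId, tableId, rows, logger) {
  await bigquery.dataset(datasetId).table(tableId).insert(rows);
  if (logger) logger.log('INFO', `[BigQuery] Inserted ${rows.length} rows into ${tableId}`);
}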
@@ -0,0 +1,201 @@
+ /**
+  * @fileoverview BigQuery Batch Manager for Task Engine
+  *
+  * Batches BigQuery writes and flushes them using LOAD JOBS (FREE).
+  * Integrates with FirestoreBatchManager to flush together.
+  *
+  * Lessons learned from backfill script:
+  * - Use createLoadJob() not table.load()
+  * - Use temp files for load jobs
+  * - Wait for job completion with getMetadata() polling
+  * - Handle errors gracefully
+  * - Clean up temp files properly
+  */
+
+ const fs = require('fs');
+ const path = require('path');
+ const os = require('os');
+ const {
+   getOrCreateDataset,
+   ensurePortfolioSnapshotsTable,
+   ensureTradeHistorySnapshotsTable,
+   ensureSocialPostSnapshotsTable
+ } = require('../../core/utils/bigquery_utils');
+
+ class BigQueryBatchManager {
+   constructor(logger) {
+     this.logger = logger;
+     this.datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+
+     // Buffers for each table
+     this.portfolioBuffer = [];
+     this.historyBuffer = [];
+     this.socialBuffer = [];
+
+     // Track if tables are ensured (avoid repeated checks)
+     this.tablesEnsured = {
+       portfolio: false,
+       history: false,
+       social: false
+     };
+   }
+
+   /**
+    * Add portfolio row to buffer
+    */
+   async addPortfolioRow(row) {
+     if (!this.tablesEnsured.portfolio) {
+       await ensurePortfolioSnapshotsTable(this.logger);
+       this.tablesEnsured.portfolio = true;
+     }
+     this.portfolioBuffer.push(row);
+   }
+
+   /**
+    * Add trade history row to buffer
+    */
+   async addHistoryRow(row) {
+     if (!this.tablesEnsured.history) {
+       await ensureTradeHistorySnapshotsTable(this.logger);
+       this.tablesEnsured.history = true;
+     }
+     this.historyBuffer.push(row);
+   }
+
+   /**
+    * Add social post row to buffer
+    */
+   async addSocialRow(row) {
+     if (!this.tablesEnsured.social) {
+       await ensureSocialPostSnapshotsTable(this.logger);
+       this.tablesEnsured.social = true;
+     }
+     this.socialBuffer.push(row);
+   }
+
+   /**
+    * Flush a buffer to BigQuery using load job
+    * Uses lessons from backfill: createLoadJob, temp files, proper polling
+    */
+   async _flushBuffer(buffer, tableId, tableName) {
+     if (buffer.length === 0) return 0;
+
+     const rows = [...buffer]; // Copy buffer
+     buffer.length = 0; // Clear buffer
+
+     try {
+       const dataset = await getOrCreateDataset(this.datasetId, this.logger);
+       const table = dataset.table(tableId);
+
+       // Write to temporary file (load jobs require a file, not in-memory data)
+       const tempFile = path.join(os.tmpdir(), `bigquery_${tableId}_${Date.now()}_${Math.random().toString(36).substring(7)}.ndjson`);
+       const ndjson = rows.map(r => JSON.stringify(r)).join('\n');
+
+       try {
+         fs.writeFileSync(tempFile, ndjson, 'utf8');
+
+         // Create load job (FREE) - using createLoadJob as learned from backfill
+         const [job] = await table.createLoadJob(tempFile, {
+           sourceFormat: 'NEWLINE_DELIMITED_JSON',
+           writeDisposition: 'WRITE_APPEND',
+           autodetect: false // Use existing table schema
+         });
+
+         // Wait for job to complete using polling (as learned from backfill)
+         let jobMetadata;
+         const maxAttempts = 60; // 5 minutes max (5 second intervals)
+         const pollInterval = 5000; // 5 seconds
+
+         for (let attempt = 0; attempt < maxAttempts; attempt++) {
+           [jobMetadata] = await job.getMetadata();
+           const state = jobMetadata.status?.state;
+
+           if (state === 'DONE') {
+             break;
+           }
+
+           if (state === 'PENDING' || state === 'RUNNING') {
+             // Wait before next poll
+             await new Promise(resolve => setTimeout(resolve, pollInterval));
+           } else {
+             throw new Error(`Unexpected job state: ${state}`);
+           }
+         }
+
+         // Check if we timed out
+         if (jobMetadata.status?.state !== 'DONE') {
+           throw new Error(`Load job did not complete within ${maxAttempts * pollInterval / 1000} seconds`);
+         }
+
+         // Check for errors
+         if (jobMetadata.status?.errorResult) {
+           throw new Error(`Load job failed: ${jobMetadata.status.errorResult.message}`);
+         }
+
+         const rowsLoaded = jobMetadata.statistics?.load?.outputRows || rows.length;
+
+         if (this.logger) {
+           this.logger.log('INFO', `[BigQueryBatch] ✅ Flushed ${rowsLoaded} ${tableName} rows to BigQuery using LOAD JOB (free)`);
+         }
+
+         return rowsLoaded;
+       } finally {
+         // Clean up temp file (as learned from backfill)
+         try {
+           if (fs.existsSync(tempFile)) {
+             fs.unlinkSync(tempFile);
+           }
+         } catch (cleanupError) {
+           if (this.logger) {
+             this.logger.log('WARN', `[BigQueryBatch] Failed to delete temp file ${tempFile}: ${cleanupError.message}`);
+           }
+         }
+       }
+     } catch (error) {
+       // Log error but don't throw - allow Firestore writes to continue
+       if (this.logger) {
+         this.logger.log('WARN', `[BigQueryBatch] Failed to flush ${tableName} to BigQuery: ${error.message}`);
+       }
+       // Put rows back in buffer for retry on next flush
+       buffer.push(...rows);
+       return 0;
+     }
+   }
+
+   /**
+    * Flush all buffers to BigQuery
+    * Called by FirestoreBatchManager.flushBatches()
+    */
+   async flushBatches() {
+     if (process.env.BIGQUERY_ENABLED === 'false') {
+       return; // Skip if BigQuery disabled
+     }
+
+     const results = await Promise.allSettled([
+       this._flushBuffer(this.portfolioBuffer, 'portfolio_snapshots', 'portfolio'),
+       this._flushBuffer(this.historyBuffer, 'trade_history_snapshots', 'history'),
+       this._flushBuffer(this.socialBuffer, 'social_post_snapshots', 'social')
+     ]);
+
+     const totalFlushed = results
+       .filter(r => r.status === 'fulfilled')
+       .reduce((sum, r) => sum + r.value, 0);
+
+     if (totalFlushed > 0 && this.logger) {
+       this.logger.log('INFO', `[BigQueryBatch] Flushed ${totalFlushed} total rows to BigQuery`);
+     }
+   }
+
+   /**
+    * Get buffer sizes (for monitoring)
+    */
+   getBufferSizes() {
+     return {
+       portfolio: this.portfolioBuffer.length,
+       history: this.historyBuffer.length,
+       social: this.socialBuffer.length
+     };
+   }
+ }
+
+ module.exports = { BigQueryBatchManager };
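
For orientation, the only integration points this diff relies on are `batchManager.getBigQueryBatchManager()` on the task-engine context and a `flushBatches()` call that runs alongside the Firestore flush. The wiring sketch below assumes a FirestoreBatchManager that owns the BigQuery manager; the class shape and the require path are illustrative and not taken from the package.

// Hypothetical wiring; only getBigQueryBatchManager() and flushBatches() appear in this diff.
const { BigQueryBatchManager } = require('./bigquery_batch_manager'); // path is illustrative

class FirestoreBatchManager {
  constructor(db, logger) {
    this.db = db;
    this.logger = logger;
    this.bigQueryBatchManager = new BigQueryBatchManager(logger);
  }

  // Store helpers reach the BigQuery buffers through this accessor.
  getBigQueryBatchManager() {
    return this.bigQueryBatchManager;
  }

  async flushBatches() {
    // ...commit any pending Firestore batched writes here (elided)...
    // Then flush the buffered BigQuery rows as load jobs.
    await this.bigQueryBatchManager.flushBatches();
  }
}

// Inside a task handler the manager is pulled off the shared context, as in the diff:
//   const bigqueryBatchManager = context.batchManager?.getBigQueryBatchManager?.() || null;
//   await storePopularInvestorPortfolio({ db, logger, collectionRegistry, cid, date, portfolioData, bigqueryBatchManager });
// and the engine calls context.batchManager.flushBatches() once the task batch completes.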