bulltrackers-module 1.0.710 → 1.0.712
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/api-v2/helpers/data-fetchers/firestore.js +119 -63
- package/functions/computation-system/data/CachedDataLoader.js +22 -1
- package/functions/computation-system/data/DependencyFetcher.js +118 -0
- package/functions/computation-system/persistence/ResultCommitter.js +94 -3
- package/functions/computation-system/utils/data_loader.js +244 -13
- package/functions/core/utils/bigquery_utils.js +1655 -0
- package/functions/core/utils/firestore_utils.js +99 -30
- package/functions/etoro-price-fetcher/helpers/handler_helpers.js +85 -13
- package/functions/fetch-insights/helpers/handler_helpers.js +26 -0
- package/functions/fetch-popular-investors/helpers/fetch_helpers.js +66 -0
- package/functions/price-backfill/helpers/handler_helpers.js +59 -10
- package/functions/root-data-indexer/index.js +79 -27
- package/functions/task-engine/helpers/data_storage_helpers.js +194 -102
- package/functions/task-engine/helpers/popular_investor_helpers.js +13 -7
- package/functions/task-engine/utils/bigquery_batch_manager.js +201 -0
- package/functions/task-engine/utils/firestore_batch_manager.js +21 -1
- package/index.js +34 -2
- package/package.json +3 -2
package/functions/task-engine/helpers/data_storage_helpers.js

@@ -2,13 +2,20 @@
  * @fileoverview Data Storage Helpers for New Collection Structure
  *
  * Stores data to:
- * 1.
+ * 1. BigQuery (date-based, per-user) - for computation system (NEW)
  * 2. User-centric collections (latest snapshot) - for fallback/quick access
  *
+ * UPDATED: Removed root data Firestore writes, now writes to BigQuery instead
  * Uses the centralized collection registry for all paths.
  */
 
 const { FieldValue } = require('@google-cloud/firestore');
+const {
+  ensurePortfolioSnapshotsTable,
+  ensureTradeHistorySnapshotsTable,
+  ensureSocialPostSnapshotsTable,
+  insertRows
+} = require('../../core/utils/bigquery_utils');
 
 /**
  * Store portfolio data for a signed-in user
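The new imports wire every store helper in this file to BigQuery. As a caller-side illustration of the dual-write design described in the file overview, here is a minimal sketch. It assumes storeSignedInUserPortfolio is exported by this module and stubs the logger and collectionRegistry, whose real shapes are outside this diff:

// Illustrative only - the require path, export, and registry signature are assumptions.
const { storeSignedInUserPortfolio } = require('./functions/task-engine/helpers/data_storage_helpers');
const { Firestore } = require('@google-cloud/firestore');

(async () => {
  const db = new Firestore();
  const logger = { log: (level, msg) => console.log(`[${level}] ${msg}`) };
  const collectionRegistry = {
    // Hypothetical shape: resolves a registered collection name to a Firestore path
    getCollectionPath: (name, { cid }) => `SignedInUsers/${cid}/${name}`
  };

  // Writes to BigQuery first (skipped when BIGQUERY_ENABLED === 'false'),
  // then stores the latest snapshot in the user-centric collection.
  await storeSignedInUserPortfolio({
    db, logger, collectionRegistry,
    cid: '12345',
    date: '2024-01-15',
    portfolioData: { positions: [] }
  });
})();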
@@ -19,23 +26,38 @@ const { FieldValue } = require('@google-cloud/firestore');
  * @param {string} params.cid - User CID
  * @param {string} params.date - Date string (YYYY-MM-DD)
  * @param {object} params.portfolioData - Portfolio data to store
+ * @param {object} params.bigqueryBatchManager - Optional BigQuery batch manager (if provided, batches writes)
  * @returns {Promise<void>}
  */
-async function storeSignedInUserPortfolio({ db, logger, collectionRegistry, cid, date, portfolioData }) {
-  // 1.
-
-
-
-
-
-
-
-
-
-
+async function storeSignedInUserPortfolio({ db, logger, collectionRegistry, cid, date, portfolioData, bigqueryBatchManager = null }) {
+  // 1. Write to BigQuery (for computations) - use batch manager if provided, otherwise direct write
+  if (process.env.BIGQUERY_ENABLED !== 'false') {
+    try {
+      const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+      const row = {
+        date: date,
+        user_id: Number(cid),
+        user_type: 'SIGNED_IN_USER',
+        portfolio_data: JSON.stringify(portfolioData), // BigQuery JSON type requires a string
+        fetched_at: new Date().toISOString()
+      };
+
+      if (bigqueryBatchManager) {
+        // Add to batch (will flush with Firestore batches)
+        await bigqueryBatchManager.addPortfolioRow(row);
+      } else {
+        // Direct write (fallback for when batch manager not available)
+        await ensurePortfolioSnapshotsTable(logger);
+        await insertRows(datasetId, 'portfolio_snapshots', [row], logger);
+        logger.log('INFO', `[DataStorage] ✅ Wrote portfolio to BigQuery for signed-in user ${cid} (date: ${date})`);
+      }
+    } catch (bqError) {
+      logger.log('WARN', `[DataStorage] BigQuery write failed for signed-in user ${cid} (${date}): ${bqError.message}`);
+      // Continue to Firestore write (fallback)
+    }
+  }
 
-  // 2. Store latest snapshot to user-centric collection (for fallback)
+  // 2. Store latest snapshot to user-centric collection (for fallback/quick access)
   const { getCollectionPath } = collectionRegistry || {};
   if (!getCollectionPath) {
     throw new Error('collectionRegistry.getCollectionPath is required');
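The same row layout recurs in every helper below, differing only in the JSON column (portfolio_data, history_data, or posts_data) and the user_type value. A restatement with inferred column types; the actual schema lives in bigquery_utils.js, which this diff does not show, so the type notes are assumptions:

// Row shape shared by the store helpers (column types inferred, not confirmed):
const row = {
  date: '2024-01-15',                  // date string, presumably a DATE column
  user_id: 12345,                      // Number(cid)
  user_type: 'SIGNED_IN_USER',         // or 'POPULAR_INVESTOR'
  portfolio_data: JSON.stringify({}),  // JSON column; the insert expects a string
  fetched_at: new Date().toISOString() // presumably a TIMESTAMP column
};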
@@ -63,23 +85,38 @@ async function storeSignedInUserPortfolio({ db, logger, collectionRegistry, cid,
  * @param {string} params.cid - User CID
  * @param {string} params.date - Date string (YYYY-MM-DD)
  * @param {object} params.historyData - Trade history data to store
+ * @param {object} params.bigqueryBatchManager - Optional BigQuery batch manager (if provided, batches writes)
  * @returns {Promise<void>}
  */
-async function storeSignedInUserTradeHistory({ db, logger, collectionRegistry, cid, date, historyData }) {
-  // 1.
-
-
-
-
-
-
-
-
-
-
+async function storeSignedInUserTradeHistory({ db, logger, collectionRegistry, cid, date, historyData, bigqueryBatchManager = null }) {
+  // 1. Write to BigQuery (for computations) - use batch manager if provided, otherwise direct write
+  if (process.env.BIGQUERY_ENABLED !== 'false') {
+    try {
+      const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+      const row = {
+        date: date,
+        user_id: Number(cid),
+        user_type: 'SIGNED_IN_USER',
+        history_data: JSON.stringify(historyData), // BigQuery JSON type requires a string
+        fetched_at: new Date().toISOString()
+      };
+
+      if (bigqueryBatchManager) {
+        // Add to batch (will flush with Firestore batches)
+        await bigqueryBatchManager.addHistoryRow(row);
+      } else {
+        // Direct write (fallback for when batch manager not available)
+        await ensureTradeHistorySnapshotsTable(logger);
+        await insertRows(datasetId, 'trade_history_snapshots', [row], logger);
+        logger.log('INFO', `[DataStorage] ✅ Wrote trade history to BigQuery for signed-in user ${cid} (date: ${date})`);
+      }
+    } catch (bqError) {
+      logger.log('WARN', `[DataStorage] BigQuery write failed for signed-in user ${cid} (${date}): ${bqError.message}`);
+      // Continue to Firestore write (fallback)
+    }
+  }
 
-  // 2. Store latest snapshot to user-centric collection (for fallback)
+  // 2. Store latest snapshot to user-centric collection (for fallback/quick access)
   const { getCollectionPath } = collectionRegistry || {};
   if (!getCollectionPath) {
     throw new Error('collectionRegistry.getCollectionPath is required');
@@ -107,30 +144,39 @@ async function storeSignedInUserTradeHistory({ db, logger, collectionRegistry, c
  * @param {string} params.cid - User CID
  * @param {string} params.date - Date string (YYYY-MM-DD)
  * @param {Array} params.posts - Array of social posts
+ * @param {object} params.bigqueryBatchManager - Optional BigQuery batch manager (if provided, batches writes)
  * @returns {Promise<void>}
  */
-async function storeSignedInUserSocialPosts({ db, logger, collectionRegistry, cid, date, posts }) {
-  // 1.
-
-
-
-
-
-
-
-
-
+async function storeSignedInUserSocialPosts({ db, logger, collectionRegistry, cid, date, posts, bigqueryBatchManager = null }) {
+  // 1. Write to BigQuery (for computations)
+  if (process.env.BIGQUERY_ENABLED !== 'false') {
+    try {
+      await ensureSocialPostSnapshotsTable(logger);
+
+      const postsMap = {};
+      for (const post of posts) {
+        if (post.id || post.postId) {
+          postsMap[post.id || post.postId] = post;
+        }
+      }
+
+      const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+      const row = {
+        date: date,
+        user_id: Number(cid),
+        user_type: 'SIGNED_IN_USER',
+        posts_data: JSON.stringify({ posts: postsMap, postCount: posts.length }), // BigQuery JSON type requires a string
+        fetched_at: new Date().toISOString()
+      };
+
+      await insertRows(datasetId, 'social_post_snapshots', [row], logger);
+      logger.log('INFO', `[DataStorage] ✅ Wrote social posts to BigQuery for signed-in user ${cid} (date: ${date}, ${posts.length} posts)`);
+    } catch (bqError) {
+      logger.log('WARN', `[DataStorage] BigQuery write failed for signed-in user ${cid} (${date}): ${bqError.message}`);
+      // Continue to Firestore write (fallback)
     }
   }
 
-  await rootDataRef.set({
-    posts: postsMap,
-    fetchedAt: FieldValue.serverTimestamp(),
-    cid: String(cid),
-    postCount: posts.length
-  }, { merge: false });
-
   // 2. Store latest posts to user-centric collection (for fallback)
   // Path structure: SignedInUsers/{cid}/posts/{postId}
   // Construct path directly - we know the structure
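One subtlety in the social-post path above: posts are re-keyed by id into a map, and posts lacking both id and postId are silently dropped, while postCount still reflects the original array length. An illustration with made-up post objects:

// Hypothetical posts array (field values are illustrative):
const posts = [{ id: 'a1', text: 'hi' }, { postId: 'b2', text: 'yo' }, { text: 'no id' }];

// After the loop in the hunk above:
// postsMap = { a1: { id: 'a1', ... }, b2: { postId: 'b2', ... } }
// posts_data = JSON.stringify({ posts: postsMap, postCount: 3 })
// - postCount is 3 even though only 2 posts survive the re-keying.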
@@ -180,31 +226,48 @@ async function storeSignedInUserSocialPosts({ db, logger, collectionRegistry, ci
  * @param {string} params.date - Date string (YYYY-MM-DD)
  * @param {object} params.portfolioData - Portfolio data to store
  * @param {object} params.deepPortfolioData - Optional deep portfolio data
+ * @param {object} params.bigqueryBatchManager - Optional BigQuery batch manager (if provided, batches writes)
  * @returns {Promise<void>}
  */
-async function storePopularInvestorPortfolio({ db, logger, collectionRegistry, cid, date, portfolioData, deepPortfolioData = null }) {
-  // 1.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+async function storePopularInvestorPortfolio({ db, logger, collectionRegistry, cid, date, portfolioData, deepPortfolioData = null, bigqueryBatchManager = null }) {
+  // 1. Write to BigQuery (for computations) - use batch manager if provided, otherwise direct write
+  if (process.env.BIGQUERY_ENABLED !== 'false') {
+    try {
+      const portfolioDoc = {
+        ...portfolioData,
+        cid: String(cid)
+      };
+
+      // If deep portfolio data exists, merge it
+      if (deepPortfolioData && deepPortfolioData.positions) {
+        portfolioDoc.deepPositions = deepPortfolioData.positions;
+      }
+
+      const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+      const row = {
+        date: date,
+        user_id: Number(cid),
+        user_type: 'POPULAR_INVESTOR',
+        portfolio_data: JSON.stringify(portfolioDoc), // BigQuery JSON type requires a string
+        fetched_at: new Date().toISOString()
+      };
+
+      if (bigqueryBatchManager) {
+        // Add to batch (will flush with Firestore batches)
+        await bigqueryBatchManager.addPortfolioRow(row);
+      } else {
+        // Direct write (fallback for when batch manager not available)
+        await ensurePortfolioSnapshotsTable(logger);
+        await insertRows(datasetId, 'portfolio_snapshots', [row], logger);
+        logger.log('INFO', `[DataStorage] ✅ Wrote portfolio to BigQuery for PI ${cid} (date: ${date})`);
+      }
+    } catch (bqError) {
+      logger.log('WARN', `[DataStorage] BigQuery write failed for PI ${cid} (${date}): ${bqError.message}`);
+      // Continue to Firestore write (fallback)
+    }
   }
 
-
-
-  // 3. Store latest snapshot to user-centric collection (for fallback)
+  // 2. Store latest snapshot to user-centric collection (for fallback/quick access)
   const { getCollectionPath } = collectionRegistry || {};
   if (!getCollectionPath) {
     throw new Error('collectionRegistry.getCollectionPath is required');
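Worth noting in the hunk above: deep position data is folded into the same serialized BigQuery payload rather than stored separately. A hypothetical call sketch, reusing the stubs from the earlier caller example and assuming the surrounding async context:

// Inside an async function; db, logger, collectionRegistry as in the earlier
// sketch, bigqueryBatchManager obtained from the task-engine context.
await storePopularInvestorPortfolio({
  db, logger, collectionRegistry,
  cid: '98765',
  date: '2024-01-15',
  portfolioData: { positions: [] },
  // When present, positions land in the serialized payload as deepPositions:
  deepPortfolioData: { positions: [{ InstrumentID: 1 }] },
  bigqueryBatchManager
});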
@@ -231,23 +294,38 @@ async function storePopularInvestorPortfolio({ db, logger, collectionRegistry, c
  * @param {string} params.cid - PI CID
  * @param {string} params.date - Date string (YYYY-MM-DD)
  * @param {object} params.historyData - Trade history data to store
+ * @param {object} params.bigqueryBatchManager - Optional BigQuery batch manager (if provided, batches writes)
  * @returns {Promise<void>}
  */
-async function storePopularInvestorTradeHistory({ db, logger, collectionRegistry, cid, date, historyData }) {
-  // 1.
-
-
-
-
-
-
-
-
-
-
+async function storePopularInvestorTradeHistory({ db, logger, collectionRegistry, cid, date, historyData, bigqueryBatchManager = null }) {
+  // 1. Write to BigQuery (for computations) - use batch manager if provided, otherwise direct write
+  if (process.env.BIGQUERY_ENABLED !== 'false') {
+    try {
+      const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+      const row = {
+        date: date,
+        user_id: Number(cid),
+        user_type: 'POPULAR_INVESTOR',
+        history_data: JSON.stringify(historyData), // BigQuery JSON type requires a string
+        fetched_at: new Date().toISOString()
+      };
+
+      if (bigqueryBatchManager) {
+        // Add to batch (will flush with Firestore batches)
+        await bigqueryBatchManager.addHistoryRow(row);
+      } else {
+        // Direct write (fallback for when batch manager not available)
+        await ensureTradeHistorySnapshotsTable(logger);
+        await insertRows(datasetId, 'trade_history_snapshots', [row], logger);
+        logger.log('INFO', `[DataStorage] ✅ Wrote trade history to BigQuery for PI ${cid} (date: ${date})`);
+      }
+    } catch (bqError) {
+      logger.log('WARN', `[DataStorage] BigQuery write failed for PI ${cid} (${date}): ${bqError.message}`);
+      // Continue to Firestore write (fallback)
+    }
+  }
 
-  // 2. Store latest snapshot to user-centric collection (for fallback)
+  // 2. Store latest snapshot to user-centric collection (for fallback/quick access)
   const { getCollectionPath } = collectionRegistry || {};
   if (!getCollectionPath) {
     throw new Error('collectionRegistry.getCollectionPath is required');
@@ -275,30 +353,44 @@ async function storePopularInvestorTradeHistory({ db, logger, collectionRegistry
  * @param {string} params.cid - PI CID
  * @param {string} params.date - Date string (YYYY-MM-DD)
  * @param {Array} params.posts - Array of social posts
+ * @param {object} params.bigqueryBatchManager - Optional BigQuery batch manager (if provided, batches writes)
  * @returns {Promise<void>}
  */
-async function storePopularInvestorSocialPosts({ db, logger, collectionRegistry, cid, date, posts }) {
-  // 1.
-
-
-
-
-
-
-
-
-
+async function storePopularInvestorSocialPosts({ db, logger, collectionRegistry, cid, date, posts, bigqueryBatchManager = null }) {
+  // 1. Write to BigQuery (for computations) - use batch manager if provided, otherwise direct write
+  if (process.env.BIGQUERY_ENABLED !== 'false') {
+    try {
+      const postsMap = {};
+      for (const post of posts) {
+        if (post.id || post.postId) {
+          postsMap[post.id || post.postId] = post;
+        }
+      }
+
+      const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+      const row = {
+        date: date,
+        user_id: Number(cid),
+        user_type: 'POPULAR_INVESTOR',
+        posts_data: JSON.stringify({ posts: postsMap, postCount: posts.length }), // BigQuery JSON type requires a string
+        fetched_at: new Date().toISOString()
+      };
+
+      if (bigqueryBatchManager) {
+        // Add to batch (will flush with Firestore batches)
+        await bigqueryBatchManager.addSocialRow(row);
+      } else {
+        // Direct write (fallback for when batch manager not available)
+        await ensureSocialPostSnapshotsTable(logger);
+        await insertRows(datasetId, 'social_post_snapshots', [row], logger);
+        logger.log('INFO', `[DataStorage] ✅ Wrote social posts to BigQuery for PI ${cid} (date: ${date}, ${posts.length} posts)`);
+      }
+    } catch (bqError) {
+      logger.log('WARN', `[DataStorage] BigQuery write failed for PI ${cid} (${date}): ${bqError.message}`);
+      // Continue to Firestore write (fallback)
     }
   }
 
-  await rootDataRef.set({
-    posts: postsMap,
-    fetchedAt: FieldValue.serverTimestamp(),
-    cid: String(cid),
-    postCount: posts.length
-  }, { merge: false });
-
   // 2. Store latest posts to user-centric collection (for fallback)
   // Path structure: PopularInvestors/{piCid}/posts/{postId}
   // Construct path directly - we know the structure
package/functions/task-engine/helpers/popular_investor_helpers.js

@@ -96,7 +96,8 @@ async function updateLastUpdated(db, collectionRegistry, cid, userType, dataType
 }
 
 async function processPortfolio(context, config, taskData, isPI) {
-  const { db, logger, collectionRegistry, proxyManager, headerManager } = context;
+  const { db, logger, collectionRegistry, proxyManager, headerManager, batchManager } = context;
+  const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
   const { cid, username, uuid, today, requestOptions } = taskData;
   const url = `${config.ETORO_API_PORTFOLIO_URL}?cid=${cid}&client_request_id=${uuid}`;
 
@@ -132,7 +133,8 @@ async function processPortfolio(context, config, taskData, isPI) {
 }
 
 async function processHistory(context, config, taskData, isPI) {
-  const { db, logger, collectionRegistry, proxyManager, headerManager } = context;
+  const { db, logger, collectionRegistry, proxyManager, headerManager, batchManager } = context;
+  const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
   const { cid, uuid, today, requestOptions } = taskData;
   const oneYearAgo = new Date(); oneYearAgo.setFullYear(oneYearAgo.getFullYear() - 1);
   const url = `${config.ETORO_API_HISTORY_URL}?StartTime=${oneYearAgo.toISOString()}&PageNumber=1&ItemsPerPage=30000&PublicHistoryPortfolioFilter=&CID=${cid}&client_request_id=${uuid}`;
@@ -145,15 +147,16 @@ async function processHistory(context, config, taskData, isPI) {
     data.PublicHistoryPositions = data.PublicHistoryPositions.filter(p => [0, 1, 5].includes(p.CloseReason));
   }
 
-  if (isPI) await storePopularInvestorTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data });
-  else await storeSignedInUserTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data });
+  if (isPI) await storePopularInvestorTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data, bigqueryBatchManager });
+  else await storeSignedInUserTradeHistory({ db, logger, collectionRegistry, cid, date: today, historyData: data, bigqueryBatchManager });
 
   await updateLastUpdated(db, collectionRegistry, cid, isPI ? 'popularInvestor' : 'signedInUser', 'tradeHistory', logger);
   return true;
 }
 
 async function processSocial(context, config, taskData, isPI) {
-  const { db, logger, collectionRegistry, proxyManager, headerManager } = context;
+  const { db, logger, collectionRegistry, proxyManager, headerManager, batchManager } = context;
+  const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
   const { cid, username, uuid, today, requestOptions } = taskData;
   const { getGcidForUser } = require('../../social-task-handler/helpers/handler_helpers');
 
@@ -172,8 +175,11 @@ async function processSocial(context, config, taskData, isPI) {
   }));
 
   // Store posts even if empty (to mark that social fetch was attempted)
-
-
+  const { batchManager } = context;
+  const bigqueryBatchManager = batchManager?.getBigQueryBatchManager?.() || null;
+
+  if (isPI) await storePopularInvestorSocialPosts({ db, logger, collectionRegistry, cid, date: today, posts, bigqueryBatchManager });
+  else await storeSignedInUserSocialPosts({ db, logger, collectionRegistry, cid, date: today, posts, bigqueryBatchManager });
 
   // Update lastUpdated timestamp to indicate social fetch completed (even if 0 posts)
   await updateLastUpdated(db, collectionRegistry, cid, isPI ? 'popularInvestor' : 'signedInUser', 'socialPosts', logger);
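All three process handlers resolve the batch manager with double optional chaining, so a context whose batchManager lacks getBigQueryBatchManager simply falls back to direct writes. A wiring sketch under stated assumptions; FirestoreBatchManager's real API is not shown in this diff, so getBigQueryBatchManager() is inferred from these call sites and the stubbed fields are placeholders:

// Assumed context shape - illustrative, not the package's actual wiring.
const { BigQueryBatchManager } = require('./functions/task-engine/utils/bigquery_batch_manager');

const logger = { log: (level, msg) => console.log(`[${level}] ${msg}`) };
const bqBatch = new BigQueryBatchManager(logger);

const context = {
  db: null,                 // Firestore instance in the real task engine
  logger,
  collectionRegistry: null, // registry instance in the real task engine
  proxyManager: null,
  headerManager: null,
  batchManager: {
    // Handlers call this via batchManager?.getBigQueryBatchManager?.(),
    // so omitting it silently disables batching rather than throwing.
    getBigQueryBatchManager: () => bqBatch
  }
};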
package/functions/task-engine/utils/bigquery_batch_manager.js

@@ -0,0 +1,201 @@
+/**
+ * @fileoverview BigQuery Batch Manager for Task Engine
+ *
+ * Batches BigQuery writes and flushes them using LOAD JOBS (FREE).
+ * Integrates with FirestoreBatchManager to flush together.
+ *
+ * Lessons learned from backfill script:
+ * - Use createLoadJob() not table.load()
+ * - Use temp files for load jobs
+ * - Wait for job completion with getMetadata() polling
+ * - Handle errors gracefully
+ * - Clean up temp files properly
+ */
+
+const fs = require('fs');
+const path = require('path');
+const os = require('os');
+const {
+  getOrCreateDataset,
+  ensurePortfolioSnapshotsTable,
+  ensureTradeHistorySnapshotsTable,
+  ensureSocialPostSnapshotsTable
+} = require('../../core/utils/bigquery_utils');
+
+class BigQueryBatchManager {
+  constructor(logger) {
+    this.logger = logger;
+    this.datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+
+    // Buffers for each table
+    this.portfolioBuffer = [];
+    this.historyBuffer = [];
+    this.socialBuffer = [];
+
+    // Track if tables are ensured (avoid repeated checks)
+    this.tablesEnsured = {
+      portfolio: false,
+      history: false,
+      social: false
+    };
+  }
+
+  /**
+   * Add portfolio row to buffer
+   */
+  async addPortfolioRow(row) {
+    if (!this.tablesEnsured.portfolio) {
+      await ensurePortfolioSnapshotsTable(this.logger);
+      this.tablesEnsured.portfolio = true;
+    }
+    this.portfolioBuffer.push(row);
+  }
+
+  /**
+   * Add trade history row to buffer
+   */
+  async addHistoryRow(row) {
+    if (!this.tablesEnsured.history) {
+      await ensureTradeHistorySnapshotsTable(this.logger);
+      this.tablesEnsured.history = true;
+    }
+    this.historyBuffer.push(row);
+  }
+
+  /**
+   * Add social post row to buffer
+   */
+  async addSocialRow(row) {
+    if (!this.tablesEnsured.social) {
+      await ensureSocialPostSnapshotsTable(this.logger);
+      this.tablesEnsured.social = true;
+    }
+    this.socialBuffer.push(row);
+  }
+
+  /**
+   * Flush a buffer to BigQuery using load job
+   * Uses lessons from backfill: createLoadJob, temp files, proper polling
+   */
+  async _flushBuffer(buffer, tableId, tableName) {
+    if (buffer.length === 0) return 0;
+
+    const rows = [...buffer]; // Copy buffer
+    buffer.length = 0; // Clear buffer
+
+    try {
+      const dataset = await getOrCreateDataset(this.datasetId, this.logger);
+      const table = dataset.table(tableId);
+
+      // Write to temporary file (load jobs require a file, not in-memory data)
+      const tempFile = path.join(os.tmpdir(), `bigquery_${tableId}_${Date.now()}_${Math.random().toString(36).substring(7)}.ndjson`);
+      const ndjson = rows.map(r => JSON.stringify(r)).join('\n');
+
+      try {
+        fs.writeFileSync(tempFile, ndjson, 'utf8');
+
+        // Create load job (FREE) - using createLoadJob as learned from backfill
+        const [job] = await table.createLoadJob(tempFile, {
+          sourceFormat: 'NEWLINE_DELIMITED_JSON',
+          writeDisposition: 'WRITE_APPEND',
+          autodetect: false // Use existing table schema
+        });
+
+        // Wait for job to complete using polling (as learned from backfill)
+        let jobMetadata;
+        const maxAttempts = 60; // 5 minutes max (5 second intervals)
+        const pollInterval = 5000; // 5 seconds
+
+        for (let attempt = 0; attempt < maxAttempts; attempt++) {
+          [jobMetadata] = await job.getMetadata();
+          const state = jobMetadata.status?.state;
+
+          if (state === 'DONE') {
+            break;
+          }
+
+          if (state === 'PENDING' || state === 'RUNNING') {
+            // Wait before next poll
+            await new Promise(resolve => setTimeout(resolve, pollInterval));
+          } else {
+            throw new Error(`Unexpected job state: ${state}`);
+          }
+        }
+
+        // Check if we timed out
+        if (jobMetadata.status?.state !== 'DONE') {
+          throw new Error(`Load job did not complete within ${maxAttempts * pollInterval / 1000} seconds`);
+        }
+
+        // Check for errors
+        if (jobMetadata.status?.errorResult) {
+          throw new Error(`Load job failed: ${jobMetadata.status.errorResult.message}`);
+        }
+
+        const rowsLoaded = jobMetadata.statistics?.load?.outputRows || rows.length;
+
+        if (this.logger) {
+          this.logger.log('INFO', `[BigQueryBatch] ✅ Flushed ${rowsLoaded} ${tableName} rows to BigQuery using LOAD JOB (free)`);
+        }
+
+        return rowsLoaded;
+      } finally {
+        // Clean up temp file (as learned from backfill)
+        try {
+          if (fs.existsSync(tempFile)) {
+            fs.unlinkSync(tempFile);
+          }
+        } catch (cleanupError) {
+          if (this.logger) {
+            this.logger.log('WARN', `[BigQueryBatch] Failed to delete temp file ${tempFile}: ${cleanupError.message}`);
+          }
+        }
+      }
+    } catch (error) {
+      // Log error but don't throw - allow Firestore writes to continue
+      if (this.logger) {
+        this.logger.log('WARN', `[BigQueryBatch] Failed to flush ${tableName} to BigQuery: ${error.message}`);
+      }
+      // Put rows back in buffer for retry on next flush
+      buffer.push(...rows);
+      return 0;
+    }
+  }
+
+  /**
+   * Flush all buffers to BigQuery
+   * Called by FirestoreBatchManager.flushBatches()
+   */
+  async flushBatches() {
+    if (process.env.BIGQUERY_ENABLED === 'false') {
+      return; // Skip if BigQuery disabled
+    }
+
+    const results = await Promise.allSettled([
+      this._flushBuffer(this.portfolioBuffer, 'portfolio_snapshots', 'portfolio'),
+      this._flushBuffer(this.historyBuffer, 'trade_history_snapshots', 'history'),
+      this._flushBuffer(this.socialBuffer, 'social_post_snapshots', 'social')
+    ]);
+
+    const totalFlushed = results
+      .filter(r => r.status === 'fulfilled')
+      .reduce((sum, r) => sum + r.value, 0);
+
+    if (totalFlushed > 0 && this.logger) {
+      this.logger.log('INFO', `[BigQueryBatch] Flushed ${totalFlushed} total rows to BigQuery`);
+    }
+  }
+
+  /**
+   * Get buffer sizes (for monitoring)
+   */
+  getBufferSizes() {
+    return {
+      portfolio: this.portfolioBuffer.length,
+      history: this.historyBuffer.length,
+      social: this.socialBuffer.length
+    };
+  }
+}
+
+module.exports = { BigQueryBatchManager };
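To close, a minimal usage sketch for the new manager, using only the methods shown in the file above; the logger shape is an assumption based on the logger.log(level, message) calls in this diff:

// Illustrative only - the require path is relative to the package root.
const { BigQueryBatchManager } = require('./functions/task-engine/utils/bigquery_batch_manager');

(async () => {
  const logger = { log: (level, msg) => console.log(`[${level}] ${msg}`) };
  const manager = new BigQueryBatchManager(logger);

  // addPortfolioRow ensures the target table once per process, then buffers.
  await manager.addPortfolioRow({
    date: '2024-01-15',
    user_id: 12345,
    user_type: 'SIGNED_IN_USER',
    portfolio_data: JSON.stringify({ positions: [] }),
    fetched_at: new Date().toISOString()
  });

  console.log(manager.getBufferSizes()); // { portfolio: 1, history: 0, social: 0 }

  // flushBatches runs one load job per non-empty buffer; on failure the
  // rows are pushed back into the buffer for retry on the next flush.
  await manager.flushBatches();
})();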