bulltrackers-module 1.0.721 → 1.0.723

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,40 +1,30 @@
  /**
   * @fileoverview Logic for fetching and storing Popular Investor rankings.
-  * Integrates IntelligentProxyManager for IP rotation and IntelligentHeaderManager for
-  * user-agent/header rotation and performance tracking.
-  * UPDATED: Added compression support for large rankings data to avoid Firestore index limits.
+  * REFACTORED: Fully migrated to BigQuery. Removed Firestore storage logic.
+  * NOW: Uses BigQuery for both 'Rankings' (Historical) and 'Master List' (State).
+  * DEDUPLICATION: Uses 'insertRowsWithMerge' to ensure idempotency.
   */
  const { IntelligentProxyManager } = require('../../core/utils/intelligent_proxy_manager');
  const { IntelligentHeaderManager } = require('../../core/utils/intelligent_header_manager');
- const zlib = require('zlib');
+ const {
+   insertRowsWithMerge,
+   ensurePIRankingsTable,
+   ensurePIMasterListTable,
+   queryPIMasterList
+ } = require('../../core/utils/bigquery_utils');
 
  /**
   * Fetches individual user rankings data by CID
-  * @param {string} cid - Customer ID
-  * @param {object} headers - Request headers to use
-  * @param {object} proxyManager - ProxyManager instance
-  * @param {object} logger - Logger instance
-  * @returns {object|null} - User rankings data or null if failed
   */
  async function fetchIndividualUserRankings(cid, headers, proxyManager, logger) {
    const individualUrl = `https://www.etoro.com/sapi/rankings/cid/${cid}/rankings/?Period=OneYearAgo`;
-
    try {
-     logger.log('INFO', `[PopularInvestorFetch] Fetching individual rankings for CID: ${cid}`);
-
      // Try with proxy first
      try {
-       const response = await proxyManager.fetch(individualUrl, {
-         method: 'GET',
-         headers
-       });
-
+       const response = await proxyManager.fetch(individualUrl, { method: 'GET', headers });
        if (response.ok) {
          const data = await response.json();
-         if (data && data.Data) {
-           logger.log('SUCCESS', `[PopularInvestorFetch] Successfully fetched individual rankings for CID: ${cid} via proxy`);
-           return data.Data; // Return the Data object which matches the Items schema
-         }
+         if (data && data.Data) return data.Data;
        }
      } catch (proxyError) {
        logger.log('WARN', `[PopularInvestorFetch] Proxy fetch failed for CID ${cid}: ${proxyError.message}`);
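
Note: the new import pulls insertRowsWithMerge from core/utils/bigquery_utils, which is not included in this diff. Below is a minimal sketch of what a MERGE-based upsert helper of that shape might look like, assuming the official @google-cloud/bigquery client and a pre-existing staging table with the same schema; the names and signature are assumptions, not the packaged implementation.

    // Hypothetical sketch of a MERGE-based upsert helper; the real
    // bigquery_utils module is not shown in this diff.
    const { BigQuery } = require('@google-cloud/bigquery');

    async function insertRowsWithMergeSketch(datasetId, tableId, rows, keyColumns, logger) {
      if (!rows.length) return;
      const bq = new BigQuery();
      const staging = `${tableId}_staging`; // assumed staging table, same schema as target
      await bq.dataset(datasetId).table(staging).insert(rows);

      const cols = Object.keys(rows[0]);
      const on = keyColumns.map(k => `T.${k} = S.${k}`).join(' AND ');
      const set = cols.filter(c => !keyColumns.includes(c)).map(c => `T.${c} = S.${c}`).join(', ');
      // MERGE makes the write idempotent: re-running updates matched keys
      // instead of appending duplicate rows.
      await bq.query(`
        MERGE \`${datasetId}.${tableId}\` T
        USING \`${datasetId}.${staging}\` S
        ON ${on}
        WHEN MATCHED THEN UPDATE SET ${set}
        WHEN NOT MATCHED THEN INSERT (${cols.join(', ')}) VALUES (${cols.map(c => 'S.' + c).join(', ')})
      `);
      logger.log('INFO', `[sketch] Merged ${rows.length} rows into ${datasetId}.${tableId}`);
    }
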
@@ -42,23 +32,14 @@ async function fetchIndividualUserRankings(cid, headers, proxyManager, logger) {
 
      // Fallback to direct fetch
      try {
-       const directResponse = await fetch(individualUrl, {
-         method: 'GET',
-         headers
-       });
-
+       const directResponse = await fetch(individualUrl, { method: 'GET', headers });
        if (directResponse.ok) {
          const data = await directResponse.json();
-         if (data && data.Data) {
-           logger.log('SUCCESS', `[PopularInvestorFetch] Successfully fetched individual rankings for CID: ${cid} via direct fetch`);
-           return data.Data;
-         }
+         if (data && data.Data) return data.Data;
        }
      } catch (directError) {
        logger.log('WARN', `[PopularInvestorFetch] Direct fetch failed for CID ${cid}: ${directError.message}`);
      }
-
-     logger.log('ERROR', `[PopularInvestorFetch] Failed to fetch individual rankings for CID: ${cid} from all sources`);
      return null;
    } catch (error) {
      logger.log('ERROR', `[PopularInvestorFetch] Error fetching individual rankings for CID ${cid}`, { errorMessage: error.message });
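
Note: both the batch fetch and the per-CID fetch now follow the same proxy-first, direct-fallback shape. As a standalone sketch of that pattern (a hypothetical helper, assuming Node 18+ global fetch; not part of the package):

    async function fetchJsonWithFallback(url, headers, proxyManager, logger) {
      const attempts = [
        (u, opts) => proxyManager.fetch(u, opts), // rotated proxy first
        (u, opts) => fetch(u, opts)               // plain direct fetch as fallback
      ];
      for (const attempt of attempts) {
        try {
          const res = await attempt(url, { method: 'GET', headers });
          if (res.ok) return await res.json();
        } catch (e) {
          logger.log('WARN', `fetch attempt failed: ${e.message}`);
        }
      }
      return null;
    }
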
@@ -67,39 +48,22 @@ async function fetchIndividualUserRankings(cid, headers, proxyManager, logger) {
  }
 
  /**
-  * Fetches the top Popular Investors and stores the raw result in Firestore.
-  * @param {object} dependencies - Contains db, logger.
-  * @param {object} config - Configuration object.
-  * @param {string} config.rankingsApiUrl - The eToro Rankings API URL.
-  * @param {string} config.rankingsCollectionName - e.g., 'popular_investor_rankings'.
-  * @param {object} config.proxyConfig - Configuration for the IntelligentProxyManager.
-  * @param {object} config.headerConfig - Configuration for the IntelligentHeaderManager.
+  * Fetches the top Popular Investors and stores them directly to BigQuery.
   */
  async function fetchAndStorePopularInvestors(config, dependencies) {
-   const { db, logger, collectionRegistry } = dependencies;
-   const { rankingsApiUrl, rankingsCollectionName, proxyConfig, headerConfig } = config;
+   const { db, logger } = dependencies;
+   const { rankingsApiUrl, proxyConfig, headerConfig } = config;
 
-   // Get collection name from registry if available, fallback to config
-   const { getCollectionPath } = collectionRegistry || {};
-   let finalRankingsCollectionName = rankingsCollectionName;
-
-   if (getCollectionPath) {
-     try {
-       // Extract collection name from registry path: popular_investor_rankings/{date}
-       const basePath = getCollectionPath('rootData', 'popularInvestorRankings', { date: '2025-01-01' });
-       // Path is like "popular_investor_rankings/2025-01-01", extract collection name
-       finalRankingsCollectionName = basePath.split('/')[0];
-     } catch (e) {
-       logger.log('WARN', `[PopularInvestorFetch] Failed to get collection from registry, using config: ${e.message}`);
-     }
+   // Validation
+   if (process.env.BIGQUERY_ENABLED === 'false') {
+     throw new Error("[PopularInvestorFetch] BIGQUERY_ENABLED is false. This module now strictly requires BigQuery.");
    }
-
-   if (!rankingsApiUrl || !finalRankingsCollectionName || !proxyConfig || !headerConfig) {
-     throw new Error("[PopularInvestorFetch] Missing required config (rankingsApiUrl, rankingsCollectionName, proxyConfig, headerConfig).");
+   if (!rankingsApiUrl || !proxyConfig || !headerConfig) {
+     throw new Error("[PopularInvestorFetch] Missing required config.");
    }
 
-   const today = new Date().toISOString().split('T')[0]; // YYYY-MM-DD
-   logger.log('INFO', `[PopularInvestorFetch] Starting fetch for date: ${today}`);
+   const today = new Date().toISOString().split('T')[0];
+   logger.log('INFO', `[PopularInvestorFetch] Starting BigQuery-native fetch for date: ${today}`);
 
    // 1. Initialize Managers
    const headerManager = new IntelligentHeaderManager(db, logger, headerConfig);
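
Note: the new validation gate only rejects the literal string 'false'; an unset BIGQUERY_ENABLED passes, i.e. BigQuery is assumed available by default:

    process.env.BIGQUERY_ENABLED = 'false'; // -> fetchAndStorePopularInvestors throws at startup
    delete process.env.BIGQUERY_ENABLED;    // -> proceeds; BigQuery is required either way
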
@@ -110,444 +74,165 @@ async function fetchAndStorePopularInvestors(config, dependencies) {
    let selectedHeaderId = 'fallback';
 
    try {
-     // 2. Select the best performing header
+     // 2. Select Header & Fetch
      const { id, header } = await headerManager.selectHeader();
      selectedHeaderId = id;
 
-     logger.log('INFO', `[PopularInvestorFetch] Selected header configuration: ${id}`);
-
-     // Merge selected headers with specific API requirements
-     // We prioritize the dynamic headers but ensure critical fields like Accept exist
      const requestHeaders = {
        'Accept': 'application/json',
        'Referer': 'https://www.etoro.com/',
        ...header
      };
 
-     // 3. Attempt Fetch via Proxy Manager
+     // Proxy Fetch
      try {
-       logger.log('INFO', '[PopularInvestorFetch] Attempting fetch via Proxy Manager...');
-
-       const response = await proxyManager.fetch(rankingsApiUrl, {
-         method: 'GET',
-         headers: requestHeaders
-       });
-
+       const response = await proxyManager.fetch(rankingsApiUrl, { method: 'GET', headers: requestHeaders });
        if (response.ok) {
          data = await response.json();
          fetchSuccess = true;
-         logger.log('SUCCESS', '[PopularInvestorFetch] Successfully fetched data via Proxy.');
-       } else {
-         logger.log('WARN', `[PopularInvestorFetch] Proxy fetch failed with status ${response.status}.`);
        }
-     } catch (proxyError) {
-       logger.log('ERROR', '[PopularInvestorFetch] Error during Proxy Manager execution.', { errorMessage: proxyError.message });
-     }
+     } catch (e) { logger.log('WARN', `[PopularInvestorFetch] Proxy failed: ${e.message}`); }
 
-     // 4. Fallback: Direct Fetch (if Proxy failed)
+     // Fallback Fetch
      if (!data) {
-       logger.log('WARN', '[PopularInvestorFetch] Falling back to Direct Node Fetch...');
        try {
-         const directResponse = await fetch(rankingsApiUrl, {
-           method: 'GET',
-           headers: requestHeaders
-         });
-
+         const directResponse = await fetch(rankingsApiUrl, { method: 'GET', headers: requestHeaders });
          if (directResponse.ok) {
            data = await directResponse.json();
            fetchSuccess = true;
-           logger.log('SUCCESS', '[PopularInvestorFetch] Successfully fetched data via Direct Fetch.');
-         } else {
-           throw new Error(`Direct fetch failed with status: ${directResponse.status}`);
          }
-       } catch (directError) {
-         logger.log('ERROR', '[PopularInvestorFetch] Direct fetch failed.', { errorMessage: directError.message });
-       }
+       } catch (e) { logger.log('ERROR', `[PopularInvestorFetch] Direct fetch failed: ${e.message}`); }
      }
 
    } catch (error) {
-     logger.log('ERROR', '[PopularInvestorFetch] Critical error in fetch orchestration.', { errorMessage: error.message });
+     logger.log('ERROR', '[PopularInvestorFetch] Fetch error', error);
    } finally {
-     // 5. Update Header Performance (Reinforcement Learning)
-     // This ensures the system "learns" which headers are getting blocked vs accepted
-     logger.log('INFO', `[PopularInvestorFetch] Updating header performance for ${selectedHeaderId}: Success=${fetchSuccess}`);
      headerManager.updatePerformance(selectedHeaderId, fetchSuccess);
-     await headerManager.flushPerformanceUpdates();
    }
 
-   // 5.5. Check for missing users from master list and fetch them individually
-   if (data && data.Items && Array.isArray(data.Items)) {
-     try {
-       logger.log('INFO', '[PopularInvestorFetch] Checking for missing users from master list...');
-
-       // Get master list path
-       let masterListPath = 'system_state/popular_investor_master_list';
-       if (collectionRegistry && collectionRegistry.getCollectionPath) {
-         try {
-           const registryPath = collectionRegistry.getCollectionPath('system', 'popularInvestorMasterList', {});
-           masterListPath = registryPath;
-         } catch (e) {
-           logger.log('WARN', `[PopularInvestorFetch] Failed to get master list path from registry, using default: ${e.message}`);
-         }
-       }
-
-       const masterListRef = db.doc(masterListPath);
-       const masterListDoc = await masterListRef.get();
+   if (!data || !data.Items || !Array.isArray(data.Items)) {
+     throw new Error('[PopularInvestorFetch] Failed to fetch valid data.');
+   }
+
+   // 3. Check for Missing Users (Using BigQuery Master List)
+   // We now fetch the state from BigQuery instead of Firestore
+   let bqMasterList = {};
+   try {
+     logger.log('INFO', '[PopularInvestorFetch] Querying BigQuery Master List for missing user check...');
+     bqMasterList = await queryPIMasterList(logger) || {};
+
+     const fetchedCids = new Set(data.Items.map(item => String(item.CustomerId)));
+     const knownCids = Object.keys(bqMasterList);
+     const missingCids = knownCids.filter(cid => !fetchedCids.has(cid));
+
+     if (missingCids.length > 0) {
+       logger.log('INFO', `[PopularInvestorFetch] Found ${missingCids.length} missing users. Fetching individually...`);
+       const { header } = await headerManager.selectHeader();
 
-       if (masterListDoc.exists) {
-         const masterListData = masterListDoc.data();
-         const masterInvestors = masterListData.investors || {};
-
-         // Build a Set of CIDs from the fetched data for fast lookup
-         const fetchedCids = new Set(data.Items.map(item => String(item.CustomerId)));
-
-         // Identify missing CIDs
-         const masterCids = Object.keys(masterInvestors);
-         const missingCids = masterCids.filter(cid => !fetchedCids.has(cid));
-
-         if (missingCids.length > 0) {
-           logger.log('INFO', `[PopularInvestorFetch] Found ${missingCids.length} missing users from master list. Fetching individually...`);
-
-           // Prepare headers for individual fetches
-           const requestHeaders = {
-             'Accept': 'application/json',
-             'Referer': 'https://www.etoro.com/',
-             ...(await headerManager.selectHeader()).header
-           };
-
-           // Fetch missing users with rate limiting
-           const missingUserData = [];
-           let successCount = 0;
-           let failureCount = 0;
-
-           for (const cid of missingCids) {
-             const userData = await fetchIndividualUserRankings(cid, requestHeaders, proxyManager, logger);
-
-             if (userData) {
-               missingUserData.push(userData);
-               successCount++;
-             } else {
-               failureCount++;
-               logger.log('WARN', `[PopularInvestorFetch] Failed to fetch data for missing user CID: ${cid} (${masterInvestors[cid].username})`);
-             }
-
-             // Add small delay between requests to avoid rate limiting
-             if (missingCids.length > 10 && missingCids.indexOf(cid) < missingCids.length - 1) {
-               await new Promise(resolve => setTimeout(resolve, 200)); // 200ms delay
-             }
-           }
-
-           // Append successfully fetched missing users to the main data
-           if (missingUserData.length > 0) {
-             data.Items.push(...missingUserData);
-             data.TotalRows += missingUserData.length;
-             logger.log('SUCCESS', `[PopularInvestorFetch] Successfully fetched ${successCount}/${missingCids.length} missing users. Updated Items array from ${data.Items.length - missingUserData.length} to ${data.Items.length} users.`);
-           }
-
-           if (failureCount > 0) {
-             logger.log('WARN', `[PopularInvestorFetch] Failed to fetch ${failureCount}/${missingCids.length} missing users.`);
-           }
-         } else {
-           logger.log('INFO', '[PopularInvestorFetch] All users from master list are present in the main fetch. No missing users to fetch individually.');
+       for (const cid of missingCids) {
+         const userData = await fetchIndividualUserRankings(cid, { ...header, 'Referer': 'https://www.etoro.com/' }, proxyManager, logger);
+         if (userData) {
+           data.Items.push(userData);
+           data.TotalRows++;
          }
-       } else {
-         logger.log('INFO', '[PopularInvestorFetch] Master list document does not exist yet. Skipping missing user check.');
+         await new Promise(r => setTimeout(r, 200)); // Rate limit
        }
-     } catch (missingUserError) {
-       logger.log('WARN', `[PopularInvestorFetch] Error while checking/fetching missing users: ${missingUserError.message}. Continuing with main fetch data.`);
-       // Non-critical error, continue with whatever data we have
      }
+   } catch (e) {
+     logger.log('WARN', `[PopularInvestorFetch] Failed to check master list: ${e.message}`);
    }
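
Note: queryPIMasterList lives in bigquery_utils, outside this diff. From its usage here (Object.keys over the result, and existing.firstSeenAt further down) it presumably returns a map keyed by CID string. A hypothetical sketch of that helper, assuming the pi_master_list schema introduced later in this file (cid, username, first_seen_at):

    const { BigQuery } = require('@google-cloud/bigquery');

    // Hypothetical: load pi_master_list into { [cid]: { username, firstSeenAt } }.
    async function queryPIMasterListSketch(logger) {
      const bq = new BigQuery();
      const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
      const [rows] = await bq.query(
        `SELECT cid, username, first_seen_at FROM \`${datasetId}.pi_master_list\``
      );
      const map = {};
      for (const row of rows) {
        // TIMESTAMP columns come back as BigQueryTimestamp objects ({ value: ISO string })
        map[String(row.cid)] = { username: row.username, firstSeenAt: row.first_seen_at };
      }
      logger.log('INFO', `[sketch] Loaded ${rows.length} master list entries`);
      return map;
    }
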
 
-   // 6. Final Validation & Storage
-   if (data && data.Items && Array.isArray(data.Items)) {
-     try {
-       const docRef = db.collection(finalRankingsCollectionName).doc(today);
-
-       // Prepare data for storage
-       const dataToStore = {
-         fetchedAt: new Date(),
-         totalRows: data.TotalRows,
-         itemsCount: data.Items.length,
-         ...data
-       };
-
-       // Apply compression if data is large enough (similar to ResultCommitter pattern)
-       let firestorePayload;
-       try {
-         const jsonString = JSON.stringify(dataToStore);
-         const rawBuffer = Buffer.from(jsonString);
-         const SIZE_THRESHOLD = 50 * 1024; // 50KB threshold (same as ResultCommitter)
-         const MAX_COMPRESSED_SIZE = 900 * 1024; // 900KB max (Firestore limit is 1MB)
-
-         if (rawBuffer.length > SIZE_THRESHOLD) {
-           logger.log('INFO', `[PopularInvestorFetch] Rankings data size ${(rawBuffer.length / 1024).toFixed(2)}KB exceeds threshold. Compressing...`);
-
-           // Gzip the JSON string
-           const compressedBuffer = zlib.gzipSync(rawBuffer);
-
-           if (compressedBuffer.length < MAX_COMPRESSED_SIZE) {
-             // Create the compressed wrapper structure
-             firestorePayload = {
-               fetchedAt: new Date(), // Keep outer timestamp for indexing
-               totalRows: data.TotalRows, // Keep outer metadata for indexing
-               itemsCount: data.Items.length, // Keep outer metadata for indexing
-               _compressed: true,
-               payload: compressedBuffer
-             };
-
-             logger.log('INFO', `[PopularInvestorFetch] Compressed rankings data: ${(rawBuffer.length / 1024).toFixed(2)}KB -> ${(compressedBuffer.length / 1024).toFixed(2)}KB`);
-           } else {
-             // Compressed size still too large, store uncompressed (will need sharding in future)
-             logger.log('WARN', `[PopularInvestorFetch] Compressed size ${(compressedBuffer.length / 1024).toFixed(2)}KB still exceeds limit. Storing uncompressed (may hit index limits).`);
-             firestorePayload = dataToStore;
-           }
-         } else {
-           // Data is small enough, store uncompressed
-           firestorePayload = dataToStore;
-         }
-       } catch (compressionError) {
-         logger.log('WARN', `[PopularInvestorFetch] Compression failed, storing uncompressed: ${compressionError.message}`);
-         firestorePayload = dataToStore;
-       }
-
-       await docRef.set(firestorePayload);
+   // 4. Store Rankings to BigQuery (Deduplicated)
+   const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+
+   try {
+     await ensurePIRankingsTable(logger);
+
+     const fetchedAt = new Date().toISOString();
+     const rankingsRows = data.Items.map((item, index) => ({
+       date: today,
+       pi_id: parseInt(item.CustomerId, 10),
+       username: item.UserName || null,
+       rank: index + 1,
+       category: item.Category || null,
+       rankings_data: item, // Store full JSON
+       fetched_at: fetchedAt
+     }));
+
+     // MERGE: Overwrite if (date, pi_id) exists.
+     // This effectively handles "duplicate data" by updating the record to the latest fetch.
+     await insertRowsWithMerge(datasetId, 'pi_rankings', rankingsRows, ['date', 'pi_id'], logger);
+     logger.log('SUCCESS', `[PopularInvestorFetch] Merged ${rankingsRows.length} ranking records into BigQuery.`);
+
+   } catch (e) {
+     logger.log('ERROR', `[PopularInvestorFetch] Failed to write rankings to BigQuery: ${e.message}`);
+     throw e;
+   }
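
Note: the ['date', 'pi_id'] merge key makes the daily write idempotent: re-running the job for the same day rewrites that day's rows instead of appending duplicates. A quick verification query (illustrative only; assumes the date column is a DATE):

    const { BigQuery } = require('@google-cloud/bigquery');

    async function assertNoDuplicateRankings(datasetId, isoDate) {
      const bq = new BigQuery();
      const [rows] = await bq.query({
        query: `
          SELECT pi_id, COUNT(*) AS n
          FROM \`${datasetId}.pi_rankings\`
          WHERE date = DATE(@d)
          GROUP BY pi_id
          HAVING n > 1`,
        params: { d: isoDate }
      });
      if (rows.length > 0) throw new Error(`${rows.length} duplicated pi_ids for ${isoDate}`);
    }
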
 
-       logger.log('SUCCESS', `[PopularInvestorFetch] Stored ${data.TotalRows} rankings into ${finalRankingsCollectionName}/${today}${firestorePayload._compressed ? ' (compressed)' : ''}`);
-
-       // Write rankings to BigQuery (one row per PI)
-       if (process.env.BIGQUERY_ENABLED !== 'false') {
-         try {
-           const { insertRows, ensurePIRankingsTable } = require('../../core/utils/bigquery_utils');
-           await ensurePIRankingsTable(logger);
-
-           const fetchedAt = new Date().toISOString();
-           const bigqueryRows = data.Items.map((item, index) => {
-             return {
-               date: today,
-               pi_id: parseInt(item.CustomerId, 10),
-               username: item.UserName || null,
-               rank: index + 1, // Rank is position in array (1-indexed)
-               category: item.Category || null,
-               rankings_data: item, // Store full item data as JSON
-               fetched_at: fetchedAt
-             };
-           });
-
-           const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
-           await insertRows(datasetId, 'pi_rankings', bigqueryRows, logger);
-
-           logger.log('INFO', `[PopularInvestorFetch] Successfully stored ${bigqueryRows.length} ranking records to BigQuery`);
-         } catch (bqError) {
-           logger.log('WARN', `[PopularInvestorFetch] BigQuery rankings write failed: ${bqError.message}`);
-           // Continue - Firestore write succeeded
-         }
-       }
-
-       // Update the master list of Popular Investors
-       // Use batched writes to avoid 500 field transform limit
-       try {
-         const { FieldValue } = require('@google-cloud/firestore');
-         let masterListPath = 'system_state/popular_investor_master_list';
-
-         if (collectionRegistry && collectionRegistry.getCollectionPath) {
-           try {
-             // Get the path from registry
-             const registryPath = collectionRegistry.getCollectionPath('system', 'popularInvestorMasterList', {});
-             masterListPath = registryPath;
-           } catch (e) {
-             logger.log('WARN', `[PopularInvestorFetch] Failed to get master list path from registry, using default: ${e.message}`);
-           }
-         }
-
-         const masterListRef = db.doc(masterListPath);
-         const masterListDoc = await masterListRef.get();
-
-         const now = new Date();
-         const existingInvestors = masterListDoc.exists ? (masterListDoc.data().investors || {}) : {};
-         const investorsToUpdate = {};
-         let newInvestorsCount = 0;
-         let updatedInvestorsCount = 0;
-
-         // Identify which investors need updating (only those in current fetch)
-         for (const item of data.Items) {
-           const cid = String(item.CustomerId);
-           const username = item.UserName;
-
-           if (!cid || !username) continue;
-
-           if (!existingInvestors[cid]) {
-             // New PI discovered
-             investorsToUpdate[cid] = {
-               cid: cid,
-               username: username,
-               firstSeenAt: now,
-               lastSeenAt: now
-             };
-             newInvestorsCount++;
-           } else {
-             // Existing PI - check if username changed or needs lastSeenAt update
-             const needsUpdate = existingInvestors[cid].username !== username;
-             if (needsUpdate) {
-               investorsToUpdate[cid] = {
-                 ...existingInvestors[cid],
-                 username: username,
-                 lastSeenAt: now
-               };
-               updatedInvestorsCount++;
-             } else {
-               // Just update lastSeenAt timestamp
-               investorsToUpdate[cid] = {
-                 ...existingInvestors[cid],
-                 lastSeenAt: now
-               };
-               updatedInvestorsCount++;
-             }
-           }
-         }
-
-         // Use batched writes to update only changed investors
-         // Since we're using regular Date objects (not serverTimestamp()), we avoid field transform limits
-         // But we still batch to handle large numbers of investors efficiently
-         const BATCH_SIZE = 450; // Firestore allows 500 operations per batch, leave room for metadata
-         const investorsToUpdateEntries = Object.entries(investorsToUpdate);
-         const totalBatches = Math.ceil(investorsToUpdateEntries.length / BATCH_SIZE);
-
-         // If document doesn't exist, create it with all investors in first batch
-         if (!masterListDoc.exists && investorsToUpdateEntries.length > 0) {
-           const batch = db.batch();
-           const finalInvestorsMap = { ...existingInvestors, ...investorsToUpdate };
-           batch.set(masterListRef, {
-             investors: finalInvestorsMap,
-             lastUpdated: FieldValue.serverTimestamp(),
-             totalInvestors: Object.keys(finalInvestorsMap).length
-           }, { merge: true });
-           await batch.commit();
-           logger.log('INFO', `[PopularInvestorFetch] Created master list with ${Object.keys(finalInvestorsMap).length} investors`);
-         } else if (investorsToUpdateEntries.length > 0) {
-           // Document exists - update only changed investors in batches
-           for (let batchIdx = 0; batchIdx < totalBatches; batchIdx++) {
-             const batch = db.batch();
-             const startIdx = batchIdx * BATCH_SIZE;
-             const endIdx = Math.min(startIdx + BATCH_SIZE, investorsToUpdateEntries.length);
-             const batchEntries = investorsToUpdateEntries.slice(startIdx, endIdx);
-
-             // Build update object with all investors in this batch
-             const batchUpdateData = {};
-             for (const [cid, investorData] of batchEntries) {
-               batchUpdateData[`investors.${cid}`] = investorData;
-             }
-
-             // Update metadata (lastUpdated, totalInvestors) only in the last batch
-             if (batchIdx === totalBatches - 1) {
-               const finalInvestorsMap = { ...existingInvestors, ...investorsToUpdate };
-               batchUpdateData.lastUpdated = FieldValue.serverTimestamp();
-               batchUpdateData.totalInvestors = Object.keys(finalInvestorsMap).length;
-             }
-
-             batch.update(masterListRef, batchUpdateData);
-             await batch.commit();
-
-             if (totalBatches > 1) {
-               logger.log('INFO', `[PopularInvestorFetch] Updated master list batch ${batchIdx + 1}/${totalBatches} (${batchEntries.length} investors)`);
-             }
-           }
-         }
-
-         logger.log('SUCCESS', `[PopularInvestorFetch] Updated master list: ${newInvestorsCount} new, ${updatedInvestorsCount} updated. Total unique PIs: ${Object.keys({ ...existingInvestors, ...investorsToUpdate }).length}`);
-
-         // Write master list updates to BigQuery
-         if (process.env.BIGQUERY_ENABLED !== 'false' && Object.keys(investorsToUpdate).length > 0) {
-           try {
-             const { insertRowsWithMerge, ensurePIMasterListTable } = require('../../core/utils/bigquery_utils');
-             await ensurePIMasterListTable(logger);
-
-             const now = new Date().toISOString();
-             const bigqueryRows = Object.entries(investorsToUpdate).map(([cid, investorData]) => {
-               // Handle Firestore Timestamp objects
-               const convertTimestamp = (ts) => {
-                 if (!ts) return now;
-                 if (ts instanceof Date) return ts.toISOString();
-                 if (ts.toDate && typeof ts.toDate === 'function') return ts.toDate().toISOString();
-                 if (typeof ts === 'string') return ts;
-                 return now;
-               };
-
-               return {
-                 cid: parseInt(cid, 10),
-                 username: investorData.username,
-                 first_seen_at: convertTimestamp(investorData.firstSeenAt),
-                 last_seen_at: convertTimestamp(investorData.lastSeenAt),
-                 last_updated: now
-               };
-             });
-
-             const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
-             // Use MERGE to update existing records or insert new ones
-             await insertRowsWithMerge(datasetId, 'pi_master_list', bigqueryRows, ['cid'], logger);
-
-             logger.log('INFO', `[PopularInvestorFetch] Successfully stored ${bigqueryRows.length} master list records to BigQuery`);
-           } catch (bqError) {
-             logger.log('WARN', `[PopularInvestorFetch] BigQuery master list write failed: ${bqError.message}`);
-             // Continue - Firestore write succeeded
-           }
-         }
-       } catch (masterListError) {
-         logger.log('WARN', `[PopularInvestorFetch] Failed to update master list: ${masterListError.message}`);
-         // Non-critical, continue
-       }
-
-       // Update root data indexer for today's date after rankings data is stored
-       try {
-         const { runRootDataIndexer } = require('../../root-data-indexer/index');
-
-         // Access rootDataIndexer from config (passed from index.js) or use defaults
-         // Using bracket notation to avoid TypeScript errors
-         const rootDataIndexerConfig = (config && typeof config === 'object' && config['rootDataIndexer'])
-           ? config['rootDataIndexer']
-           : null;
-
-         if (!rootDataIndexerConfig) {
-           logger.log('WARN', `[PopularInvestorFetch] Root data indexer config not provided. Skipping index update.`);
-         } else {
-           // Merge rankings collection name into the full config
-           const indexerConfig = {
-             ...rootDataIndexerConfig,
-             collections: {
-               ...rootDataIndexerConfig.collections,
-               piRankings: finalRankingsCollectionName // Override with actual collection name used
-             },
-             targetDate: today // Index only today's date for speed
-           };
-
-           logger.log('INFO', `[PopularInvestorFetch] Triggering root data indexer for date ${today} after rankings data storage...`);
-           const result = await runRootDataIndexer(indexerConfig, dependencies);
-
-           if (result.success && result.count > 0) {
-             logger.log('INFO', `[PopularInvestorFetch] Root data indexer completed successfully for date ${today} (updated ${result.count} dates)`);
-           } else {
-             logger.log('WARN', `[PopularInvestorFetch] Root data indexer completed but no dates were updated for ${today}`);
-           }
-         }
-       } catch (indexerError) {
-         logger.log('ERROR', `[PopularInvestorFetch] Failed to run root data indexer for ${today}`, indexerError);
-         // Continue - rankings data is stored, indexer failure is non-critical
+   // 5. Update Master List in BigQuery (State Management)
+   try {
+     await ensurePIMasterListTable(logger);
+
+     const now = new Date().toISOString();
+     const masterListUpdates = [];
+
+     // Calculate updates based on what we just fetched vs what we know from BQ
+     for (const item of data.Items) {
+       const cid = String(item.CustomerId);
+       const existing = bqMasterList[cid];
+
+       if (!existing) {
+         // New Discovery
+         masterListUpdates.push({
+           cid: parseInt(cid, 10),
+           username: item.UserName,
+           first_seen_at: now,
+           last_seen_at: now,
+           last_updated: now
+         });
+       } else {
+         // Existing - Update timestamps and username if changed
+         masterListUpdates.push({
+           cid: parseInt(cid, 10),
+           username: item.UserName, // Update username in case of rebrand
+           first_seen_at: existing.firstSeenAt ? (existing.firstSeenAt.value || existing.firstSeenAt) : now, // Preserve original discovery time
+           last_seen_at: now,
+           last_updated: now
+         });
        }
-
-       return { success: true, count: data.TotalRows };
+     }
 
-     } catch (dbError) {
-       logger.log('ERROR', '[PopularInvestorFetch] Failed to write to Firestore.', { errorMessage: dbError.message });
-       throw dbError;
+     if (masterListUpdates.length > 0) {
+       // MERGE: Overwrite based on 'cid'
+       await insertRowsWithMerge(datasetId, 'pi_master_list', masterListUpdates, ['cid'], logger);
+       logger.log('SUCCESS', `[PopularInvestorFetch] Updated Master List for ${masterListUpdates.length} investors in BigQuery.`);
      }
-   } else {
-     const errorMsg = fetchSuccess
-       ? '[PopularInvestorFetch] Fetched data format is invalid (missing Items array).'
-       : '[PopularInvestorFetch] Failed to fetch data from all sources.';
-
-     logger.log('ERROR', errorMsg);
-     throw new Error(errorMsg);
+
+   } catch (e) {
+     logger.log('WARN', `[PopularInvestorFetch] Failed to update Master List: ${e.message}`);
+   }
+
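
Note: the expression existing.firstSeenAt ? (existing.firstSeenAt.value || existing.firstSeenAt) : now handles the fact that @google-cloud/bigquery returns TIMESTAMP columns as BigQueryTimestamp objects whose .value holds an ISO-8601 string, while locally built rows carry plain strings. Spelled out as a helper (hypothetical; the package inlines the check):

    function toIsoTimestamp(ts, fallback) {
      if (!ts) return fallback;                           // never seen before
      if (typeof ts === 'string') return ts;              // already an ISO string
      if (ts instanceof Date) return ts.toISOString();    // plain Date
      if (typeof ts.value === 'string') return ts.value;  // BigQueryTimestamp
      return fallback;
    }
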
+   // 6. Trigger Root Data Indexer
+   try {
+     const { runRootDataIndexer } = require('../../root-data-indexer/index');
+     const rootDataIndexerConfig = config.rootDataIndexer || {};
+
+     // Pass the BQ table name as the collection hint, though the indexer should auto-detect BQ availability
+     const indexerConfig = {
+       ...rootDataIndexerConfig,
+       collections: {
+         ...rootDataIndexerConfig.collections,
+         piRankings: 'pi_rankings'
+       },
+       targetDate: today
+     };
+
+     await runRootDataIndexer(indexerConfig, dependencies);
+   } catch (e) {
+     logger.log('WARN', `[PopularInvestorFetch] Indexer trigger failed: ${e.message}`);
    }
+
+   return { success: true, count: data.TotalRows, storage: 'BIGQUERY_ONLY' };
  }
 
  module.exports = { fetchAndStorePopularInvestors };
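
Note: a minimal invocation sketch for the refactored entry point (paths and values are placeholders; db is still required because header and proxy state lives in Firestore even though rankings storage is now BigQuery-only):

    const { Firestore } = require('@google-cloud/firestore');
    const { fetchAndStorePopularInvestors } = require('./popular_investor_fetch'); // path illustrative

    const logger = { log: (level, msg, meta) => console.log(level, msg, meta ?? '') };

    fetchAndStorePopularInvestors(
      {
        rankingsApiUrl: 'https://www.etoro.com/sapi/rankings/...', // real URL comes from deployment config
        proxyConfig: { /* IntelligentProxyManager options */ },
        headerConfig: { /* IntelligentHeaderManager options */ }
      },
      { db: new Firestore(), logger }
    ).then(result => {
      console.log(result); // { success: true, count: <TotalRows>, storage: 'BIGQUERY_ONLY' }
    }).catch(console.error);
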